-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathengine.py
More file actions
76 lines (65 loc) · 2.46 KB
/
engine.py
File metadata and controls
76 lines (65 loc) · 2.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from collections.abc import Iterable
import chromadb
from jixia.structs import LeanName, DeclarationKind
from psycopg import Connection
from psycopg.rows import class_row
from psycopg.types.json import Jsonb
from pydantic import BaseModel, ConfigDict
from database.embedding import GeminiEmbedding
class Record(BaseModel):
module_name: LeanName
kind: DeclarationKind
name: LeanName
signature: str
type: str
value: str | None
docstring: str | None
informal_name: str
informal_description: str
model_config = ConfigDict(extra="allow")
class QueryResult(BaseModel):
result: Record
distance: float
class PhyslibSearchEngine:
def __init__(self, path: str, conn: Connection):
self.conn = conn
self.client = chromadb.PersistentClient(path)
self.collection = self.client.get_collection(name="physlibsearch", embedding_function=None)
self.embedding = GeminiEmbedding(task_type="RETRIEVAL_QUERY")
def fetch_declarations(self, names: Iterable[LeanName]) -> list[Record]:
ret = []
with self.conn.cursor(row_factory=class_row(Record)) as cursor:
for n in names:
cursor.execute(
"""
SELECT * FROM record
WHERE name = %s
""",
(Jsonb(n),),
)
ret.append(cursor.fetchone())
return ret
def find_declarations(self, queries: list[str], num_results: int) -> list[list[QueryResult]]:
query_embedding = self.embedding.embed(queries)
results = self.collection.query(
query_embeddings=query_embedding,
n_results=num_results,
include=["distances"],
)
ret = []
with self.conn.cursor(row_factory=class_row(Record)) as cursor:
for ids, distances in zip(results["ids"], results["distances"]):
current_results = []
for doc_id, distance in zip(ids, distances):
name = doc_id.split(" ")
cursor.execute(
"""
SELECT * FROM record
WHERE name = %s
""",
(Jsonb(name),),
)
result = cursor.fetchone()
current_results.append(QueryResult(result=result, distance=distance))
ret.append(current_results)
return ret