Skip to content

Commit

Permalink
Add AQuA docs
Browse files (browse the repository at this point in the history)
  • Loading branch information
woodwardmw committed Jan 22, 2024
1 parent aeb9d61 commit fd2e90a
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 23 deletions.
44 changes: 21 additions & 23 deletions app/core/vectordb/postgres4langchain.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -141,6 +141,7 @@ def add_to_collection(self, docs: List[schema.Document], **kwargs) -> None:
"SELECT 1 FROM embeddings WHERE source_id = %s", (doc.docId,))
doc_id_already_exists = cur.fetchone()
links= ",".join([str(item) for item in doc.links])
doc.text = doc.text.replace('\0', '').replace('\x00', '').replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')
if not doc_id_already_exists:
data_list.append(
[
Expand Down Expand Up @@ -176,31 +177,28 @@ def add_to_collection(self, docs: List[schema.Document], **kwargs) -> None:
),
)
cur.close()
try:
cur = self.db_conn.cursor()
execute_values(
cur,
"INSERT INTO embeddings (source_id, document, label, media, links, embedding"
") VALUES %s",
data_list,
)
self.db_conn.commit()
cur = self.db_conn.cursor()
execute_values(
cur,
"INSERT INTO embeddings (source_id, document, label, media, links, embedding"
") VALUES %s",
data_list,
)
self.db_conn.commit()

# create index
cur.execute("SELECT COUNT(*) as cnt FROM embeddings;")
num_records = cur.fetchone()[0]
num_lists = num_records / 1000
num_lists = max(10, num_lists, math.sqrt(num_records))
# use the cosine distance measure, which is what we'll later use for querying
cur.execute(
"CREATE INDEX ON embeddings USING ivfflat (embedding vector_cosine_ops) "
+ f"WITH (lists = {num_lists});"
)
self.db_conn.commit()
# create index
cur.execute("SELECT COUNT(*) as cnt FROM embeddings;")
num_records = cur.fetchone()[0]
num_lists = num_records / 1000
num_lists = max(10, num_lists, math.sqrt(num_records))
# use the cosine distance measure, which is what we'll later use for querying
cur.execute(
"CREATE INDEX ON embeddings USING ivfflat (embedding vector_cosine_ops) "
+ f"WITH (lists = {num_lists});"
)
self.db_conn.commit()

cur.close()
except Exception as exe:
raise PostgresException("While adding data: " + str(exe)) from exe
cur.close()

def _get_relevant_documents(
self, query: list, run_manager: CallbackManagerForRetrieverRun| None = None, **kwargs
Expand Down
12 changes: 12 additions & 0 deletions app/templates/chat-demo-postgres.html
Original file line number | Diff line number | Diff line change
Expand Up @@ -67,6 +67,18 @@ <h1 class="font-bold mb-4">assistant.bible</h1></a
>Faith and Farming</label
>
</div>
<div class="flex items-center">
<input
type="checkbox"
id="aqua-docs"
name="aqua-docs"
value="aqua_docs"
onchange="changeLabel(this)"
/>
<label for="aqua-docs" class="ml-2"
>AQuA Docs</label
>
</div>
</div>
<div class="w-full md:w-auto mt-4 md:mt-0">
<a
Expand Down

0 comments on commit fd2e90a

Please sign in to comment.