From fd2e90a3681df10262afae4ae046400a9810200f Mon Sep 17 00:00:00 2001 From: Mark Date: Mon, 22 Jan 2024 14:35:39 -0800 Subject: [PATCH] Add AQuA docs --- app/core/vectordb/postgres4langchain.py | 44 ++++++++++++------------- app/templates/chat-demo-postgres.html | 12 +++++++ 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/app/core/vectordb/postgres4langchain.py b/app/core/vectordb/postgres4langchain.py index 242171f..7ff6fb9 100644 --- a/app/core/vectordb/postgres4langchain.py +++ b/app/core/vectordb/postgres4langchain.py @@ -141,6 +141,7 @@ def add_to_collection(self, docs: List[schema.Document], **kwargs) -> None: "SELECT 1 FROM embeddings WHERE source_id = %s", (doc.docId,)) doc_id_already_exists = cur.fetchone() links= ",".join([str(item) for item in doc.links]) + doc.text = doc.text.replace('\0', '').replace('\x00', '').replace('\n', ' ').replace('\r', ' ').replace('\t', ' ') if not doc_id_already_exists: data_list.append( [ @@ -176,31 +177,28 @@ def add_to_collection(self, docs: List[schema.Document], **kwargs) -> None: ), ) cur.close() - try: - cur = self.db_conn.cursor() - execute_values( - cur, - "INSERT INTO embeddings (source_id, document, label, media, links, embedding" - ") VALUES %s", - data_list, - ) - self.db_conn.commit() + cur = self.db_conn.cursor() + execute_values( + cur, + "INSERT INTO embeddings (source_id, document, label, media, links, embedding" + ") VALUES %s", + data_list, + ) + self.db_conn.commit() - # create index - cur.execute("SELECT COUNT(*) as cnt FROM embeddings;") - num_records = cur.fetchone()[0] - num_lists = num_records / 1000 - num_lists = max(10, num_lists, math.sqrt(num_records)) - # use the cosine distance measure, which is what we'll later use for querying - cur.execute( - "CREATE INDEX ON embeddings USING ivfflat (embedding vector_cosine_ops) " - + f"WITH (lists = {num_lists});" - ) - self.db_conn.commit() + # create index + cur.execute("SELECT COUNT(*) as cnt FROM embeddings;") + num_records = cur.fetchone()[0] + num_lists = num_records / 1000 + num_lists = max(10, num_lists, math.sqrt(num_records)) + # use the cosine distance measure, which is what we'll later use for querying + cur.execute( + "CREATE INDEX ON embeddings USING ivfflat (embedding vector_cosine_ops) " + + f"WITH (lists = {num_lists});" + ) + self.db_conn.commit() - cur.close() - except Exception as exe: - raise PostgresException("While adding data: " + str(exe)) from exe + cur.close() def _get_relevant_documents( self, query: list, run_manager: CallbackManagerForRetrieverRun| None = None, **kwargs diff --git a/app/templates/chat-demo-postgres.html b/app/templates/chat-demo-postgres.html index 0afa2d1..d500a42 100644 --- a/app/templates/chat-demo-postgres.html +++ b/app/templates/chat-demo-postgres.html @@ -67,6 +67,18 @@

assistant.bible

Faith and Farming +
+ + +