From 8fb2be1f8bf19be4628159a86ff1c146eff30094 Mon Sep 17 00:00:00 2001 From: Um Changyong Date: Mon, 16 Dec 2024 11:52:00 +0900 Subject: [PATCH] fix: reformatting... --- autorag/nodes/retrieval/base.py | 5 ++++- autorag/nodes/retrieval/vectordb.py | 2 +- autorag/vectordb/chroma.py | 4 +++- autorag/vectordb/milvus.py | 2 +- autorag/vectordb/pinecone.py | 4 ++-- autorag/vectordb/qdrant.py | 16 ++++++++++++---- 6 files changed, 23 insertions(+), 10 deletions(-) diff --git a/autorag/nodes/retrieval/base.py b/autorag/nodes/retrieval/base.py index 9da0e6f4a..16fb8335d 100644 --- a/autorag/nodes/retrieval/base.py +++ b/autorag/nodes/retrieval/base.py @@ -102,7 +102,10 @@ def cast_queries(queries: Union[str, List[str]]) -> List[str]: def evenly_distribute_passages( - ids: List[List[str]], scores: List[List[float]], contents: [List[List[str]]], top_k: int + ids: List[List[str]], + scores: List[List[float]], + contents: [List[List[str]]], + top_k: int, ) -> Tuple[List[str], List[float], List[str]]: assert len(ids) == len(scores), "ids and scores must have same length." query_cnt = len(ids) diff --git a/autorag/nodes/retrieval/vectordb.py b/autorag/nodes/retrieval/vectordb.py index a02bca4bf..fcaa1cb77 100644 --- a/autorag/nodes/retrieval/vectordb.py +++ b/autorag/nodes/retrieval/vectordb.py @@ -27,7 +27,7 @@ flatten_apply, result_to_dataframe, pop_params, - fetch_contents, + # fetch_contents, empty_cuda_cache, convert_inputs_to_list, make_batch, diff --git a/autorag/vectordb/chroma.py b/autorag/vectordb/chroma.py index 37a73f21c..c7375fef2 100644 --- a/autorag/vectordb/chroma.py +++ b/autorag/vectordb/chroma.py @@ -68,7 +68,9 @@ async def add(self, ids: List[str], texts: List[str]): texts = self.truncated_inputs(texts) text_embeddings = await self.embedding.aget_text_embedding_batch(texts) if isinstance(self.collection, AsyncCollection): - await self.collection.add(ids=ids, embeddings=text_embeddings, documents=texts) + await self.collection.add( + ids=ids, embeddings=text_embeddings, documents=texts + ) else: self.collection.add(ids=ids, embeddings=text_embeddings, documents=texts) diff --git a/autorag/vectordb/milvus.py b/autorag/vectordb/milvus.py index 4be88d3c7..bd2e7638c 100644 --- a/autorag/vectordb/milvus.py +++ b/autorag/vectordb/milvus.py @@ -68,7 +68,7 @@ def __init__( field = FieldSchema( name="vector", dtype=DataType.FLOAT_VECTOR, dim=dimension ) - content = FieldSchema( + content = FieldSchema( name="content", dtype=DataType.VARCHAR, max_length=65535 ) schema = CollectionSchema(fields=[pk, field, content]) diff --git a/autorag/vectordb/pinecone.py b/autorag/vectordb/pinecone.py index cf78cc9f6..28e16304e 100644 --- a/autorag/vectordb/pinecone.py +++ b/autorag/vectordb/pinecone.py @@ -62,8 +62,8 @@ async def add(self, ids: List[str], texts: List[str]): metadatas = [{} for _ in texts] for metadata, text in zip(metadatas, texts): - metadata[self.text_key] = text - + metadata[self.text_key] = text + vector_tuples = list(zip(ids, text_embeddings, metadatas)) batch_vectors = make_batch(vector_tuples, self.ingest_batch) diff --git a/autorag/vectordb/qdrant.py b/autorag/vectordb/qdrant.py index a18f171a4..680bb810c 100644 --- a/autorag/vectordb/qdrant.py +++ b/autorag/vectordb/qdrant.py @@ -86,10 +86,13 @@ async def add(self, ids: List[str], texts: List[str]): metadatas = [{} for _ in texts] for metadata, text in zip(metadatas, texts): - metadata[self.text_key] = text + metadata[self.text_key] = text points = list( - map(lambda x: PointStruct(id=x[0], vector=x[1], payload=x[2]), zip(ids, text_embeddings, metadatas)) + map( + lambda x: PointStruct(id=x[0], vector=x[1], payload=x[2]), + zip(ids, text_embeddings, metadatas), + ) ) self.client.upload_points( @@ -133,7 +136,9 @@ async def query( search_queries = list( map( - lambda x: SearchRequest(vector=x, limit=top_k, with_vector=True, with_payload=True), + lambda x: SearchRequest( + vector=x, limit=top_k, with_vector=True, with_payload=True + ), query_embeddings, ) ) @@ -145,7 +150,10 @@ async def query( # Extract IDs and distances ids = [[str(hit.id) for hit in result] for result in search_result] scores = [[hit.score for hit in result] for result in search_result] - contents = [[hit.payload.get(self.text_key) for hit in result] for result in search_result] + contents = [ + [hit.payload.get(self.text_key) for hit in result] + for result in search_result + ] return ids, scores, contents