From ba25a74520b30afc965cb24e854222c11749d3c1 Mon Sep 17 00:00:00 2001 From: yucui Date: Thu, 5 Dec 2024 14:20:41 +0800 Subject: [PATCH 01/10] refactor: handle multi vids path bug --- .../operators/hugegraph_op/graph_rag_query.py | 50 +++++++++++-------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py index a3dc1ade..a2d113bc 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py @@ -90,19 +90,7 @@ def __init__(self, max_deep: int = 2, max_items: int = 20, max_v_prop_len: int = self._max_e_prop_len = max_e_prop_len def run(self, context: Dict[str, Any]) -> Dict[str, Any]: - # pylint: disable=R0915 (too-many-statements) - if self._client is None: - if isinstance(context.get("graph_client"), PyHugeClient): - self._client = context["graph_client"] - else: - ip = context.get("ip") or "localhost" - port = context.get("port") or "8080" - graph = context.get("graph") or "hugegraph" - user = context.get("user") or "admin" - pwd = context.get("pwd") or "admin" - gs = context.get("graphspace") or None - self._client = PyHugeClient(ip, port, graph, user, pwd, gs) - assert self._client is not None, "No valid graph to search." + self._init_client(context) # 2. Extract params from context matched_vids = context.get("match_vids") @@ -129,15 +117,18 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]: log.debug("Vids gremlin query: %s", gremlin_query) vertex_knowledge = self._format_graph_from_vertex(query_result=vertexes) - gremlin_query = VID_QUERY_NEIGHBOR_TPL.format( - keywords=matched_vids, - max_deep=self._max_deep, - edge_labels=edge_labels_str, - edge_limit=edge_limit_amount, - max_items=self._max_items, - ) - log.debug("Kneighbor gremlin query: %s", gremlin_query) - paths = self._client.gremlin().exec(gremlin=gremlin_query)["data"] + paths: List[Any] = [] + # TODO: 这里后续改为使用生成器 or 异步 asycnio 处理以提高性能 + for matched_vid in matched_vids: + gremlin_query = VID_QUERY_NEIGHBOR_TPL.format( + keywords="'{}'".format(matched_vid), + max_deep=self._max_deep, + edge_labels=edge_labels_str, + edge_limit=edge_limit_amount, + max_items=self._max_items, + ) + log.debug("Kneighbor gremlin query: %s", gremlin_query) + paths.extend(self._client.gremlin().exec(gremlin=gremlin_query)["data"]) graph_chain_knowledge, vertex_degree_list, knowledge_with_degree = self._format_graph_query_result( query_paths=paths @@ -182,6 +173,21 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]: log.debug("Knowledge from Graph:\n%s", "\n".join(context["graph_result"])) return context + def _init_client(self, context): + # pylint: disable=R0915 (too-many-statements) + if self._client is None: + if isinstance(context.get("graph_client"), PyHugeClient): + self._client = context["graph_client"] + else: + ip = context.get("ip") or "localhost" + port = context.get("port") or "8080" + graph = context.get("graph") or "hugegraph" + user = context.get("user") or "admin" + pwd = context.get("pwd") or "admin" + gs = context.get("graphspace") or None + self._client = PyHugeClient(ip, port, graph, user, pwd, gs) + assert self._client is not None, "No valid graph to search." + def _format_graph_from_vertex(self, query_result: List[Any]) -> Set[str]: knowledge = set() for item in query_result: From 9022a685063b8da5fba057225a0e24a9336f642a Mon Sep 17 00:00:00 2001 From: yc319 <58455269+yc319@users.noreply.github.com> Date: Mon, 9 Dec 2024 10:33:33 +0800 Subject: [PATCH 02/10] refactor move default params to config file --- hugegraph-llm/src/hugegraph_llm/config/config_data.py | 3 +++ hugegraph-llm/src/hugegraph_llm/indices/vector_index.py | 6 ++++-- .../src/hugegraph_llm/operators/graph_rag_task.py | 4 ++-- .../operators/hugegraph_op/graph_rag_query.py | 8 ++++---- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/config/config_data.py b/hugegraph-llm/src/hugegraph_llm/config/config_data.py index 78ff0cf8..5c22025c 100644 --- a/hugegraph-llm/src/hugegraph_llm/config/config_data.py +++ b/hugegraph-llm/src/hugegraph_llm/config/config_data.py @@ -105,6 +105,9 @@ class ConfigData: graph_pwd: Optional[str] = "xxx" graph_space: Optional[str] = None limit_property: Optional[str] = "False" + max_items: Optional[int] = 10 + edge_limit_pre_label: Optional[int] = 8 + dis_threshold: Optional[float] = 0.9 """Admin settings""" enable_login: Optional[str] = "False" diff --git a/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py b/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py index 3732a9f0..5819aec5 100644 --- a/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py +++ b/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py @@ -24,6 +24,7 @@ import numpy as np from hugegraph_llm.utils.log import log +from hugegraph_llm.config import settings INDEX_FILE_NAME = "index.faiss" PROPERTIES_FILE_NAME = "properties.pkl" @@ -85,17 +86,18 @@ def remove(self, props: Union[Set[Any], List[Any]]) -> int: self.properties = [p for i, p in enumerate(self.properties) if i not in indices] return remove_num - def search(self, query_vector: List[float], top_k: int, dis_threshold: float = 0.9) -> List[Dict[str, Any]]: + def search(self, query_vector: List[float], top_k: int) -> List[Dict[str, Any]]: if self.index.ntotal == 0: return [] if len(query_vector) != self.index.d: raise ValueError("Query vector dimension does not match index dimension!") + dis_threshold = float(settings.dis_threshold) distances, indices = self.index.search(np.array([query_vector]), top_k) results = [] for dist, i in zip(distances[0], indices[0]): - if dist < dis_threshold: # Smaller distances indicate higher similarity + if dist < dis_threshold: # Smaller distances indicate higher similarity results.append(deepcopy(self.properties[i])) log.debug("[✓] Add valid distance %s to results.", dist) else: diff --git a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py index 07dc7706..5d245312 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py @@ -131,8 +131,8 @@ def query_graphdb( :return: Self-instance for chaining. """ self._operators.append( - GraphRAGQuery(max_deep=max_deep, max_items=max_items, max_v_prop_len=max_v_prop_len, - max_e_prop_len=max_e_prop_len, prop_to_match=prop_to_match) + GraphRAGQuery(max_deep=max_deep, max_v_prop_len=max_v_prop_len, max_e_prop_len=max_e_prop_len, + prop_to_match=prop_to_match) ) return self diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py index a2d113bc..aa8e27f2 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py @@ -71,8 +71,8 @@ class GraphRAGQuery: - def __init__(self, max_deep: int = 2, max_items: int = 20, max_v_prop_len: int = 2048, - max_e_prop_len: int = 256, prop_to_match: Optional[str] = None): + def __init__(self, max_deep: int = 2, max_v_prop_len: int = 2048, max_e_prop_len: int = 256, + prop_to_match: Optional[str] = None): self._client = PyHugeClient( settings.graph_ip, settings.graph_port, @@ -82,7 +82,7 @@ def __init__(self, max_deep: int = 2, max_items: int = 20, max_v_prop_len: int = settings.graph_space, ) self._max_deep = max_deep - self._max_items = max_items + self._max_items = settings.max_items self._prop_to_match = prop_to_match self._schema = "" self._limit_property = settings.limit_property.lower() == "true" @@ -105,7 +105,7 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]: _, edge_labels = self._extract_labels_from_schema() edge_labels_str = ",".join("'" + label + "'" for label in edge_labels) # TODO: enhance the limit logic later - edge_limit_amount = len(edge_labels) * 10 + edge_limit_amount = len(edge_labels) * settings.edge_limit_pre_label use_id_to_match = self._prop_to_match is None if use_id_to_match: From 9d0d39d44fe8b4e1bdd43e5ee19c3ad24426c3c5 Mon Sep 17 00:00:00 2001 From: yc319 <58455269+yc319@users.noreply.github.com> Date: Tue, 10 Dec 2024 02:38:09 +0800 Subject: [PATCH 03/10] fix fix the bug of dis_threshold --- hugegraph-llm/src/hugegraph_llm/indices/vector_index.py | 3 +-- .../src/hugegraph_llm/operators/index_op/semantic_id_query.py | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py b/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py index 35b5f7c3..56eaaae8 100644 --- a/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py +++ b/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py @@ -86,14 +86,13 @@ def remove(self, props: Union[Set[Any], List[Any]]) -> int: self.properties = [p for i, p in enumerate(self.properties) if i not in indices] return remove_num - def search(self, query_vector: List[float], top_k: int) -> List[Any]: + def search(self, query_vector: List[float], top_k: int, dis_threshold: float = 0.9) -> List[Any]: if self.index.ntotal == 0: return [] if len(query_vector) != self.index.d: raise ValueError("Query vector dimension does not match index dimension!") - dis_threshold = float(settings.dis_threshold) distances, indices = self.index.search(np.array([query_vector]), top_k) results = [] for dist, i in zip(distances[0], indices[0]): diff --git a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py index f5dd8dee..e1e4fc49 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py @@ -75,7 +75,8 @@ def _fuzzy_match_vids(self, keywords: List[str]) -> List[str]: fuzzy_match_result = [] for keyword in keywords: keyword_vector = self.embedding.get_text_embedding(keyword) - results = self.vector_index.search(keyword_vector, top_k=self.topk_per_keyword) + results = self.vector_index.search(keyword_vector, top_k=self.topk_per_keyword, + dis_threshold=float(settings.dis_threshold)) if results: fuzzy_match_result.extend(results[:self.topk_per_keyword]) return fuzzy_match_result From c636073591972e1031b5ccfe9578f11880df6339 Mon Sep 17 00:00:00 2001 From: yc319 <58455269+yc319@users.noreply.github.com> Date: Tue, 10 Dec 2024 02:54:24 +0800 Subject: [PATCH 04/10] Revert "fix" This reverts commit 9d0d39d44fe8b4e1bdd43e5ee19c3ad24426c3c5. --- hugegraph-llm/src/hugegraph_llm/indices/vector_index.py | 3 ++- .../src/hugegraph_llm/operators/index_op/semantic_id_query.py | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py b/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py index 56eaaae8..35b5f7c3 100644 --- a/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py +++ b/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py @@ -86,13 +86,14 @@ def remove(self, props: Union[Set[Any], List[Any]]) -> int: self.properties = [p for i, p in enumerate(self.properties) if i not in indices] return remove_num - def search(self, query_vector: List[float], top_k: int, dis_threshold: float = 0.9) -> List[Any]: + def search(self, query_vector: List[float], top_k: int) -> List[Any]: if self.index.ntotal == 0: return [] if len(query_vector) != self.index.d: raise ValueError("Query vector dimension does not match index dimension!") + dis_threshold = float(settings.dis_threshold) distances, indices = self.index.search(np.array([query_vector]), top_k) results = [] for dist, i in zip(distances[0], indices[0]): diff --git a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py index e1e4fc49..f5dd8dee 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py @@ -75,8 +75,7 @@ def _fuzzy_match_vids(self, keywords: List[str]) -> List[str]: fuzzy_match_result = [] for keyword in keywords: keyword_vector = self.embedding.get_text_embedding(keyword) - results = self.vector_index.search(keyword_vector, top_k=self.topk_per_keyword, - dis_threshold=float(settings.dis_threshold)) + results = self.vector_index.search(keyword_vector, top_k=self.topk_per_keyword) if results: fuzzy_match_result.extend(results[:self.topk_per_keyword]) return fuzzy_match_result From 6c0a4ae8463004d43cdf4c84980ac72197ebc3f3 Mon Sep 17 00:00:00 2001 From: yc319 <58455269+yc319@users.noreply.github.com> Date: Tue, 10 Dec 2024 02:54:51 +0800 Subject: [PATCH 05/10] Reapply "fix" This reverts commit c636073591972e1031b5ccfe9578f11880df6339. --- hugegraph-llm/src/hugegraph_llm/indices/vector_index.py | 3 +-- .../src/hugegraph_llm/operators/index_op/semantic_id_query.py | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py b/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py index 35b5f7c3..56eaaae8 100644 --- a/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py +++ b/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py @@ -86,14 +86,13 @@ def remove(self, props: Union[Set[Any], List[Any]]) -> int: self.properties = [p for i, p in enumerate(self.properties) if i not in indices] return remove_num - def search(self, query_vector: List[float], top_k: int) -> List[Any]: + def search(self, query_vector: List[float], top_k: int, dis_threshold: float = 0.9) -> List[Any]: if self.index.ntotal == 0: return [] if len(query_vector) != self.index.d: raise ValueError("Query vector dimension does not match index dimension!") - dis_threshold = float(settings.dis_threshold) distances, indices = self.index.search(np.array([query_vector]), top_k) results = [] for dist, i in zip(distances[0], indices[0]): diff --git a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py index f5dd8dee..e1e4fc49 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py @@ -75,7 +75,8 @@ def _fuzzy_match_vids(self, keywords: List[str]) -> List[str]: fuzzy_match_result = [] for keyword in keywords: keyword_vector = self.embedding.get_text_embedding(keyword) - results = self.vector_index.search(keyword_vector, top_k=self.topk_per_keyword) + results = self.vector_index.search(keyword_vector, top_k=self.topk_per_keyword, + dis_threshold=float(settings.dis_threshold)) if results: fuzzy_match_result.extend(results[:self.topk_per_keyword]) return fuzzy_match_result From 333f77c267d4d05538633645fa835469ae15f4e9 Mon Sep 17 00:00:00 2001 From: imbajin Date: Tue, 10 Dec 2024 17:26:52 +0800 Subject: [PATCH 06/10] tiny fix & rename --- hugegraph-llm/src/hugegraph_llm/config/config_data.py | 4 ++-- .../src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py | 2 +- hugegraph-llm/src/hugegraph_llm/indices/vector_index.py | 2 +- .../hugegraph_llm/operators/hugegraph_op/graph_rag_query.py | 4 ++-- .../src/hugegraph_llm/operators/index_op/semantic_id_query.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/config/config_data.py b/hugegraph-llm/src/hugegraph_llm/config/config_data.py index 1ae20acc..62a3136a 100644 --- a/hugegraph-llm/src/hugegraph_llm/config/config_data.py +++ b/hugegraph-llm/src/hugegraph_llm/config/config_data.py @@ -105,9 +105,9 @@ class ConfigData: graph_pwd: Optional[str] = "xxx" graph_space: Optional[str] = None limit_property: Optional[str] = "False" - max_items: Optional[int] = 10 + max_graph_path: Optional[int] = 10 edge_limit_pre_label: Optional[int] = 8 - dis_threshold: Optional[float] = 0.9 + vector_dis_threshold: Optional[float] = 0.9 """Admin settings""" enable_login: Optional[str] = "False" diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py index f2fb6fb3..797ec73f 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py @@ -62,7 +62,7 @@ def build_example_vector_index(temp_file) -> dict: def gremlin_generate(inp, example_num, schema, gremlin_prompt) -> tuple[str, str] | tuple[str, Any, Any, Any, Any]: generator = GremlinGenerator(llm=LLMs().get_text2gql_llm(), embedding=Embeddings().get_embedding()) - sm = SchemaManager(graph_name="schema") + sm = SchemaManager(graph_name=schema) short_schema = False if schema: diff --git a/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py b/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py index 56eaaae8..7f93c3dd 100644 --- a/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py +++ b/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py @@ -24,7 +24,6 @@ import numpy as np from hugegraph_llm.utils.log import log -from hugegraph_llm.config import settings INDEX_FILE_NAME = "index.faiss" PROPERTIES_FILE_NAME = "properties.pkl" @@ -32,6 +31,7 @@ class VectorIndex: """Comment""" + def __init__(self, embed_dim: int = 1024): self.index = faiss.IndexFlatL2(embed_dim) self.properties = [] diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py index dbbabc07..9f97a049 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py @@ -95,7 +95,7 @@ def __init__( settings.graph_space, ) self._max_deep = max_deep - self._max_items = settings.max_items + self._max_items = settings.max_graph_path self._prop_to_match = prop_to_match self._schema = "" self._limit_property = settings.limit_property.lower() == "true" @@ -188,7 +188,7 @@ def _subgraph_query(self, context: Dict[str, Any]) -> Dict[str, Any]: vertex_knowledge = self._format_graph_from_vertex(query_result=vertexes) paths: List[Any] = [] - # TODO: 这里后续改为使用生成器 or 异步 asycnio 处理以提高性能 + # TODO: use generator or asyncio to speed up the query logic for matched_vid in matched_vids: gremlin_query = VID_QUERY_NEIGHBOR_TPL.format( keywords="'{}'".format(matched_vid), diff --git a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py index e1e4fc49..51a6769e 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py @@ -76,7 +76,7 @@ def _fuzzy_match_vids(self, keywords: List[str]) -> List[str]: for keyword in keywords: keyword_vector = self.embedding.get_text_embedding(keyword) results = self.vector_index.search(keyword_vector, top_k=self.topk_per_keyword, - dis_threshold=float(settings.dis_threshold)) + dis_threshold=float(settings.vector_dis_threshold)) if results: fuzzy_match_result.extend(results[:self.topk_per_keyword]) return fuzzy_match_result From f26627767cf147f0b4d1eb09da9d66e0bdcbef22 Mon Sep 17 00:00:00 2001 From: yc319 <58455269+yc319@users.noreply.github.com> Date: Tue, 10 Dec 2024 19:09:42 +0800 Subject: [PATCH 07/10] fix keep the param max_items in class GraphRAGQuery init function --- hugegraph-llm/src/hugegraph_llm/config/config_data.py | 1 + .../hugegraph_llm/operators/hugegraph_op/graph_rag_query.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hugegraph-llm/src/hugegraph_llm/config/config_data.py b/hugegraph-llm/src/hugegraph_llm/config/config_data.py index 62a3136a..a3d887ed 100644 --- a/hugegraph-llm/src/hugegraph_llm/config/config_data.py +++ b/hugegraph-llm/src/hugegraph_llm/config/config_data.py @@ -106,6 +106,7 @@ class ConfigData: graph_space: Optional[str] = None limit_property: Optional[str] = "False" max_graph_path: Optional[int] = 10 + max_items: Optional[int] = 30 edge_limit_pre_label: Optional[int] = 8 vector_dis_threshold: Optional[float] = 0.9 diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py index 9f97a049..c2707c89 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py @@ -78,6 +78,7 @@ class GraphRAGQuery: def __init__( self, max_deep: int = 2, + max_items: int = int(settings.max_items), prop_to_match: Optional[str] = None, with_gremlin_template: bool = True, llm: Optional[BaseLLM] = None, @@ -95,7 +96,7 @@ def __init__( settings.graph_space, ) self._max_deep = max_deep - self._max_items = settings.max_graph_path + self._max_items = max_items self._prop_to_match = prop_to_match self._schema = "" self._limit_property = settings.limit_property.lower() == "true" From b32c7a430592f91a17b91d7a891976121edc17f1 Mon Sep 17 00:00:00 2001 From: imbajin Date: Tue, 10 Dec 2024 22:40:23 +0800 Subject: [PATCH 08/10] unify params --- hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py | 2 +- .../src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py index 8e8bd460..8f5f81d6 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py @@ -137,7 +137,7 @@ def query_graphdb( :return: Self-instance for chaining. """ self._operators.append( - GraphRAGQuery(max_deep=max_deep, max_v_prop_len=max_v_prop_len, + GraphRAGQuery(max_deep=max_deep, max_items=max_items, max_v_prop_len=max_v_prop_len, max_e_prop_len=max_e_prop_len, prop_to_match=prop_to_match, with_gremlin_template=with_gremlin_template) ) diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py index c2707c89..2f5291a6 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py @@ -80,11 +80,11 @@ def __init__( max_deep: int = 2, max_items: int = int(settings.max_items), prop_to_match: Optional[str] = None, - with_gremlin_template: bool = True, llm: Optional[BaseLLM] = None, embedding: Optional[BaseEmbedding] = None, max_v_prop_len: int = 2048, max_e_prop_len: int = 256, + with_gremlin_template: bool = True, num_gremlin_generate_example: int = 1 ): self._client = PyHugeClient( From 77d92f19975ff522f60c795ebd41f5aca71d133b Mon Sep 17 00:00:00 2001 From: yc319 <58455269+yc319@users.noreply.github.com> Date: Fri, 20 Dec 2024 20:23:25 +0800 Subject: [PATCH 09/10] acg add graph rag query only for acg --- .../hugegraph_llm/operators/graph_rag_task.py | 52 ++- .../hugegraph_op/graph_rag_query_acg.py | 406 ++++++++++++++++++ 2 files changed, 436 insertions(+), 22 deletions(-) create mode 100644 hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query_acg.py diff --git a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py index 8f5f81d6..7dfd6f10 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py @@ -17,7 +17,7 @@ from typing import Dict, Any, Optional, List, Literal - +from hugegraph_llm.config import settings from hugegraph_llm.models.embeddings.base import BaseEmbedding from hugegraph_llm.models.embeddings.init_embedding import Embeddings from hugegraph_llm.models.llms.base import BaseLLM @@ -26,6 +26,7 @@ from hugegraph_llm.operators.common_op.print_result import PrintResult from hugegraph_llm.operators.document_op.word_extract import WordExtract from hugegraph_llm.operators.hugegraph_op.graph_rag_query import GraphRAGQuery +from hugegraph_llm.operators.hugegraph_op.graph_rag_query_acg import GraphRAGACGQuery from hugegraph_llm.operators.hugegraph_op.schema_manager import SchemaManager from hugegraph_llm.operators.index_op.semantic_id_query import SemanticIdQuery from hugegraph_llm.operators.index_op.vector_index_query import VectorIndexQuery @@ -95,10 +96,10 @@ def import_schema(self, graph_name: str): return self def keywords_to_vid( - self, - by: Literal["query", "keywords"] = "keywords", - topk_per_keyword: int = 1, - topk_per_query: int = 10, + self, + by: Literal["query", "keywords"] = "keywords", + topk_per_keyword: int = 1, + topk_per_query: int = 10, ): """ Add a semantic ID query operator to the pipeline. @@ -118,13 +119,13 @@ def keywords_to_vid( return self def query_graphdb( - self, - max_deep: int = 2, - max_items: int = 30, - max_v_prop_len: int = 2048, - max_e_prop_len: int = 256, - prop_to_match: Optional[str] = None, - with_gremlin_template: bool = True, + self, + max_deep: int = 2, + max_items: int = 30, + max_v_prop_len: int = 2048, + max_e_prop_len: int = 256, + prop_to_match: Optional[str] = None, + with_gremlin_template: bool = True, ): """ Add a graph RAG query operator to the pipeline. @@ -136,11 +137,18 @@ def query_graphdb( :param prop_to_match: Property to match in the graph. :return: Self-instance for chaining. """ - self._operators.append( - GraphRAGQuery(max_deep=max_deep, max_items=max_items, max_v_prop_len=max_v_prop_len, - max_e_prop_len=max_e_prop_len, prop_to_match=prop_to_match, - with_gremlin_template=with_gremlin_template) - ) + if settings.graph_space == "acgraggs": + self._operators.append( + GraphRAGACGQuery(max_deep=max_deep, max_v_prop_len=max_v_prop_len, + max_e_prop_len=max_e_prop_len, prop_to_match=prop_to_match, + with_gremlin_template=with_gremlin_template) + ) + else: + self._operators.append( + GraphRAGQuery(max_deep=max_deep, max_items=max_items, max_v_prop_len=max_v_prop_len, + max_e_prop_len=max_e_prop_len, prop_to_match=prop_to_match, + with_gremlin_template=with_gremlin_template) + ) return self def query_vector_index(self, max_items: int = 3): @@ -156,11 +164,11 @@ def query_vector_index(self, max_items: int = 3): return self def merge_dedup_rerank( - self, - graph_ratio: float = 0.5, - rerank_method: Literal["bleu", "reranker"] = "bleu", - near_neighbor_first: bool = False, - custom_related_information: str = "", + self, + graph_ratio: float = 0.5, + rerank_method: Literal["bleu", "reranker"] = "bleu", + near_neighbor_first: bool = False, + custom_related_information: str = "", ): """ Add a merge, deduplication, and rerank operator to the pipeline. diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query_acg.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query_acg.py new file mode 100644 index 00000000..7de8f814 --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query_acg.py @@ -0,0 +1,406 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import json +import requests +from typing import Any, Dict, Optional, List, Set, Tuple + +from hugegraph_llm.config import settings +from hugegraph_llm.models.embeddings.base import BaseEmbedding +from hugegraph_llm.models.llms.base import BaseLLM +from hugegraph_llm.operators.gremlin_generate_task import GremlinGenerator +from hugegraph_llm.utils.log import log +from pyhugegraph.client import PyHugeClient + +# # TODO: remove 'as('subj)' step +VERTEX_QUERY_TPL = "g.V({keywords}).toList()" + +# TODO: we could use a simpler query (like kneighbor-api to get the edges) +# TODO: test with profile()/explain() to speed up the query +VID_QUERY_MODULE_STATION_TPL = """\ +g.V({keywords}).bothE({edge_labels}).otherV().dedup() +.simplePath() +.path() +.by(project('label', 'id', 'props') + .by(label()) + .by(id()) + .by(valueMap().by(unfold())) +).by(project('label', 'inV', 'outV', 'props') + .by(label()) + .by(inV().id()) + .by(outV().id()) + .by(valueMap().by(unfold())) +) +.toList() +""" + + +def get_paths_vertex_id(sources, targets, depth=2, capacity=100, limit=100): + log.debug(f"Get_Paths: {sources}, {targets}") + graph_ip = settings.graph_ip + graph_port = settings.graph_port + graph_space = settings.graph_space + graph_name = settings.graph_name + # 定义请求 URL 和头部 + url = f'http://{graph_ip}:{graph_port}/graphspaces/{graph_space}/graphs/{graph_name}/traversers/paths' + headers = { + "Content-Type": "application/json", + "Authorization": "Basic YWRtaW46UzMjcmQ2KHNnIQ==", + "Connection": "close" + } + data = { + "sources": { + "ids": sources + }, + "targets": { + "ids": targets + }, + "step": { + "direction": "BOTH" + }, + "max_depth": depth, + "capacity": capacity, + "limit": limit, + "with_vertex": True + } + log.debug(f"json: {json.dumps(data)}") + response = requests.post(url, headers=headers, data=json.dumps(data, ensure_ascii=False)) + vertex_id = [] + if response.status_code != 200: + log.error(f"Get_Paths Error: {response.status_code}") + return vertex_id + log.debug(f"Get_Paths Response: {response.json()}") + for vertex_info in response.json()["vertices"]: + vertex_id.append(vertex_info["id"]) + return vertex_id + + +class GraphRAGACGQuery: + def __init__( + self, + max_deep: int = 2, + prop_to_match: Optional[str] = None, + llm: Optional[BaseLLM] = None, + embedding: Optional[BaseEmbedding] = None, + max_v_prop_len: int = 2048, + max_e_prop_len: int = 256, + with_gremlin_template: bool = True, + num_gremlin_generate_example: int = 1 + ): + self._client = PyHugeClient( + settings.graph_ip, + settings.graph_port, + settings.graph_name, + settings.graph_user, + settings.graph_pwd, + settings.graph_space, + ) + self._max_deep = max_deep + self._prop_to_match = prop_to_match + self._schema = "" + self._limit_property = settings.limit_property.lower() == "true" + self._max_v_prop_len = max_v_prop_len + self._max_e_prop_len = max_e_prop_len + self._gremlin_generator = GremlinGenerator( + llm=llm, + embedding=embedding, + ) + self._num_gremlin_generate_example = num_gremlin_generate_example + self._with_gremlin_template = with_gremlin_template + + def run(self, context: Dict[str, Any]) -> Dict[str, Any]: + self._init_client(context) + + # initial flag: -1 means no result, 0 means subgraph query, 1 means gremlin query + context["graph_result_flag"] = -1 + # 1. Try to perform a query based on the generated gremlin + context = self._gremlin_generate_query(context) + # 2. Try to perform a query based on subgraph-search if the previous query failed + if not context.get("graph_result"): + context = self._subgraph_query(context) + + if context.get("graph_result"): + log.debug("Knowledge from Graph:\n%s", "\n".join(context["graph_result"])) + else: + log.debug("No Knowledge Extracted from Graph") + return context + + def _gremlin_generate_query(self, context: Dict[str, Any]) -> Dict[str, Any]: + query = context["query"] + vertices = context.get("match_vids") + query_embedding = context.get("query_embedding") + + self._gremlin_generator.clear() + self._gremlin_generator.example_index_query(num_examples=self._num_gremlin_generate_example) + gremlin_response = self._gremlin_generator.gremlin_generate_synthesize( + context["simple_schema"], + vertices=vertices, + ).run( + query=query, + query_embedding=query_embedding + ) + if self._with_gremlin_template: + gremlin = gremlin_response["result"] + else: + gremlin = gremlin_response["raw_result"] + log.info("Generated gremlin: %s", gremlin) + context["gremlin"] = gremlin + try: + result = self._client.gremlin().exec(gremlin=gremlin)["data"] + if result == [None]: + result = [] + context["graph_result"] = [json.dumps(item, ensure_ascii=False) for item in result] + if context["graph_result"]: + context["graph_result_flag"] = 1 + context["graph_context_head"] = ( + f"The following are graph query result " + f"from gremlin query `{gremlin}`.\n" + ) + except Exception as e: # pylint: disable=broad-except + log.error(e) + context["graph_result"] = "" + return context + + def _subgraph_query(self, context: Dict[str, Any]) -> Dict[str, Any]: + # 1. Extract params from context + matched_vids = context.get("match_vids") + # 2. Extract edge_labels from graph schema + _, edge_labels = self._extract_labels_from_schema() + edge_labels_str = ",".join("'" + label + "'" for label in edge_labels) + + if not matched_vids: + return context + + gremlin_query = VERTEX_QUERY_TPL.format(keywords=matched_vids) + vertexes = self._client.gremlin().exec(gremlin=gremlin_query)["data"] + log.debug("Vids gremlin query: %s", gremlin_query) + vertex_knowledge = self._format_graph_from_vertex(query_result=vertexes) + + paths: List[Any] = [] + module_set: Set[str] = set() + concept_knowledge_set: Set[str] = set() + # TODO: use generator or asyncio to speed up the query logic + # 根据关键词,匹配模块、知识、概念类型的节点,忽略机房类型的节点; + for matched_vid in matched_vids: + if matched_vid[0] == "1": + # 对每个模块类型的节点,gremlin查找相关机房的节点信息和边信息; + gremlin_query = VID_QUERY_MODULE_STATION_TPL.format( + keywords="'{}'".format(matched_vid), + edge_labels=edge_labels_str, + ) + log.debug("Kneighbor gremlin query: %s", gremlin_query) + paths.extend(self._client.gremlin().exec(gremlin=gremlin_query)["data"]) + module_set.add(matched_vid) + elif matched_vid[0] == "3" or matched_vid[0] == "4": + concept_knowledge_set.add(matched_vid) + + graph_chain_knowledge, vertex_degree_list, knowledge_with_degree = self._format_graph_query_result( + query_paths=paths + ) + # 调用Paths API 高级版,source是模块类型节点集合,target是知识和概念类型节点集合,step为2,获取所有路径所有节点,并去重取集合; + paths_vertex_ids = get_paths_vertex_id(sources=list(module_set), targets=list(concept_knowledge_set)) + paths_gremlin_query = VERTEX_QUERY_TPL.format(keywords=paths_vertex_ids) + paths_vertexes = self._client.gremlin().exec(gremlin=paths_gremlin_query)["data"] + paths_vertex_knowledge = self._format_graph_from_vertex(query_result=paths_vertexes) + graph_chain_knowledge.update(paths_vertex_knowledge) + if vertex_degree_list: + vertex_degree_list[0].update(vertex_knowledge) + else: + vertex_degree_list.append(vertex_knowledge) + + # TODO: we may need to optimize the logic here with global deduplication (may lack some single vertex) + if not graph_chain_knowledge: + graph_chain_knowledge.update(vertex_knowledge) + if vertex_degree_list: + vertex_degree_list[0].update(vertex_knowledge) + else: + vertex_degree_list.append(vertex_knowledge) + + context["graph_result"] = list(graph_chain_knowledge) + if context["graph_result"]: + context["graph_result_flag"] = 0 + context["vertex_degree_list"] = [list(vertex_degree) for vertex_degree in vertex_degree_list] + context["knowledge_with_degree"] = knowledge_with_degree + context["graph_context_head"] = ( + f"The following are graph knowledge in {self._max_deep} depth, e.g:\n" + "`vertexA--[links]-->vertexB<--[links]--vertexC ...`" + "extracted based on key entities as subject:\n" + ) + return context + + def _init_client(self, context): + # pylint: disable=R0915 (too-many-statements) + if self._client is None: + if isinstance(context.get("graph_client"), PyHugeClient): + self._client = context["graph_client"] + else: + ip = context.get("ip") or "localhost" + port = context.get("port") or "8080" + graph = context.get("graph") or "hugegraph" + user = context.get("user") or "admin" + pwd = context.get("pwd") or "admin" + gs = context.get("graphspace") or None + self._client = PyHugeClient(ip, port, graph, user, pwd, gs) + assert self._client is not None, "No valid graph to search." + + def _format_graph_from_vertex(self, query_result: List[Any]) -> Set[str]: + knowledge = set() + for item in query_result: + props_str = ", ".join(f"{k}: {v}" for k, v in item["properties"].items()) + node_str = f"{item['id']}{{{props_str}}}" + knowledge.add(node_str) + return knowledge + + def _format_graph_query_result(self, query_paths) -> Tuple[Set[str], List[Set[str]], Dict[str, List[str]]]: + use_id_to_match = self._prop_to_match is None + subgraph = set() + subgraph_with_degree = {} + vertex_degree_list: List[Set[str]] = [] + v_cache: Set[str] = set() + e_cache: Set[Tuple[str, str, str]] = set() + + for path in query_paths: + # 1. Process each path + path_str, vertex_with_degree = self._process_path(path, use_id_to_match, v_cache, e_cache) + subgraph.add(path_str) + subgraph_with_degree[path_str] = vertex_with_degree + # 2. Update vertex degree list + self._update_vertex_degree_list(vertex_degree_list, vertex_with_degree) + + return subgraph, vertex_degree_list, subgraph_with_degree + + def _process_path(self, path: Any, use_id_to_match: bool, v_cache: Set[str], + e_cache: Set[Tuple[str, str, str]]) -> Tuple[str, List[str]]: + flat_rel = "" + raw_flat_rel = path["objects"] + + assert len(raw_flat_rel) % 2 == 1, "The length of raw_flat_rel should be odd." + + node_cache = set() + prior_edge_str_len = 0 + depth = 0 + nodes_with_degree = [] + + for i, item in enumerate(raw_flat_rel): + if i % 2 == 0: + # Process each vertex + flat_rel, prior_edge_str_len, depth = self._process_vertex( + item, flat_rel, node_cache, prior_edge_str_len, depth, nodes_with_degree, use_id_to_match, + v_cache + ) + else: + # Process each edge + flat_rel, prior_edge_str_len = self._process_edge( + item, flat_rel, raw_flat_rel, i, use_id_to_match, e_cache + ) + + return flat_rel, nodes_with_degree + + def _process_vertex(self, item: Any, flat_rel: str, node_cache: Set[str], + prior_edge_str_len: int, depth: int, nodes_with_degree: List[str], + use_id_to_match: bool, v_cache: Set[str]) -> Tuple[str, int, int]: + matched_str = item["id"] if use_id_to_match else item["props"][self._prop_to_match] + if matched_str in node_cache: + flat_rel = flat_rel[:-prior_edge_str_len] + return flat_rel, prior_edge_str_len, depth + + node_cache.add(matched_str) + props_str = ", ".join(f"{k}: {self._limit_property_query(v, 'v')}" + for k, v in item["props"].items() if v) + + # TODO: we may remove label id or replace with label name + if matched_str in v_cache: + node_str = matched_str + else: + v_cache.add(matched_str) + node_str = f"{item['id']}{{{props_str}}}" + + flat_rel += node_str + nodes_with_degree.append(node_str) + depth += 1 + return flat_rel, prior_edge_str_len, depth + + def _process_edge(self, item: Any, path_str: str, raw_flat_rel: List[Any], i: int, use_id_to_match: bool, + e_cache: Set[Tuple[str, str, str]]) -> Tuple[str, int]: + props_str = ", ".join(f"{k}: {self._limit_property_query(v, 'e')}" + for k, v in item["props"].items() if v) + props_str = f"{{{props_str}}}" if props_str else "" + prev_matched_str = raw_flat_rel[i - 1]["id"] if use_id_to_match else ( + raw_flat_rel)[i - 1]["props"][self._prop_to_match] + + edge_key = (item['inV'], item['label'], item['outV']) + if edge_key not in e_cache: + e_cache.add(edge_key) + edge_label = f"{item['label']}{props_str}" + else: + edge_label = item['label'] + + edge_str = f"--[{edge_label}]-->" if item["outV"] == prev_matched_str else f"<--[{edge_label}]--" + path_str += edge_str + prior_edge_str_len = len(edge_str) + return path_str, prior_edge_str_len + + def _update_vertex_degree_list(self, vertex_degree_list: List[Set[str]], nodes_with_degree: List[str]) -> None: + for depth, node_str in enumerate(nodes_with_degree): + if depth >= len(vertex_degree_list): + vertex_degree_list.append(set()) + vertex_degree_list[depth].add(node_str) + + def _extract_labels_from_schema(self) -> Tuple[List[str], List[str]]: + schema = self._get_graph_schema() + vertex_props_str, edge_props_str = schema.split("\n")[:2] + # TODO: rename to vertex (also need update in the schema) + vertex_props_str = vertex_props_str[len("Vertex properties: "):].strip("[").strip("]") + edge_props_str = edge_props_str[len("Edge properties: "):].strip("[").strip("]") + vertex_labels = self._extract_label_names(vertex_props_str) + edge_labels = self._extract_label_names(edge_props_str) + return vertex_labels, edge_labels + + @staticmethod + def _extract_label_names(source: str, head: str = "name: ", tail: str = ", ") -> List[str]: + result = [] + for s in source.split(head): + end = s.find(tail) + label = s[:end] + if label: + result.append(label) + return result + + def _get_graph_schema(self, refresh: bool = False) -> str: + if self._schema and not refresh: + return self._schema + + schema = self._client.schema() + vertex_schema = schema.getVertexLabels() + edge_schema = schema.getEdgeLabels() + relationships = schema.getRelations() + + self._schema = ( + f"Vertex properties: {vertex_schema}\n" + f"Edge properties: {edge_schema}\n" + f"Relationships: {relationships}\n" + ) + log.debug("Link(Relation): %s", relationships) + return self._schema + + def _limit_property_query(self, value: Optional[str], item_type: str) -> Optional[str]: + # NOTE: we skip the filter for list/set type (e.g., list of string, add it if needed) + if not self._limit_property or not isinstance(value, str): + return value + + max_len = self._max_v_prop_len if item_type == "v" else self._max_e_prop_len + return value[:max_len] if value else value From a82dd548acac54e13bb75d686b0a54c7b97b8111 Mon Sep 17 00:00:00 2001 From: imbajin Date: Mon, 23 Dec 2024 12:32:00 +0800 Subject: [PATCH 10/10] fix settings error --- .../hugegraph_llm/operators/graph_rag_task.py | 14 +++++++---- .../hugegraph_op/graph_rag_query_acg.py | 24 +++++++++---------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py index 9e05de88..836af528 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py @@ -17,7 +17,7 @@ from typing import Dict, Any, Optional, List, Literal -from hugegraph_llm.config import settings +from hugegraph_llm.config import huge_settings from hugegraph_llm.models.embeddings.base import BaseEmbedding from hugegraph_llm.models.embeddings.init_embedding import Embeddings from hugegraph_llm.models.llms.base import BaseLLM @@ -140,11 +140,15 @@ def query_graphdb( :param prop_to_match: Property to match in the graph. :return: Self-instance for chaining. """ - if settings.graph_space == "acgraggs": + if huge_settings.graph_space == "acgraggs": self._operators.append( - GraphRAGACGQuery(max_deep=max_deep, max_v_prop_len=max_v_prop_len, - max_e_prop_len=max_e_prop_len, prop_to_match=prop_to_match, - with_gremlin_template=with_gremlin_template) + GraphRAGACGQuery( + max_deep=max_deep, + max_v_prop_len=max_v_prop_len, + max_e_prop_len=max_e_prop_len, + prop_to_match=prop_to_match, + with_gremlin_template=with_gremlin_template + ) ) else: self._operators.append( diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query_acg.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query_acg.py index 7de8f814..bd428952 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query_acg.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query_acg.py @@ -19,7 +19,7 @@ import requests from typing import Any, Dict, Optional, List, Set, Tuple -from hugegraph_llm.config import settings +from hugegraph_llm.config import huge_settings from hugegraph_llm.models.embeddings.base import BaseEmbedding from hugegraph_llm.models.llms.base import BaseLLM from hugegraph_llm.operators.gremlin_generate_task import GremlinGenerator @@ -51,10 +51,10 @@ def get_paths_vertex_id(sources, targets, depth=2, capacity=100, limit=100): log.debug(f"Get_Paths: {sources}, {targets}") - graph_ip = settings.graph_ip - graph_port = settings.graph_port - graph_space = settings.graph_space - graph_name = settings.graph_name + graph_ip = huge_settings.graph_ip + graph_port = huge_settings.graph_port + graph_space = huge_settings.graph_space + graph_name = huge_settings.graph_name # 定义请求 URL 和头部 url = f'http://{graph_ip}:{graph_port}/graphspaces/{graph_space}/graphs/{graph_name}/traversers/paths' headers = { @@ -102,17 +102,17 @@ def __init__( num_gremlin_generate_example: int = 1 ): self._client = PyHugeClient( - settings.graph_ip, - settings.graph_port, - settings.graph_name, - settings.graph_user, - settings.graph_pwd, - settings.graph_space, + huge_settings.graph_ip, + huge_settings.graph_port, + huge_settings.graph_name, + huge_settings.graph_user, + huge_settings.graph_pwd, + huge_settings.graph_space, ) self._max_deep = max_deep self._prop_to_match = prop_to_match self._schema = "" - self._limit_property = settings.limit_property.lower() == "true" + self._limit_property = huge_settings.limit_property.lower() == "true" self._max_v_prop_len = max_v_prop_len self._max_e_prop_len = max_e_prop_len self._gremlin_generator = GremlinGenerator(