refact(client): use rich handle for log (#111)

* refact(client): use rich handle for log; Update requirements.txt; refactor log usage
* Update requirements.txt
* fix: some style problems - pylint
apache · Nov 13, 2024 · e3d25fd · e3d25fd
1 parent 0f609ba
commit e3d25fd
Show file tree

Hide file tree

Showing 10 changed files with 213 additions and 133 deletions.
diff --git a/hugegraph-llm/requirements.txt b/hugegraph-llm/requirements.txt
@@ -4,7 +4,7 @@ qianfan~=0.3.18
 retry~=0.9.2
 tiktoken>=0.7.0
 nltk~=3.8.1
-gradio~=4.43.0
+gradio~=4.44.1
 jieba>=0.42.1
 numpy~=1.24.4
 python-docx~=1.1.2

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
@@ -56,9 +56,9 @@ def authenticate(credentials: HTTPAuthorizationCredentials = Depends(sec)):
 
 def init_rag_ui() -> gr.Interface:
     with gr.Blocks(
-        theme="default",
-        title="HugeGraph RAG Platform",
-        css=CSS,
+            theme="default",
+            title="HugeGraph RAG Platform",
+            css=CSS,
     ) as hugegraph_llm_ui:
         gr.Markdown("# HugeGraph LLM RAG Demo")
 
@@ -84,7 +84,6 @@ def init_rag_ui() -> gr.Interface:
          = else if settings.reranker_type == siliconflow [settings.reranker_api_key, "BAAI/bge-reranker-v2-m3", ""]
          = else ["","",""]
         """
-
 
         textbox_array_graph_config = create_configs_block()
 
@@ -94,7 +93,6 @@ def init_rag_ui() -> gr.Interface:
             textbox_inp, textbox_answer_prompt_input = create_rag_block()
         with gr.Tab(label="3. Others Tools 🚧"):
             create_other_block()
-
 
         def refresh_ui_config_prompt() -> tuple:
             settings.from_env()
@@ -105,7 +103,6 @@ def refresh_ui_config_prompt() -> tuple:
                 prompt.default_question, prompt.answer_prompt
             )
 
-
         hugegraph_llm_ui.load(fn=refresh_ui_config_prompt, outputs=[
             textbox_array_graph_config[0],
             textbox_array_graph_config[1],

diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py
@@ -105,10 +105,10 @@ def create_rag_block():
             )
         with gr.Column(scale=1):
             with gr.Row():
-                raw_radio = gr.Radio(choices=[True, False], value=True, label="Basic LLM Answer")
+                raw_radio = gr.Radio(choices=[True, False], value=False, label="Basic LLM Answer")
                 vector_only_radio = gr.Radio(choices=[True, False], value=False, label="Vector-only Answer")
             with gr.Row():
-                graph_only_radio = gr.Radio(choices=[True, False], value=False, label="Graph-only Answer")
+                graph_only_radio = gr.Radio(choices=[True, False], value=True, label="Graph-only Answer")
                 graph_vector_radio = gr.Radio(choices=[True, False], value=False, label="Graph-Vector Answer")
 
             def toggle_slider(enable):
@@ -265,4 +265,4 @@ def several_rag_answer(
     )
     questions_file.change(read_file_to_excel, questions_file, [qa_dataframe, answer_max_line_count])
     answer_max_line_count.change(change_showing_excel, answer_max_line_count, qa_dataframe)
-    return inp, answer_prompt_input
+    return inp, answer_prompt_input
diff --git a/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py b/hugegraph-llm/src/hugegraph_llm/indices/vector_index.py
@@ -97,8 +97,9 @@ def search(self, query_vector: List[float], top_k: int, dis_threshold: float = 0
         for dist, i in zip(distances[0], indices[0]):
             if dist < dis_threshold: # Smaller distances indicate higher similarity
                 results.append(deepcopy(self.properties[i]))
+                log.debug("[✓] Add valid distance %s to results.", dist)
             else:
-                log.debug("Distance %s is larger than threshold %s, ignore this result.", dist, dis_threshold)
+                log.debug("[x] Distance %s ≥ threshold %s, ignore this result.", dist, dis_threshold)
         return results
 
     @staticmethod

diff --git a/hugegraph-llm/src/hugegraph_llm/operators/document_op/chunk_split.py b/hugegraph-llm/src/hugegraph_llm/operators/document_op/chunk_split.py
@@ -43,24 +43,22 @@ def __init__(
     def _get_separators(self, language: str) -> List[str]:
         if language == LANGUAGE_ZH:
             return ["\n\n", "\n", "。", "，", ""]
-        elif language == LANGUAGE_EN:
+        if language == LANGUAGE_EN:
             return ["\n\n", "\n", ".", ",", " ", ""]
-        else:
-            raise ValueError("language must be zh or en")
+        raise ValueError("language must be zh or en")
 
     def _get_text_splitter(self, split_type: str):
         if split_type == SPLIT_TYPE_DOCUMENT:
             return lambda text: [text]
-        elif split_type == SPLIT_TYPE_PARAGRAPH:
+        if split_type == SPLIT_TYPE_PARAGRAPH:
             return RecursiveCharacterTextSplitter(
                 chunk_size=500, chunk_overlap=30, separators=self.separators
             ).split_text
-        elif split_type == SPLIT_TYPE_SENTENCE:
+        if split_type == SPLIT_TYPE_SENTENCE:
             return RecursiveCharacterTextSplitter(
                 chunk_size=50, chunk_overlap=0, separators=self.separators
             ).split_text
-        else:
-            raise ValueError("Type must be paragraph, sentence, html or markdown")
+        raise ValueError("Type must be paragraph, sentence, html or markdown")
 
     def run(self, context: Optional[Dict[str, Any]]) -> Dict[str, Any]:
         all_chunks = []

diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
@@ -248,7 +248,8 @@ def _process_edge(self, item: Any, flat_rel: str, prior_edge_str_len: int,
                       raw_flat_rel: List[Any], i: int, use_id_to_match: bool) -> Tuple[str, int]:
         props_str = ", ".join(f"{k}: {v}" for k, v in item["props"].items())
         props_str = f"{{{props_str}}}" if len(props_str) > 0 else ""
-        prev_matched_str = raw_flat_rel[i - 1]["id"] if use_id_to_match else raw_flat_rel[i - 1]["props"][self._prop_to_match]
+        prev_matched_str = raw_flat_rel[i - 1]["id"] if use_id_to_match else (
+            raw_flat_rel)[i - 1]["props"][self._prop_to_match]
 
         if item["outV"] == prev_matched_str:
             edge_str = f" --[{item['label']}{props_str}]--> "

diff --git a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py
@@ -20,11 +20,11 @@
 from copy import deepcopy
 from typing import Dict, Any, Literal, List, Tuple
 
-from hugegraph_llm.utils.log import log
-from pyhugegraph.client import PyHugeClient
 from hugegraph_llm.config import resource_path, settings
 from hugegraph_llm.indices.vector_index import VectorIndex
 from hugegraph_llm.models.embeddings.base import BaseEmbedding
+from hugegraph_llm.utils.log import log
+from pyhugegraph.client import PyHugeClient
 
 
 class SemanticIdQuery:

diff --git a/hugegraph-llm/src/hugegraph_llm/utils/log.py b/hugegraph-llm/src/hugegraph_llm/utils/log.py
@@ -14,59 +14,17 @@
 #  limitations under the License.
 
 import logging
-from logging.handlers import TimedRotatingFileHandler
 import os
 
-# TODO: unify the log format in the project (include gradle(fastapi) frame)
-
-# Set log format
-LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
-DATE_FORMAT = "%Y-%m-%d %H:%M:%S %p"
+from pyhugegraph.utils import log
 
+# TODO: unify the log format in the project (include gradle(fastapi) frame)
 # Configure log file path and maximum size
 LOG_DIR = "logs"
 if not os.path.exists(LOG_DIR):
     os.makedirs(LOG_DIR)
 LOG_FILE = os.path.join(LOG_DIR, "llm-server.log")
 
 # Create a logger
-log = logging.getLogger("llm_app")
-log.setLevel(logging.DEBUG)
-
-# Create a handler for writing to log file
-file_handler = TimedRotatingFileHandler(LOG_FILE, when='midnight', interval=1,
-                                        backupCount=7, encoding='utf-8')
-file_handler.setLevel(logging.DEBUG)
-file_handler.setFormatter(logging.Formatter(LOG_FORMAT, datefmt=DATE_FORMAT))
-# Add the handler, and we could use 'log.Info(xxx)' in other files
-log.addHandler(file_handler)
-
-
-# ANSI escape sequences for colors
-class CustomConsoleHandler(logging.StreamHandler):
-    COLORS = {
-        "DEBUG": "\033[0;37m",  # White
-        "INFO": "\033[0;32m",  # Green
-        "WARNING": "\033[0;33m",  # Yellow
-        "ERROR": "\033[0;31m",  # Red
-        "CRITICAL": "\033[0;41m"  # Red background
-    }
-
-    def emit(self, record):
-        try:
-            msg = self.format(record)
-            level = record.levelname
-            color_prefix = self.COLORS.get(level, "\033[0;37m")  # Default to white
-            color_suffix = "\033[0m"  # Reset to default
-            stream = self.stream
-            stream.write(color_prefix + msg + color_suffix + self.terminator)
-            self.flush()
-        except Exception:  # pylint: disable=broad-exception-caught
-            self.handleError(record)
-
-
-# Also output logs to the console, we could add a StreamHandler here (Optional)
-custom_handler = CustomConsoleHandler()  # console_handler = logging.StreamHandler()
-custom_handler.setLevel(logging.DEBUG)
-custom_handler.setFormatter(logging.Formatter(LOG_FORMAT, datefmt=DATE_FORMAT))
-log.addHandler(custom_handler)
+log = log.init_logger(log_output=LOG_FILE, log_level=logging.DEBUG, logger_name="rag",
+                      max_log_size=20 * 1024 * 1024)
diff --git a/hugegraph-python-client/requirements.txt b/hugegraph-python-client/requirements.txt
@@ -1,4 +1,5 @@
-decorator==5.1.1
-requests==2.32.0
-setuptools==70.0.0
-urllib3==2.2.2
+decorator~=5.1.1
+requests~=2.32.0
+setuptools~=70.0.0
+urllib3~=2.2.2
+rich~=13.9.4