From 60312bf702e953659a97405fbf5c7675fe30920e Mon Sep 17 00:00:00 2001
From: Laurent Sorber
Date: Tue, 3 Dec 2024 20:56:03 +0100
Subject: [PATCH] fix: filter spurious llama.cpp output

---
 src/raglite/_litellm.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/raglite/_litellm.py b/src/raglite/_litellm.py
index e8b338a..13114b4 100644
--- a/src/raglite/_litellm.py
+++ b/src/raglite/_litellm.py
@@ -1,10 +1,13 @@
 """Add support for llama-cpp-python models to LiteLLM."""
 
 import asyncio
+import contextlib
 import logging
+import os
 import warnings
 from collections.abc import AsyncIterator, Callable, Iterator
 from functools import cache
+from io import StringIO
 from typing import Any, ClassVar, cast
 
 import httpx
@@ -28,7 +31,8 @@ from raglite._config import RAGLiteConfig
 
 
 # Reduce the logging level for LiteLLM and flashrank.
-logging.getLogger("litellm").setLevel(logging.WARNING)
+os.environ["LITELLM_LOG"] = "WARNING"
+logging.getLogger("LiteLLM").setLevel(logging.WARNING)
 logging.getLogger("flashrank").setLevel(logging.WARNING)
 
 
@@ -96,7 +100,10 @@ def llm(model: str, **kwargs: Any) -> Llama:
             filename, n_ctx_str = filename_n_ctx
             n_ctx = int(n_ctx_str)
         # Load the LLM.
-        with warnings.catch_warnings():  # Filter huggingface_hub warning about HF_TOKEN.
+        with (
+            contextlib.redirect_stderr(StringIO()),  # Filter spurious llama.cpp output.
+            warnings.catch_warnings(),  # Filter huggingface_hub warning about HF_TOKEN.
+        ):
             warnings.filterwarnings("ignore", category=UserWarning)
             llm = Llama.from_pretrained(
                 repo_id=repo_id,
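
Not part of the patch: a minimal, self-contained sketch of the pattern the patch
introduces. A parenthesized `with` statement combines `contextlib.redirect_stderr(StringIO())`,
which captures anything written to Python's `sys.stderr` while the model loads, with
`warnings.catch_warnings()`, which scopes the `UserWarning` filter to the block. The
`noisy_load()` helper below is hypothetical and merely stands in for `Llama.from_pretrained()`.

    import contextlib
    import sys
    import warnings
    from io import StringIO


    def noisy_load() -> str:
        """Hypothetical loader that writes to stderr and emits a warning."""
        print("llama_model_loader: loaded meta data ...", file=sys.stderr)
        warnings.warn("HF_TOKEN is not set", UserWarning, stacklevel=2)
        return "model"


    captured_stderr = StringIO()
    with (
        contextlib.redirect_stderr(captured_stderr),  # Capture stderr instead of printing it.
        warnings.catch_warnings(),  # Restore the original warning filters on exit.
    ):
        warnings.filterwarnings("ignore", category=UserWarning)
        model = noisy_load()

    # The suppressed output is retained in memory and can still be inspected.
    assert "llama_model_loader" in captured_stderr.getvalue()

Note that `contextlib.redirect_stderr` only intercepts writes that go through Python's
`sys.stderr`; raw writes to file descriptor 2 from native code are not affected.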