diff --git a/bpm_ai_inference/llm/llama_cpp/_constants.py b/bpm_ai_inference/llm/llama_cpp/_constants.py
index 7c1e4c3..a039d27 100644
--- a/bpm_ai_inference/llm/llama_cpp/_constants.py
+++ b/bpm_ai_inference/llm/llama_cpp/_constants.py
@@ -1,5 +1,5 @@
-DEFAULT_MODEL = "QuantFactory/dolphin-2.9-llama3-8b-GGUF"
+DEFAULT_MODEL = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
 DEFAULT_QUANT_LARGE = "*Q8_0.gguf"
 DEFAULT_QUANT_BALANCED = "*Q4_K_M.gguf"
 DEFAULT_QUANT_SMALL = "*Q2_K.gguf"
 
diff --git a/bpm_ai_inference/llm/llama_cpp/llama_chat.py b/bpm_ai_inference/llm/llama_cpp/llama_chat.py
index aac285c..148ab0a 100644
--- a/bpm_ai_inference/llm/llama_cpp/llama_chat.py
+++ b/bpm_ai_inference/llm/llama_cpp/llama_chat.py
@@ -65,6 +65,7 @@ def __init__(
                 verbose=False
             )
         else:
+            logger.info(f"Loading model file {filename} from {model}...")
             self.llm = Llama.from_pretrained(
                 repo_id=model,
                 filename=filename,
diff --git a/bpm_ai_inference/llm/llama_cpp/util.py b/bpm_ai_inference/llm/llama_cpp/util.py
index fe38634..dcb6304 100644
--- a/bpm_ai_inference/llm/llama_cpp/util.py
+++ b/bpm_ai_inference/llm/llama_cpp/util.py
@@ -1,6 +1,7 @@
 import json
 import logging
 
+from bpm_ai_core.llm.common.blob import Blob
 from bpm_ai_core.llm.common.message import ChatMessage, AssistantMessage, ToolResultMessage
 
 logger = logging.getLogger(__name__)
@@ -18,6 +19,20 @@ async def message_to_llama_dict(message: ChatMessage) -> dict:
     elif isinstance(message, ToolResultMessage):
         tool_response_content = f"{message.content}"
         content = '\n{"name": "' + message.name + '", "content": ' + tool_response_content + '}\n'
+    elif isinstance(message.content, list):
+        content = ""
+        for e in message.content:
+            if isinstance(e, str):
+                content += e
+            elif isinstance(e, Blob) and (e.is_text()):
+                text = (await e.as_bytes()).decode("utf-8")
+                filename = (" name='" + e.path + "'") if e.path else ''
+                text = f"\n<file{filename}>\n{text}\n</file>\n"
+                content += text
+            else:
+                raise ValueError(
+                    "Elements in ChatMessage.content must be str or Blob (text)"
+                )
     else:
         content = message.content
 
diff --git a/poetry.lock b/poetry.lock
index 601a0d2..c8fa381 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -332,13 +332,13 @@ tqdm = "*"
 
 [[package]]
 name = "bpm-ai-core"
-version = "2.6.0"
+version = "2.6.1"
 description = "Core AI abstractions and helpers."
 optional = false
 python-versions = "<4.0,>=3.11"
 files = [
-    {file = "bpm_ai_core-2.6.0-py3-none-any.whl", hash = "sha256:bb1753296b637231f9fe295ba32c9da6374e60da09ea3323c628a9e382749f84"},
-    {file = "bpm_ai_core-2.6.0.tar.gz", hash = "sha256:6a88de6212a7d67fd6af4e73983eac7dd008fff4a8509f0dff3ad0029575b99f"},
+    {file = "bpm_ai_core-2.6.1-py3-none-any.whl", hash = "sha256:38e3bf71a93f3fa714b5d70692ebfd8b8fa2d8cd0e50ddce1e01f33a823d0be4"},
+    {file = "bpm_ai_core-2.6.1.tar.gz", hash = "sha256:542f4c489e87894071e8df8ccaff60cbc1b6669d8aefcea7d5a3e49ac4442cce"},
 ]
 
 [package.dependencies]
@@ -4377,4 +4377,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "ca370e8760dbcc580dc6f122dc7c839a3095522970216401c4a40c14dbc367ed"
+content-hash = "dfd3d34b8de4ac1535f96ba98a58fb35b8fe203c3ad55aea9125b959290ce69e"
diff --git a/pyproject.toml b/pyproject.toml
index cf35b28..77f83c8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "bpm-ai-inference"
-version = "0.2.9"
+version = "0.3.1"
 description = "Inference and server for local AI implementations of bpm-ai-core abstractions."
authors = ["Bennet Krause "] repository = "https://github.com/holunda-io/bpm-ai-inference" @@ -10,7 +10,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.11,<3.12" -bpm-ai-core = "^2.6.0" +bpm-ai-core = "^2.6.1" langfuse = "^2.7.6" faster-whisper = "^0.10.0" lingua-language-detector = "^2.0.2"