diff --git a/bpm_ai_inference/llm/llama_cpp/_constants.py b/bpm_ai_inference/llm/llama_cpp/_constants.py
index 7c1e4c3..a039d27 100644
--- a/bpm_ai_inference/llm/llama_cpp/_constants.py
+++ b/bpm_ai_inference/llm/llama_cpp/_constants.py
@@ -1,5 +1,5 @@
-DEFAULT_MODEL = "QuantFactory/dolphin-2.9-llama3-8b-GGUF"
+DEFAULT_MODEL = "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF"
DEFAULT_QUANT_LARGE = "*Q8_0.gguf"
DEFAULT_QUANT_BALANCED = "*Q4_K_M.gguf"
DEFAULT_QUANT_SMALL = "*Q2_K.gguf"
diff --git a/bpm_ai_inference/llm/llama_cpp/llama_chat.py b/bpm_ai_inference/llm/llama_cpp/llama_chat.py
index aac285c..148ab0a 100644
--- a/bpm_ai_inference/llm/llama_cpp/llama_chat.py
+++ b/bpm_ai_inference/llm/llama_cpp/llama_chat.py
@@ -65,6 +65,7 @@ def __init__(
verbose=False
)
else:
+ logger.info(f"Loading model file {filename} from {model}...")
self.llm = Llama.from_pretrained(
repo_id=model,
filename=filename,
diff --git a/bpm_ai_inference/llm/llama_cpp/util.py b/bpm_ai_inference/llm/llama_cpp/util.py
index fe38634..dcb6304 100644
--- a/bpm_ai_inference/llm/llama_cpp/util.py
+++ b/bpm_ai_inference/llm/llama_cpp/util.py
@@ -1,6 +1,7 @@
import json
import logging
+from bpm_ai_core.llm.common.blob import Blob
from bpm_ai_core.llm.common.message import ChatMessage, AssistantMessage, ToolResultMessage
logger = logging.getLogger(__name__)
@@ -18,6 +19,20 @@ async def message_to_llama_dict(message: ChatMessage) -> dict:
elif isinstance(message, ToolResultMessage):
tool_response_content = f"{message.content}"
content = '\n{"name": "' + message.name + '", "content": ' + tool_response_content + '}\n'
+ elif isinstance(message.content, list):
+ content = ""
+ for e in message.content:
+ if isinstance(e, str):
+ content += e
+ elif isinstance(e, Blob) and (e.is_text()):
+ text = (await e.as_bytes()).decode("utf-8")
+ filename = (" name='" + e.path + "'") if e.path else ''
+ text = f"\n\n{text}\n\n"
+ content += text
+ else:
+ raise ValueError(
+ "Elements in ChatMessage.content must be str or Blob (text)"
+ )
else:
content = message.content
diff --git a/poetry.lock b/poetry.lock
index 601a0d2..c8fa381 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -332,13 +332,13 @@ tqdm = "*"
[[package]]
name = "bpm-ai-core"
-version = "2.6.0"
+version = "2.6.1"
description = "Core AI abstractions and helpers."
optional = false
python-versions = "<4.0,>=3.11"
files = [
- {file = "bpm_ai_core-2.6.0-py3-none-any.whl", hash = "sha256:bb1753296b637231f9fe295ba32c9da6374e60da09ea3323c628a9e382749f84"},
- {file = "bpm_ai_core-2.6.0.tar.gz", hash = "sha256:6a88de6212a7d67fd6af4e73983eac7dd008fff4a8509f0dff3ad0029575b99f"},
+ {file = "bpm_ai_core-2.6.1-py3-none-any.whl", hash = "sha256:38e3bf71a93f3fa714b5d70692ebfd8b8fa2d8cd0e50ddce1e01f33a823d0be4"},
+ {file = "bpm_ai_core-2.6.1.tar.gz", hash = "sha256:542f4c489e87894071e8df8ccaff60cbc1b6669d8aefcea7d5a3e49ac4442cce"},
]
[package.dependencies]
@@ -4377,4 +4377,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = ">=3.11,<3.12"
-content-hash = "ca370e8760dbcc580dc6f122dc7c839a3095522970216401c4a40c14dbc367ed"
+content-hash = "dfd3d34b8de4ac1535f96ba98a58fb35b8fe203c3ad55aea9125b959290ce69e"
diff --git a/pyproject.toml b/pyproject.toml
index cf35b28..77f83c8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "bpm-ai-inference"
-version = "0.2.9"
+version = "0.3.1"
description = "Inference and server for local AI implementations of bpm-ai-core abstractions."
authors = ["Bennet Krause "]
repository = "https://github.com/holunda-io/bpm-ai-inference"
@@ -10,7 +10,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.11,<3.12"
-bpm-ai-core = "^2.6.0"
+bpm-ai-core = "^2.6.1"
langfuse = "^2.7.6"
faster-whisper = "^0.10.0"
lingua-language-detector = "^2.0.2"