Version 0.3.11 (#130)
## LLMstudio Version 0.3.11

### What was done in this version:

- Updated the method input_to_string in provider.py to ensure compatibility with vision models (a usage sketch follows this list) -- [PR 126](#126)
- Added events to the startup process of the tracking, UI, and engine servers. This removes the race conditions we were repeatedly experiencing and also removes the need to run start_server() as early as possible -- [PR 129](#129).
- Improved exception handling for invalid Azure endpoints -- [PR
129](#129).
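
As noted in the first item above, here is a minimal self-contained sketch of the kind of multimodal payload input_to_string now accepts. The message text, image URL, and the flatten_messages helper are illustrative placeholders that replicate the new logic; they are not code shipped in this PR:

```python
# Hypothetical vision-style chat input: the user message's content is a list of
# text and image_url parts, as accepted by OpenAI-compatible vision models.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        ],
    },
]


def flatten_messages(messages: list) -> str:
    """Standalone replica of the updated input_to_string behavior, for illustration."""
    result = []
    for message in messages:
        content = message.get("content")
        if content is None:
            continue
        if isinstance(content, str):
            # Plain string content is appended as before.
            result.append(content)
        elif isinstance(content, list) and message.get("role") == "user":
            # Vision-style content: collect text parts and image URLs.
            for item in content:
                if item.get("type") == "text":
                    result.append(item.get("text", ""))
                elif item.get("type") == "image_url":
                    result.append(item.get("image_url", {}).get("url", ""))
    return "".join(result)


print(flatten_messages(messages))
# -> You are a helpful assistant.What is in this image?https://example.com/cat.png
```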


### How it was tested:

- Ran projects with LLMStudio server dependencies

### Additional notes:

- Any breaking changes? 
    - No
- Any new dependencies added?
    - No
- Any performance improvements?
    - Yes. Servers are now launched synchronously, preventing parent processes from calling LLMStudio before the servers are up (see the sketch below).
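
As referenced above, a minimal self-contained sketch of the handshake: the parent creates a threading.Event, hands it to the server thread, and blocks on wait() until the server's startup hook calls set() -- the same pattern server.py and engine/__init__.py use in the diff below. The fake_server function, its sleep durations, and the printed message are illustrative stand-ins, not code from this PR:

```python
import threading
import time


def fake_server(started_event: threading.Event) -> None:
    """Stand-in for run_engine_app: finish startup work, then signal readiness."""
    time.sleep(0.5)  # simulate binding the port / loading configuration
    started_event.set()  # the real apps call this from their FastAPI startup event
    while True:  # keep "serving"
        time.sleep(1)


started_event = threading.Event()
thread = threading.Thread(target=fake_server, daemon=True, args=(started_event,))
thread.start()
started_event.wait()  # the parent only continues once the server reports ready
print("Server is up; safe to call LLMStudio now.")
```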
claudiolemos authored Sep 9, 2024
2 parents 21cc916 + 41f7c11 commit 3567855
Showing 8 changed files with 118 additions and 129 deletions.
10 changes: 7 additions & 3 deletions llmstudio/engine/__init__.py
@@ -1,6 +1,7 @@
import json
import os
from pathlib import Path
from threading import Event
from typing import Any, Dict, List, Optional, Union

import uvicorn
@@ -78,7 +79,9 @@ def _merge_configs(config1, config2):
raise RuntimeError(f"Error in configuration data: {e}")


def create_engine_app(config: EngineConfig = _load_engine_config()) -> FastAPI:
def create_engine_app(
started_event: Event, config: EngineConfig = _load_engine_config()
) -> FastAPI:
app = FastAPI(
title=ENGINE_TITLE,
description=ENGINE_DESCRIPTION,
@@ -162,14 +165,15 @@ async def export(request: Request):

@app.on_event("startup")
async def startup_event():
started_event.set()
print(f"Running LLMstudio Engine on http://{ENGINE_HOST}:{ENGINE_PORT} ")

return app


def run_engine_app():
def run_engine_app(started_event: Event):
try:
engine = create_engine_app()
engine = create_engine_app(started_event)
uvicorn.run(
engine,
host=ENGINE_HOST,
149 changes: 81 additions & 68 deletions llmstudio/engine/providers/azure.py
@@ -1,3 +1,4 @@
import ast
import asyncio
import json
import os
@@ -120,13 +121,17 @@ async def generate_client(
**function_args,
**request.parameters.model_dump(),
}

# Perform the asynchronous call
return await asyncio.to_thread(
client.chat.completions.create, **combined_args
)

except openai._exceptions.APIError as e:
except openai._exceptions.APIConnectionError as e:
raise HTTPException(
status_code=404, detail=f"There was an error reaching the endpoint: {e}"
)

except openai._exceptions.APIStatusError as e:
raise HTTPException(status_code=e.status_code, detail=e.response.json())

def prepare_messages(self, request: AzureRequest):
@@ -174,6 +179,7 @@ async def handle_tool_response(

function_call_buffer = ""
saving = False
normal_call_chunks = []
for chunk in response:
if chunk.choices[0].delta.content is not None:
if (
@@ -224,7 +230,13 @@ async def handle_tool_response(
yield finish_chunk

else:
yield chunk.model_dump()
normal_call_chunks.append(chunk)
if chunk.choices[0].finish_reason == "stop":
for chunk in normal_call_chunks:
yield chunk.model_dump()

def create_tool_name_chunk(self, function_name: str, kwargs: dict) -> dict:
return ChatCompletionChunk(
@@ -433,14 +445,15 @@ def add_tool_instructions(self, tools: list) -> str:
tool_prompt += """
If you choose to use a function to produce this response, ONLY reply in the following format with no prefix or suffix:
§{"type": "function", "name": "FUNCTION_NAME", "parameters": {"PARAMETER_NAME": PARAMETER_VALUE}}
IMPORTANT: IT IS VITAL THAT YOU NEVER ADD A PREFIX OR A SUFFIX TO THE FUNCTION CALL.
Here is an example of the output I desire when performing a function call:
§{"type": "function", "name": "python_repl_ast", "parameters": {"query": "print(df.shape)"}}
NOTE: There is no prefix before the symbol '§' and nothing comes after the call is done.
Reminder:
- Function calls MUST follow the specified format.
- Only call one function at a time.
- NEVER call more than one function at a time.
- Required parameters MUST be specified.
- Put the entire function call reply on one line.
- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls.
@@ -456,10 +469,10 @@ def add_function_instructions(self, functions: list) -> str:

for func in functions:
function_prompt += (
f"Use the function '{func['name']}' to '{func['description']}':\n"
f"Use the function '{func['name']}' to: '{func['description']}'\n"
)
params_info = json.dumps(func["parameters"], indent=4)
function_prompt += f"Parameters format:\n{params_info}\n\n"
function_prompt += f"{params_info}\n\n"

function_prompt += """
If you choose to use a function to produce this response, ONLY reply in the following format with no prefix or suffix:
@@ -477,74 +490,74 @@ def add_function_instructions(self, functions: list) -> str:
- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls.
- If you have already called a function and got the response for the user's question, please reply with the response.
"""

return function_prompt

def add_conversation(self, openai_message: list, llama_message: str) -> str:
conversation_parts = []
for message in openai_message:
if message["role"] == "system":
continue
elif "tool_calls" in message:
for tool_call in message["tool_calls"]:
function_name = tool_call["function"]["name"]
arguments = tool_call["function"]["arguments"]
conversation_parts.append(
f"""
<|start_header_id|>assistant<|end_header_id|>
<function={function_name}>{arguments}</function>
<|eom_id|>
"""
)
elif "tool_call_id" in message:
tool_response = message["content"]
conversation_parts.append(
f"""
<|start_header_id|>ipython<|end_header_id|>
{tool_response}
<|eot_id|>
"""
)
elif "function_call" in message:
function_name = message["function_call"]["name"]
arguments = message["function_call"]["arguments"]
conversation_parts.append(
f"""
<|start_header_id|>assistant<|end_header_id|>
<function={function_name}>{arguments}</function>
<|eom_id|>
"""
)
elif (
message["role"] in ["assistant", "user"]
and message["content"] is not None
):
conversation_parts.append(
f"""
<|start_header_id|>{message['role']}<|end_header_id|>
{message['content']}
<|eot_id|>
"""
)
elif message["role"] == "function":
function_response = message["content"]
conversation_parts.append(
f"""
<|start_header_id|>ipython<|end_header_id|>
{function_response}
<|eot_id|>
"""
)
elif (
message["role"] in ["assistant", "user"]
and message["content"] is not None
):
conversation_parts.append(
f"""
<|start_header_id|>{message['role']}<|end_header_id|>
{message['content']}
<|eot_id|>
"""
)
elif message["role"] == "user" and isinstance(message["content"], str):
try:
# Attempt to safely evaluate the string to a Python object
content_as_list = ast.literal_eval(message["content"])
if isinstance(content_as_list, list):
# If the content is a list, process each nested message
for nested_message in content_as_list:
conversation_parts.append(
self.format_message(nested_message)
)
else:
# If the content is not a list, append it directly
conversation_parts.append(self.format_message(message))
except (ValueError, SyntaxError):
# If evaluation fails or content is not a list/dict string, append the message directly
conversation_parts.append(self.format_message(message))
else:
# For all other messages, use the existing formatting logic
conversation_parts.append(self.format_message(message))

return llama_message + "".join(conversation_parts)

def format_message(self, message: dict) -> str:
"""Format a single message for the conversation."""
if "tool_calls" in message:
for tool_call in message["tool_calls"]:
function_name = tool_call["function"]["name"]
arguments = tool_call["function"]["arguments"]
return f"""
<|start_header_id|>assistant<|end_header_id|>
<function={function_name}>{arguments}</function>
<|eom_id|>
"""
elif "tool_call_id" in message:
tool_response = message["content"]
return f"""
<|start_header_id|>ipython<|end_header_id|>
{tool_response}
<|eot_id|>
"""
elif "function_call" in message:
function_name = message["function_call"]["name"]
arguments = message["function_call"]["arguments"]
return f"""
<|start_header_id|>assistant<|end_header_id|>
<function={function_name}>{arguments}</function>
<|eom_id|>
"""
elif (
message["role"] in ["assistant", "user"] and message["content"] is not None
):
return f"""
<|start_header_id|>{message['role']}<|end_header_id|>
{message['content']}
<|eot_id|>
"""
elif message["role"] == "function":
function_response = message["content"]
return f"""
<|start_header_id|>ipython<|end_header_id|>
{function_response}
<|eot_id|>
"""
return ""
1 change: 0 additions & 1 deletion llmstudio/engine/providers/ollama.py
@@ -64,7 +64,6 @@ async def parse_response(
if "error" in chunk:
raise HTTPException(status_code=500, detail=chunk["error"])
if chunk.get("done"):
print("done")
yield ChatCompletionChunk(
id=str(uuid.uuid4()),
choices=[
65 changes: 16 additions & 49 deletions llmstudio/engine/providers/provider.py
@@ -268,28 +268,6 @@ def join_chunks(self, chunks, request):
):
function_call_arguments += chunk.get("arguments")

chunk = ChatCompletion(
id=chunks[-1].get("id"),
created=chunks[-1].get("created"),
model=chunks[-1].get("model"),
object="chat.completion",
choices=[
Choice(
finish_reason="function_call",
index=0,
logprobs=None,
message=ChatCompletionMessage(
content=None,
role="assistant",
tool_calls=None,
function_call=FunctionCall(
arguments=function_call_arguments,
name=function_call_name,
),
),
)
],
)
return (
ChatCompletion(
id=chunks[-1].get("id"),
@@ -332,26 +310,6 @@ def join_chunks(self, chunks, request):
)
)

chunk = ChatCompletion(
id=chunks[-1].get("id"),
created=chunks[-1].get("created"),
model=chunks[-1].get("model"),
object="chat.completion",
choices=[
Choice(
finish_reason="stop",
index=0,
logprobs=None,
message=ChatCompletionMessage(
content=stop_content,
role="assistant",
function_call=None,
tool_calls=None,
),
)
],
)

return (
ChatCompletion(
id=chunks[-1].get("id"),
@@ -428,13 +386,22 @@ def input_to_string(self, input):
if isinstance(input, str):
return input
else:
return "".join(
[
message.get("content", "")
for message in input
if message.get("content") is not None
]
)
result = []
for message in input:
if message.get("content") is not None:
if isinstance(message["content"], str):
result.append(message["content"])
elif (
isinstance(message["content"], list)
and message.get("role") == "user"
):
for item in message["content"]:
if item.get("type") == "text":
result.append(item.get("text", ""))
elif item.get("type") == "image_url":
url = item.get("image_url", {}).get("url", "")
result.append(url)
return "".join(result)

def output_to_string(self, output):
if output.choices[0].finish_reason == "stop":
6 changes: 4 additions & 2 deletions llmstudio/server.py
@@ -1,4 +1,5 @@
import threading
from threading import Event

import requests

@@ -29,8 +30,10 @@ def is_server_running(host, port, path="/health"):

def start_server_component(host, port, run_func, server_name):
if not is_server_running(host, port):
thread = threading.Thread(target=run_func, daemon=True)
started_event = Event()
thread = threading.Thread(target=run_func, daemon=True, args=(started_event,))
thread.start()
started_event.wait() # wait for startup, this assumes the event is set somewhere
return thread
else:
print(f"{server_name} server already running on {host}:{port}")
@@ -53,7 +56,6 @@ def setup_servers(engine, tracking, ui):
TRACKING_HOST, TRACKING_PORT, run_tracking_app, "Tracking"
)

ui_thread = None
if ui:
ui_thread = start_server_component(UI_HOST, UI_PORT, run_ui_app, "UI")
