Version 0.3.11 (#130)
## LLMstudio Version 0.3.11

### What was done in this version:

- Updated the method input_to_string in provider.py to ensure compatibility with vision models (a usage sketch follows this list) -- [PR 126](#126)
- Added events to the startup process of the tracking, UI, and engine servers. This removes the race conditions we were repeatedly experiencing and also removes the need to run start_server() as early as possible -- [PR 129](#129).
- Improved exception handling for invalid Azure endpoints -- [PR
129](#129).
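
As noted in the first item above, here is a minimal self-contained sketch of the kind of multimodal payload input_to_string now accepts. The message text, image URL, and the flatten_messages helper are illustrative placeholders that replicate the new logic; they are not code shipped in this PR:

```python
# Hypothetical vision-style chat input: the user message's content is a list of
# text and image_url parts, as accepted by OpenAI-compatible vision models.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        ],
    },
]


def flatten_messages(messages: list) -> str:
    """Standalone replica of the updated input_to_string behavior, for illustration."""
    result = []
    for message in messages:
        content = message.get("content")
        if content is None:
            continue
        if isinstance(content, str):
            # Plain string content is appended as before.
            result.append(content)
        elif isinstance(content, list) and message.get("role") == "user":
            # Vision-style content: collect text parts and image URLs.
            for item in content:
                if item.get("type") == "text":
                    result.append(item.get("text", ""))
                elif item.get("type") == "image_url":
                    result.append(item.get("image_url", {}).get("url", ""))
    return "".join(result)


print(flatten_messages(messages))
# -> You are a helpful assistant.What is in this image?https://example.com/cat.png
```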


### How it was tested:

- Ran projects with LLMStudio server dependencies

### Additional notes:

- Any breaking changes? 
    - No
- Any new dependencies added?
    - No
- Any performance improvements?
    - Yes. Servers are now launched synchronously, preventing parent processes from calling LLMStudio before the servers are up (see the sketch below).
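
As referenced above, a minimal self-contained sketch of the handshake: the parent creates a threading.Event, hands it to the server thread, and blocks on wait() until the server's startup hook calls set() -- the same pattern server.py and engine/__init__.py use in the diff below. The fake_server function, its sleep durations, and the printed message are illustrative stand-ins, not code from this PR:

```python
import threading
import time


def fake_server(started_event: threading.Event) -> None:
    """Stand-in for run_engine_app: finish startup work, then signal readiness."""
    time.sleep(0.5)  # simulate binding the port / loading configuration
    started_event.set()  # the real apps call this from their FastAPI startup event
    while True:  # keep "serving"
        time.sleep(1)


started_event = threading.Event()
thread = threading.Thread(target=fake_server, daemon=True, args=(started_event,))
thread.start()
started_event.wait()  # the parent only continues once the server reports ready
print("Server is up; safe to call LLMStudio now.")
```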
claudiolemos authored Sep 9, 2024
2 parents 21cc916 + 41f7c11 commit 3567855
Showing 8 changed files with 118 additions and 129 deletions.
10 changes: 7 additions & 3 deletions llmstudio/engine/__init__.py
@@ -1,6 +1,7 @@
import json
import os
from pathlib import Path
from threading import Event
from typing import Any, Dict, List, Optional, Union

import uvicorn
@@ -78,7 +79,9 @@ def _merge_configs(config1, config2):
raise RuntimeError(f"Error in configuration data: {e}")


def create_engine_app(config: EngineConfig = _load_engine_config()) -> FastAPI:
def create_engine_app(
started_event: Event, config: EngineConfig = _load_engine_config()
) -> FastAPI:
app = FastAPI(
title=ENGINE_TITLE,
description=ENGINE_DESCRIPTION,
@@ -162,14 +165,15 @@ async def export(request: Request):

@app.on_event("startup")
async def startup_event():
started_event.set()
print(f"Running LLMstudio Engine on http://{ENGINE_HOST}:{ENGINE_PORT} ")

return app


def run_engine_app():
def run_engine_app(started_event: Event):
try:
engine = create_engine_app()
engine = create_engine_app(started_event)
uvicorn.run(
engine,
host=ENGINE_HOST,
149 changes: 81 additions & 68 deletions llmstudio/engine/providers/azure.py
@@ -1,3 +1,4 @@
import ast
import asyncio
import json
import os
@@ -120,13 +121,17 @@ async def generate_client(
**function_args,
**request.parameters.model_dump(),
}

# Perform the asynchronous call
return await asyncio.to_thread(
client.chat.completions.create, **combined_args
)

except openai._exceptions.APIError as e:
except openai._exceptions.APIConnectionError as e:
raise HTTPException(
status_code=404, detail=f"There was an error reaching the endpoint: {e}"
)

except openai._exceptions.APIStatusError as e:
raise HTTPException(status_code=e.status_code, detail=e.response.json())

def prepare_messages(self, request: AzureRequest):
@@ -174,6 +179,7 @@ async def handle_tool_response(

function_call_buffer = ""
saving = False
normal_call_chunks = []
for chunk in response:
if chunk.choices[0].delta.content is not None:
if (
@@ -224,7 +230,13 @@ async def handle_tool_response(
yield finish_chunk

else:
yield chunk.model_dump()
normal_call_chunks.append(chunk)
if chunk.choices[0].finish_reason == "stop":
for chunk in normal_call_chunks:
yield chunk.model_dump()

def create_tool_name_chunk(self, function_name: str, kwargs: dict) -> dict:
return ChatCompletionChunk(
@@ -433,14 +445,15 @@ def add_tool_instructions(self, tools: list) -> str:
tool_prompt += """
If you choose to use a function to produce this response, ONLY reply in the following format with no prefix or suffix:
§{"type": "function", "name": "FUNCTION_NAME", "parameters": {"PARAMETER_NAME": PARAMETER_VALUE}}
IMPORTANT: IT IS VITAL THAT YOU NEVER ADD A PREFIX OR A SUFFIX TO THE FUNCTION CALL.
Here is an example of the output I desire when performing a function call:
§{"type": "function", "name": "python_repl_ast", "parameters": {"query": "print(df.shape)"}}
NOTE: There is no prefix before the symbol '§' and nothing comes after the call is done.
Reminder:
- Function calls MUST follow the specified format.
- Only call one function at a time.
- NEVER call more than one function at a time.
- Required parameters MUST be specified.
- Put the entire function call reply on one line.
- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls.
@@ -456,10 +469,10 @@ def add_function_instructions(self, functions: list) -> str:

for func in functions:
function_prompt += (
f"Use the function '{func['name']}' to '{func['description']}':\n"
f"Use the function '{func['name']}' to: '{func['description']}'\n"
)
params_info = json.dumps(func["parameters"], indent=4)
function_prompt += f"Parameters format:\n{params_info}\n\n"
function_prompt += f"{params_info}\n\n"

function_prompt += """
If you choose to use a function to produce this response, ONLY reply in the following format with no prefix or suffix:
@@ -477,74 +490,74 @@ def add_function_instructions(self, functions: list) -> str:
- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls.
- If you have already called a function and got the response for the user's question, please reply with the response.
"""

return function_prompt

def add_conversation(self, openai_message: list, llama_message: str) -> str:
conversation_parts = []
for message in openai_message:
if message["role"] == "system":
continue
elif "tool_calls" in message:
for tool_call in message["tool_calls"]:
function_name = tool_call["function"]["name"]
arguments = tool_call["function"]["arguments"]
conversation_parts.append(
f"""
<|start_header_id|>assistant<|end_header_id|>
<function={function_name}>{arguments}</function>
<|eom_id|>
"""
)
elif "tool_call_id" in message:
tool_response = message["content"]
conversation_parts.append(
f"""
<|start_header_id|>ipython<|end_header_id|>
{tool_response}
<|eot_id|>
"""
)
elif "function_call" in message:
function_name = message["function_call"]["name"]
arguments = message["function_call"]["arguments"]
conversation_parts.append(
f"""
<|start_header_id|>assistant<|end_header_id|>
<function={function_name}>{arguments}</function>
<|eom_id|>
"""
)
elif (
message["role"] in ["assistant", "user"]
and message["content"] is not None
):
conversation_parts.append(
f"""
<|start_header_id|>{message['role']}<|end_header_id|>
{message['content']}
<|eot_id|>
"""
)
elif message["role"] == "function":
function_response = message["content"]
conversation_parts.append(
f"""
<|start_header_id|>ipython<|end_header_id|>
{function_response}
<|eot_id|>
"""
)
elif (
message["role"] in ["assistant", "user"]
and message["content"] is not None
):
conversation_parts.append(
f"""
<|start_header_id|>{message['role']}<|end_header_id|>
{message['content']}
<|eot_id|>
"""
)
elif message["role"] == "user" and isinstance(message["content"], str):
try:
# Attempt to safely evaluate the string to a Python object
content_as_list = ast.literal_eval(message["content"])
if isinstance(content_as_list, list):
# If the content is a list, process each nested message
for nested_message in content_as_list:
conversation_parts.append(
self.format_message(nested_message)
)
else:
# If the content is not a list, append it directly
conversation_parts.append(self.format_message(message))
except (ValueError, SyntaxError):
# If evaluation fails or content is not a list/dict string, append the message directly
conversation_parts.append(self.format_message(message))
else:
# For all other messages, use the existing formatting logic
conversation_parts.append(self.format_message(message))

return llama_message + "".join(conversation_parts)

def format_message(self, message: dict) -> str:
"""Format a single message for the conversation."""
if "tool_calls" in message:
for tool_call in message["tool_calls"]:
function_name = tool_call["function"]["name"]
arguments = tool_call["function"]["arguments"]
return f"""
<|start_header_id|>assistant<|end_header_id|>
<function={function_name}>{arguments}</function>
<|eom_id|>
"""
elif "tool_call_id" in message:
tool_response = message["content"]
return f"""
<|start_header_id|>ipython<|end_header_id|>
{tool_response}
<|eot_id|>
"""
elif "function_call" in message:
function_name = message["function_call"]["name"]
arguments = message["function_call"]["arguments"]
return f"""
<|start_header_id|>assistant<|end_header_id|>
<function={function_name}>{arguments}</function>
<|eom_id|>
"""
elif (
message["role"] in ["assistant", "user"] and message["content"] is not None
):
return f"""
<|start_header_id|>{message['role']}<|end_header_id|>
{message['content']}
<|eot_id|>
"""
elif message["role"] == "function":
function_response = message["content"]
return f"""
<|start_header_id|>ipython<|end_header_id|>
{function_response}
<|eot_id|>
"""
return ""
1 change: 0 additions & 1 deletion llmstudio/engine/providers/ollama.py
@@ -64,7 +64,6 @@ async def parse_response(
if "error" in chunk:
raise HTTPException(status_code=500, detail=chunk["error"])
if chunk.get("done"):
print("done")
yield ChatCompletionChunk(
id=str(uuid.uuid4()),
choices=[
65 changes: 16 additions & 49 deletions llmstudio/engine/providers/provider.py
@@ -268,28 +268,6 @@ def join_chunks(self, chunks, request):
):
function_call_arguments += chunk.get("arguments")

chunk = ChatCompletion(
id=chunks[-1].get("id"),
created=chunks[-1].get("created"),
model=chunks[-1].get("model"),
object="chat.completion",
choices=[
Choice(
finish_reason="function_call",
index=0,
logprobs=None,
message=ChatCompletionMessage(
content=None,
role="assistant",
tool_calls=None,
function_call=FunctionCall(
arguments=function_call_arguments,
name=function_call_name,
),
),
)
],
)
return (
ChatCompletion(
id=chunks[-1].get("id"),
@@ -332,26 +310,6 @@ def join_chunks(self, chunks, request):
)
)

chunk = ChatCompletion(
id=chunks[-1].get("id"),
created=chunks[-1].get("created"),
model=chunks[-1].get("model"),
object="chat.completion",
choices=[
Choice(
finish_reason="stop",
index=0,
logprobs=None,
message=ChatCompletionMessage(
content=stop_content,
role="assistant",
function_call=None,
tool_calls=None,
),
)
],
)

return (
ChatCompletion(
id=chunks[-1].get("id"),
@@ -428,13 +386,22 @@ def input_to_string(self, input):
if isinstance(input, str):
return input
else:
return "".join(
[
message.get("content", "")
for message in input
if message.get("content") is not None
]
)
result = []
for message in input:
if message.get("content") is not None:
if isinstance(message["content"], str):
result.append(message["content"])
elif (
isinstance(message["content"], list)
and message.get("role") == "user"
):
for item in message["content"]:
if item.get("type") == "text":
result.append(item.get("text", ""))
elif item.get("type") == "image_url":
url = item.get("image_url", {}).get("url", "")
result.append(url)
return "".join(result)

def output_to_string(self, output):
if output.choices[0].finish_reason == "stop":
6 changes: 4 additions & 2 deletions llmstudio/server.py
@@ -1,4 +1,5 @@
import threading
from threading import Event

import requests

@@ -29,8 +30,10 @@ def is_server_running(host, port, path="/health"):

def start_server_component(host, port, run_func, server_name):
if not is_server_running(host, port):
thread = threading.Thread(target=run_func, daemon=True)
started_event = Event()
thread = threading.Thread(target=run_func, daemon=True, args=(started_event,))
thread.start()
started_event.wait() # wait for startup, this assumes the event is set somewhere
return thread
else:
print(f"{server_name} server already running on {host}:{port}")
@@ -53,7 +56,6 @@ def setup_servers(engine, tracking, ui):
TRACKING_HOST, TRACKING_PORT, run_tracking_app, "Tracking"
)

ui_thread = None
if ui:
ui_thread = start_server_component(UI_HOST, UI_PORT, run_ui_app, "UI")
