From 864aa97b8c6dc23d552799a0d8b6182282d6e967 Mon Sep 17 00:00:00 2001 From: diogoazevedo15 Date: Fri, 30 Aug 2024 14:35:48 +0100 Subject: [PATCH 01/11] Update azure.py llama function call parsing 1. Update the llama parsing for Llama calls with functions, where the functions are not used to produce the response. 2. Remove useless chunk code from provider.py --- llmstudio/engine/providers/azure.py | 137 +++++++++++++------------ llmstudio/engine/providers/provider.py | 22 ---- 2 files changed, 70 insertions(+), 89 deletions(-) diff --git a/llmstudio/engine/providers/azure.py b/llmstudio/engine/providers/azure.py index 1bb61516..418553e8 100644 --- a/llmstudio/engine/providers/azure.py +++ b/llmstudio/engine/providers/azure.py @@ -14,6 +14,7 @@ Union, ) +import ast # Add this import to safely evaluate string representations of lists/dicts import openai from fastapi import HTTPException from openai import AzureOpenAI, OpenAI @@ -174,7 +175,9 @@ async def handle_tool_response( function_call_buffer = "" saving = False + normal_call_chunks = [] for chunk in response: + chunk if chunk.choices[0].delta.content is not None: if ( "§" in chunk.choices[0].delta.content @@ -224,8 +227,11 @@ async def handle_tool_response( yield finish_chunk else: - yield chunk.model_dump() - + normal_call_chunks.append(chunk) + if chunk.choices[0].finish_reason == "stop": + for chunk in normal_call_chunks: + yield chunk.model_dump() + def create_tool_name_chunk(self, function_name: str, kwargs: dict) -> dict: return ChatCompletionChunk( id=str(uuid.uuid4()), @@ -433,14 +439,15 @@ def add_tool_instructions(self, tools: list) -> str: tool_prompt += """ If you choose to use a function to produce this response, ONLY reply in the following format with no prefix or suffix: §{"type": "function", "name": "FUNCTION_NAME", "parameters": {"PARAMETER_NAME": PARAMETER_VALUE}} +IMPORTANT: IT IS VITAL THAT YOU NEVER ADD A PREFIX OR A SUFFIX TO THE FUNCTION CALL. Here is an example of the output I desiere when performing function call: §{"type": "function", "name": "python_repl_ast", "parameters": {"query": "print(df.shape)"}} +NOTE: There is no prefix before the symbol '§' and nothing comes after the call is done. Reminder: - Function calls MUST follow the specified format. - Only call one function at a time. - - NEVER call more than one function at a time. - Required parameters MUST be specified. - Put the entire function call reply on one line. - If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls. @@ -456,10 +463,10 @@ def add_function_instructions(self, functions: list) -> str: for func in functions: function_prompt += ( - f"Use the function '{func['name']}' to '{func['description']}':\n" + f"Use the function '{func['name']}' to: '{func['description']}'\n" ) params_info = json.dumps(func["parameters"], indent=4) - function_prompt += f"Parameters format:\n{params_info}\n\n" + function_prompt += f"{params_info}\n\n" function_prompt += """ If you choose to use a function to produce this response, ONLY reply in the following format with no prefix or suffix: @@ -477,7 +484,6 @@ def add_function_instructions(self, functions: list) -> str: - If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls. - If you have already called a function and got the response for the user's question, please reply with the response. 
""" - return function_prompt def add_conversation(self, openai_message: list, llama_message: str) -> str: @@ -485,66 +491,63 @@ def add_conversation(self, openai_message: list, llama_message: str) -> str: for message in openai_message: if message["role"] == "system": continue - elif "tool_calls" in message: - for tool_call in message["tool_calls"]: - function_name = tool_call["function"]["name"] - arguments = tool_call["function"]["arguments"] - conversation_parts.append( - f""" - <|start_header_id|>assistant<|end_header_id|> - {arguments} - <|eom_id|> - """ - ) - elif "tool_call_id" in message: - tool_response = message["content"] - conversation_parts.append( - f""" - <|start_header_id|>ipython<|end_header_id|> - {tool_response} - <|eot_id|> - """ - ) - elif "function_call" in message: - function_name = message["function_call"]["name"] - arguments = message["function_call"]["arguments"] - conversation_parts.append( - f""" - <|start_header_id|>assistant<|end_header_id|> - {arguments} - <|eom_id|> - """ - ) - elif ( - message["role"] in ["assistant", "user"] - and message["content"] is not None - ): - conversation_parts.append( - f""" - <|start_header_id|>{message['role']}<|end_header_id|> - {message['content']} - <|eot_id|> - """ - ) - elif message["role"] == "function": - function_response = message["content"] - conversation_parts.append( - f""" - <|start_header_id|>ipython<|end_header_id|> - {function_response} - <|eot_id|> - """ - ) - elif ( - message["role"] in ["assistant", "user"] - and message["content"] is not None - ): - conversation_parts.append( - f""" - <|start_header_id|>{message['role']}<|end_header_id|> - {message['content']} - <|eot_id|> - """ - ) + elif message["role"] == "user" and isinstance(message["content"], str): + try: + # Attempt to safely evaluate the string to a Python object + content_as_list = ast.literal_eval(message["content"]) + if isinstance(content_as_list, list): + # If the content is a list, process each nested message + for nested_message in content_as_list: + conversation_parts.append(self.format_message(nested_message)) + else: + # If the content is not a list, append it directly + conversation_parts.append(self.format_message(message)) + except (ValueError, SyntaxError): + # If evaluation fails or content is not a list/dict string, append the message directly + conversation_parts.append(self.format_message(message)) + else: + # For all other messages, use the existing formatting logic + conversation_parts.append(self.format_message(message)) return llama_message + "".join(conversation_parts) + + def format_message(self, message: dict) -> str: + """Format a single message for the conversation.""" + if "tool_calls" in message: + for tool_call in message["tool_calls"]: + function_name = tool_call["function"]["name"] + arguments = tool_call["function"]["arguments"] + return f""" + <|start_header_id|>assistant<|end_header_id|> + {arguments} + <|eom_id|> + """ + elif "tool_call_id" in message: + tool_response = message["content"] + return f""" + <|start_header_id|>ipython<|end_header_id|> + {tool_response} + <|eot_id|> + """ + elif "function_call" in message: + function_name = message["function_call"]["name"] + arguments = message["function_call"]["arguments"] + return f""" + <|start_header_id|>assistant<|end_header_id|> + {arguments} + <|eom_id|> + """ + elif message["role"] in ["assistant", "user"] and message["content"] is not None: + return f""" + <|start_header_id|>{message['role']}<|end_header_id|> + {message['content']} + <|eot_id|> + """ + elif 
message["role"] == "function": + function_response = message["content"] + return f""" + <|start_header_id|>ipython<|end_header_id|> + {function_response} + <|eot_id|> + """ + return "" \ No newline at end of file diff --git a/llmstudio/engine/providers/provider.py b/llmstudio/engine/providers/provider.py index 6c37bbf9..c4204952 100644 --- a/llmstudio/engine/providers/provider.py +++ b/llmstudio/engine/providers/provider.py @@ -268,28 +268,6 @@ def join_chunks(self, chunks, request): ): function_call_arguments += chunk.get("arguments") - chunk = ChatCompletion( - id=chunks[-1].get("id"), - created=chunks[-1].get("created"), - model=chunks[-1].get("model"), - object="chat.completion", - choices=[ - Choice( - finish_reason="function_call", - index=0, - logprobs=None, - message=ChatCompletionMessage( - content=None, - role="assistant", - tool_calls=None, - function_call=FunctionCall( - arguments=function_call_arguments, - name=function_call_name, - ), - ), - ) - ], - ) return ( ChatCompletion( id=chunks[-1].get("id"), From a98aafe8d3c63a0a261c5f52541f4df5e33c0e61 Mon Sep 17 00:00:00 2001 From: diogoazevedo15 Date: Fri, 30 Aug 2024 15:28:16 +0100 Subject: [PATCH 02/11] Solve Lint issues --- llmstudio/engine/providers/azure.py | 14 +++++++++----- llmstudio/llm/langchain.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/llmstudio/engine/providers/azure.py b/llmstudio/engine/providers/azure.py index 418553e8..0ccfc452 100644 --- a/llmstudio/engine/providers/azure.py +++ b/llmstudio/engine/providers/azure.py @@ -1,3 +1,4 @@ +import ast # Add this import to safely evaluate string representations of lists/dicts import asyncio import json import os @@ -14,7 +15,6 @@ Union, ) -import ast # Add this import to safely evaluate string representations of lists/dicts import openai from fastapi import HTTPException from openai import AzureOpenAI, OpenAI @@ -231,7 +231,7 @@ async def handle_tool_response( if chunk.choices[0].finish_reason == "stop": for chunk in normal_call_chunks: yield chunk.model_dump() - + def create_tool_name_chunk(self, function_name: str, kwargs: dict) -> dict: return ChatCompletionChunk( id=str(uuid.uuid4()), @@ -498,7 +498,9 @@ def add_conversation(self, openai_message: list, llama_message: str) -> str: if isinstance(content_as_list, list): # If the content is a list, process each nested message for nested_message in content_as_list: - conversation_parts.append(self.format_message(nested_message)) + conversation_parts.append( + self.format_message(nested_message) + ) else: # If the content is not a list, append it directly conversation_parts.append(self.format_message(message)) @@ -537,7 +539,9 @@ def format_message(self, message: dict) -> str: {arguments} <|eom_id|> """ - elif message["role"] in ["assistant", "user"] and message["content"] is not None: + elif ( + message["role"] in ["assistant", "user"] and message["content"] is not None + ): return f""" <|start_header_id|>{message['role']}<|end_header_id|> {message['content']} @@ -550,4 +554,4 @@ def format_message(self, message: dict) -> str: {function_response} <|eot_id|> """ - return "" \ No newline at end of file + return "" diff --git a/llmstudio/llm/langchain.py b/llmstudio/llm/langchain.py index a65cd922..e25610f3 100644 --- a/llmstudio/llm/langchain.py +++ b/llmstudio/llm/langchain.py @@ -22,7 +22,7 @@ def __init__(self, model_id: str, **kwargs): @property def _llm_type(self): return "LLMstudio" - + # @property # def model_id(self) -> str: # return self.model_id From 
b368b7772ba19287d290cfe06d1aa99bef91f560 Mon Sep 17 00:00:00 2001 From: diogoazevedo15 Date: Mon, 2 Sep 2024 12:08:11 +0100 Subject: [PATCH 03/11] Update azure.py --- llmstudio/engine/providers/azure.py | 1 - 1 file changed, 1 deletion(-) diff --git a/llmstudio/engine/providers/azure.py b/llmstudio/engine/providers/azure.py index 0ccfc452..92ffd577 100644 --- a/llmstudio/engine/providers/azure.py +++ b/llmstudio/engine/providers/azure.py @@ -177,7 +177,6 @@ async def handle_tool_response( saving = False normal_call_chunks = [] for chunk in response: - chunk if chunk.choices[0].delta.content is not None: if ( "§" in chunk.choices[0].delta.content From c173fa6a0ce1077a46a2c2b6f9bac0e6c856ae8c Mon Sep 17 00:00:00 2001 From: diogoazevedo15 <68329635+diogoazevedo15@users.noreply.github.com> Date: Mon, 2 Sep 2024 14:36:36 +0100 Subject: [PATCH 04/11] [update] azure-llama-function-call-parsing (#124) * Update azure.py llama function call parsing 1. Update the llama parsing for Llama calls with functions, where the functions are not used to produce the response. 2. Remove useless chunk code from provider.py * Solve Lint issues * Update azure.py --- llmstudio/engine/providers/azure.py | 138 +++++++++++++------------ llmstudio/engine/providers/provider.py | 22 ---- 2 files changed, 72 insertions(+), 88 deletions(-) diff --git a/llmstudio/engine/providers/azure.py b/llmstudio/engine/providers/azure.py index 1bb61516..92ffd577 100644 --- a/llmstudio/engine/providers/azure.py +++ b/llmstudio/engine/providers/azure.py @@ -1,3 +1,4 @@ +import ast # Add this import to safely evaluate string representations of lists/dicts import asyncio import json import os @@ -174,6 +175,7 @@ async def handle_tool_response( function_call_buffer = "" saving = False + normal_call_chunks = [] for chunk in response: if chunk.choices[0].delta.content is not None: if ( @@ -224,7 +226,10 @@ async def handle_tool_response( yield finish_chunk else: - yield chunk.model_dump() + normal_call_chunks.append(chunk) + if chunk.choices[0].finish_reason == "stop": + for chunk in normal_call_chunks: + yield chunk.model_dump() def create_tool_name_chunk(self, function_name: str, kwargs: dict) -> dict: return ChatCompletionChunk( @@ -433,14 +438,15 @@ def add_tool_instructions(self, tools: list) -> str: tool_prompt += """ If you choose to use a function to produce this response, ONLY reply in the following format with no prefix or suffix: §{"type": "function", "name": "FUNCTION_NAME", "parameters": {"PARAMETER_NAME": PARAMETER_VALUE}} +IMPORTANT: IT IS VITAL THAT YOU NEVER ADD A PREFIX OR A SUFFIX TO THE FUNCTION CALL. Here is an example of the output I desiere when performing function call: §{"type": "function", "name": "python_repl_ast", "parameters": {"query": "print(df.shape)"}} +NOTE: There is no prefix before the symbol '§' and nothing comes after the call is done. Reminder: - Function calls MUST follow the specified format. - Only call one function at a time. - - NEVER call more than one function at a time. - Required parameters MUST be specified. - Put the entire function call reply on one line. - If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls. 
@@ -456,10 +462,10 @@ def add_function_instructions(self, functions: list) -> str: for func in functions: function_prompt += ( - f"Use the function '{func['name']}' to '{func['description']}':\n" + f"Use the function '{func['name']}' to: '{func['description']}'\n" ) params_info = json.dumps(func["parameters"], indent=4) - function_prompt += f"Parameters format:\n{params_info}\n\n" + function_prompt += f"{params_info}\n\n" function_prompt += """ If you choose to use a function to produce this response, ONLY reply in the following format with no prefix or suffix: @@ -477,7 +483,6 @@ def add_function_instructions(self, functions: list) -> str: - If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls. - If you have already called a function and got the response for the user's question, please reply with the response. """ - return function_prompt def add_conversation(self, openai_message: list, llama_message: str) -> str: @@ -485,66 +490,67 @@ def add_conversation(self, openai_message: list, llama_message: str) -> str: for message in openai_message: if message["role"] == "system": continue - elif "tool_calls" in message: - for tool_call in message["tool_calls"]: - function_name = tool_call["function"]["name"] - arguments = tool_call["function"]["arguments"] - conversation_parts.append( - f""" - <|start_header_id|>assistant<|end_header_id|> - {arguments} - <|eom_id|> - """ - ) - elif "tool_call_id" in message: - tool_response = message["content"] - conversation_parts.append( - f""" - <|start_header_id|>ipython<|end_header_id|> - {tool_response} - <|eot_id|> - """ - ) - elif "function_call" in message: - function_name = message["function_call"]["name"] - arguments = message["function_call"]["arguments"] - conversation_parts.append( - f""" - <|start_header_id|>assistant<|end_header_id|> - {arguments} - <|eom_id|> - """ - ) - elif ( - message["role"] in ["assistant", "user"] - and message["content"] is not None - ): - conversation_parts.append( - f""" - <|start_header_id|>{message['role']}<|end_header_id|> - {message['content']} - <|eot_id|> - """ - ) - elif message["role"] == "function": - function_response = message["content"] - conversation_parts.append( - f""" - <|start_header_id|>ipython<|end_header_id|> - {function_response} - <|eot_id|> - """ - ) - elif ( - message["role"] in ["assistant", "user"] - and message["content"] is not None - ): - conversation_parts.append( - f""" - <|start_header_id|>{message['role']}<|end_header_id|> - {message['content']} - <|eot_id|> - """ - ) + elif message["role"] == "user" and isinstance(message["content"], str): + try: + # Attempt to safely evaluate the string to a Python object + content_as_list = ast.literal_eval(message["content"]) + if isinstance(content_as_list, list): + # If the content is a list, process each nested message + for nested_message in content_as_list: + conversation_parts.append( + self.format_message(nested_message) + ) + else: + # If the content is not a list, append it directly + conversation_parts.append(self.format_message(message)) + except (ValueError, SyntaxError): + # If evaluation fails or content is not a list/dict string, append the message directly + conversation_parts.append(self.format_message(message)) + else: + # For all other messages, use the existing formatting logic + conversation_parts.append(self.format_message(message)) return llama_message + "".join(conversation_parts) + + def format_message(self, message: dict) -> str: + 
"""Format a single message for the conversation.""" + if "tool_calls" in message: + for tool_call in message["tool_calls"]: + function_name = tool_call["function"]["name"] + arguments = tool_call["function"]["arguments"] + return f""" + <|start_header_id|>assistant<|end_header_id|> + {arguments} + <|eom_id|> + """ + elif "tool_call_id" in message: + tool_response = message["content"] + return f""" + <|start_header_id|>ipython<|end_header_id|> + {tool_response} + <|eot_id|> + """ + elif "function_call" in message: + function_name = message["function_call"]["name"] + arguments = message["function_call"]["arguments"] + return f""" + <|start_header_id|>assistant<|end_header_id|> + {arguments} + <|eom_id|> + """ + elif ( + message["role"] in ["assistant", "user"] and message["content"] is not None + ): + return f""" + <|start_header_id|>{message['role']}<|end_header_id|> + {message['content']} + <|eot_id|> + """ + elif message["role"] == "function": + function_response = message["content"] + return f""" + <|start_header_id|>ipython<|end_header_id|> + {function_response} + <|eot_id|> + """ + return "" diff --git a/llmstudio/engine/providers/provider.py b/llmstudio/engine/providers/provider.py index 6c37bbf9..c4204952 100644 --- a/llmstudio/engine/providers/provider.py +++ b/llmstudio/engine/providers/provider.py @@ -268,28 +268,6 @@ def join_chunks(self, chunks, request): ): function_call_arguments += chunk.get("arguments") - chunk = ChatCompletion( - id=chunks[-1].get("id"), - created=chunks[-1].get("created"), - model=chunks[-1].get("model"), - object="chat.completion", - choices=[ - Choice( - finish_reason="function_call", - index=0, - logprobs=None, - message=ChatCompletionMessage( - content=None, - role="assistant", - tool_calls=None, - function_call=FunctionCall( - arguments=function_call_arguments, - name=function_call_name, - ), - ), - ) - ], - ) return ( ChatCompletion( id=chunks[-1].get("id"), From 66bc3ae3e7cd98acc85718b70293925811019ae9 Mon Sep 17 00:00:00 2001 From: diogoazevedo15 Date: Mon, 2 Sep 2024 18:06:24 +0100 Subject: [PATCH 05/11] Update input_to_string Updated the method input_to_string to ensure compatibility with vision models. 
--- llmstudio/engine/providers/azure.py | 2 +- llmstudio/engine/providers/provider.py | 44 ++++++++++---------------- 2 files changed, 17 insertions(+), 29 deletions(-) diff --git a/llmstudio/engine/providers/azure.py b/llmstudio/engine/providers/azure.py index 92ffd577..b4f4d349 100644 --- a/llmstudio/engine/providers/azure.py +++ b/llmstudio/engine/providers/azure.py @@ -1,4 +1,4 @@ -import ast # Add this import to safely evaluate string representations of lists/dicts +import ast import asyncio import json import os diff --git a/llmstudio/engine/providers/provider.py b/llmstudio/engine/providers/provider.py index c4204952..4c8d0c91 100644 --- a/llmstudio/engine/providers/provider.py +++ b/llmstudio/engine/providers/provider.py @@ -79,13 +79,14 @@ async def chat( if request.is_stream: return StreamingResponse(response_handler) else: - return JSONResponse(content=await response_handler.__anext__()) + return JSONResponse(content= await response_handler.__anext__()) except HTTPException as e: if e.status_code == 429: continue # Retry on rate limit error else: raise e # Raise other HTTP exceptions except Exception as e: + print(e) raise HTTPException( status_code=500, detail=str(e) ) # Raise other exceptions as HTTP 500 @@ -310,26 +311,6 @@ def join_chunks(self, chunks, request): ) ) - chunk = ChatCompletion( - id=chunks[-1].get("id"), - created=chunks[-1].get("created"), - model=chunks[-1].get("model"), - object="chat.completion", - choices=[ - Choice( - finish_reason="stop", - index=0, - logprobs=None, - message=ChatCompletionMessage( - content=stop_content, - role="assistant", - function_call=None, - tool_calls=None, - ), - ) - ], - ) - return ( ChatCompletion( id=chunks[-1].get("id"), @@ -406,13 +387,20 @@ def input_to_string(self, input): if isinstance(input, str): return input else: - return "".join( - [ - message.get("content", "") - for message in input - if message.get("content") is not None - ] - ) + result = [] + for message in input: + if message.get("content") is not None: + if isinstance(message["content"], str): + result.append(message["content"]) + elif isinstance(message["content"], list) and message.get("role") == "user": + for item in message["content"]: + if item.get("type") == "text": + result.append(item.get("text", "")) + elif item.get("type") == "image_url": + url = item.get("image_url", {}).get("url", "") + result.append(url) + return "".join(result) + def output_to_string(self, output): if output.choices[0].finish_reason == "stop": From ce74c6b920ec69d8051d324e6d37a07de86f23fe Mon Sep 17 00:00:00 2001 From: diogoazevedo15 Date: Mon, 2 Sep 2024 18:11:14 +0100 Subject: [PATCH 06/11] Fix lint issues --- llmstudio/engine/providers/provider.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llmstudio/engine/providers/provider.py b/llmstudio/engine/providers/provider.py index 4c8d0c91..354906c5 100644 --- a/llmstudio/engine/providers/provider.py +++ b/llmstudio/engine/providers/provider.py @@ -79,7 +79,7 @@ async def chat( if request.is_stream: return StreamingResponse(response_handler) else: - return JSONResponse(content= await response_handler.__anext__()) + return JSONResponse(content=await response_handler.__anext__()) except HTTPException as e: if e.status_code == 429: continue # Retry on rate limit error @@ -392,7 +392,10 @@ def input_to_string(self, input): if message.get("content") is not None: if isinstance(message["content"], str): result.append(message["content"]) - elif isinstance(message["content"], list) and message.get("role") == 
"user": + elif ( + isinstance(message["content"], list) + and message.get("role") == "user" + ): for item in message["content"]: if item.get("type") == "text": result.append(item.get("text", "")) @@ -401,7 +404,6 @@ def input_to_string(self, input): result.append(url) return "".join(result) - def output_to_string(self, output): if output.choices[0].finish_reason == "stop": return output.choices[0].message.content From f2bce13ec5bbb6345c57ead0bee3b271ea534831 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Tue, 3 Sep 2024 13:33:13 +0000 Subject: [PATCH 07/11] [fix] bump prerelease version in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9932f327..70b4f917 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "llmstudio" -version = "0.3.10" +version = "0.3.11a0" description = "Prompt Perfection at Your Fingertips" authors = ["Cláudio Lemos "] license = "MIT" From e110a462bf8f99620782fe304609f8455c02d8bb Mon Sep 17 00:00:00 2001 From: gabrielrfg Date: Mon, 9 Sep 2024 11:55:30 +0100 Subject: [PATCH 08/11] [fix] Added event tracking for server startup (#129) * Added event tracking for server startup * pre-commit changes * Added missing status_code for openai APIConnectionError * Reverted start_server back to class declaration instead of initialization * Added exception to error message --- llmstudio/engine/__init__.py | 10 +++++++--- llmstudio/engine/providers/azure.py | 8 ++++++-- llmstudio/server.py | 6 ++++-- llmstudio/tracking/__init__.py | 9 ++++++--- llmstudio/ui/__init__.py | 5 +++-- 5 files changed, 26 insertions(+), 12 deletions(-) diff --git a/llmstudio/engine/__init__.py b/llmstudio/engine/__init__.py index 3f503ae3..d541f775 100644 --- a/llmstudio/engine/__init__.py +++ b/llmstudio/engine/__init__.py @@ -1,6 +1,7 @@ import json import os from pathlib import Path +from threading import Event from typing import Any, Dict, List, Optional, Union import uvicorn @@ -78,7 +79,9 @@ def _merge_configs(config1, config2): raise RuntimeError(f"Error in configuration data: {e}") -def create_engine_app(config: EngineConfig = _load_engine_config()) -> FastAPI: +def create_engine_app( + started_event: Event, config: EngineConfig = _load_engine_config() +) -> FastAPI: app = FastAPI( title=ENGINE_TITLE, description=ENGINE_DESCRIPTION, @@ -162,14 +165,15 @@ async def export(request: Request): @app.on_event("startup") async def startup_event(): + started_event.set() print(f"Running LLMstudio Engine on http://{ENGINE_HOST}:{ENGINE_PORT} ") return app -def run_engine_app(): +def run_engine_app(started_event: Event): try: - engine = create_engine_app() + engine = create_engine_app(started_event) uvicorn.run( engine, host=ENGINE_HOST, diff --git a/llmstudio/engine/providers/azure.py b/llmstudio/engine/providers/azure.py index 362abbb0..79afa2fe 100644 --- a/llmstudio/engine/providers/azure.py +++ b/llmstudio/engine/providers/azure.py @@ -121,13 +121,17 @@ async def generate_client( **function_args, **request.parameters.model_dump(), } - # Perform the asynchronous call return await asyncio.to_thread( client.chat.completions.create, **combined_args ) - except openai._exceptions.APIError as e: + except openai._exceptions.APIConnectionError as e: + raise HTTPException( + status_code=404, detail=f"There was an error reaching the endpoint: {e}" + ) + + except openai._exceptions.APIStatusError as e: raise HTTPException(status_code=e.status_code, detail=e.response.json()) def 
prepare_messages(self, request: AzureRequest): diff --git a/llmstudio/server.py b/llmstudio/server.py index e9643d73..c69a5b96 100644 --- a/llmstudio/server.py +++ b/llmstudio/server.py @@ -1,4 +1,5 @@ import threading +from threading import Event import requests @@ -29,8 +30,10 @@ def is_server_running(host, port, path="/health"): def start_server_component(host, port, run_func, server_name): if not is_server_running(host, port): - thread = threading.Thread(target=run_func, daemon=True) + started_event = Event() + thread = threading.Thread(target=run_func, daemon=True, args=(started_event,)) thread.start() + started_event.wait() # wait for startup, this assumes the event is set somewhere return thread else: print(f"{server_name} server already running on {host}:{port}") @@ -53,7 +56,6 @@ def setup_servers(engine, tracking, ui): TRACKING_HOST, TRACKING_PORT, run_tracking_app, "Tracking" ) - ui_thread = None if ui: ui_thread = start_server_component(UI_HOST, UI_PORT, run_ui_app, "UI") diff --git a/llmstudio/tracking/__init__.py b/llmstudio/tracking/__init__.py index 31a7fbe8..d32bc768 100644 --- a/llmstudio/tracking/__init__.py +++ b/llmstudio/tracking/__init__.py @@ -1,3 +1,5 @@ +from threading import Event + import uvicorn from fastapi import APIRouter, FastAPI from fastapi.middleware.cors import CORSMiddleware @@ -15,7 +17,7 @@ ## Tracking -def create_tracking_app() -> FastAPI: +def create_tracking_app(started_event: Event) -> FastAPI: app = FastAPI( title=TRACKING_TITLE, description=TRACKING_DESCRIPTION, @@ -43,14 +45,15 @@ def health_check(): @app.on_event("startup") async def startup_event(): + started_event.set() print(f"Running LLMstudio Tracking on http://{TRACKING_HOST}:{TRACKING_PORT} ") return app -def run_tracking_app(): +def run_tracking_app(started_event: Event): try: - tracking = create_tracking_app() + tracking = create_tracking_app(started_event) uvicorn.run( tracking, host=TRACKING_HOST, diff --git a/llmstudio/ui/__init__.py b/llmstudio/ui/__init__.py index c2a15c22..1569aa6f 100644 --- a/llmstudio/ui/__init__.py +++ b/llmstudio/ui/__init__.py @@ -2,7 +2,7 @@ import subprocess from pathlib import Path import threading -import webbrowser +from threading import Event from llmstudio.config import UI_PORT @@ -20,6 +20,7 @@ def run_bun_in_thread(): print(f"Error running LLMstudio UI: {e}") -def run_ui_app(): +def run_ui_app(started_event: Event): thread = threading.Thread(target=run_bun_in_thread) thread.start() + started_event.set() #just here for compatibility From 6d667a80f63b4276e0d38ed405a5e231150e7567 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Mon, 9 Sep 2024 11:00:11 +0000 Subject: [PATCH 09/11] [fix] bump prerelease version in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 70b4f917..b4ff0348 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "llmstudio" -version = "0.3.11a0" +version = "0.3.11a1" description = "Prompt Perfection at Your Fingertips" authors = ["Cláudio Lemos "] license = "MIT" From 4ce56fb2ed2b6ee2cabe9beb183edc97a2994b0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A1udio=20Lemos?= Date: Mon, 9 Sep 2024 16:21:34 -0500 Subject: [PATCH 10/11] [chore] removed prints --- llmstudio/engine/providers/ollama.py | 1 - llmstudio/engine/providers/provider.py | 1 - 2 files changed, 2 deletions(-) diff --git a/llmstudio/engine/providers/ollama.py b/llmstudio/engine/providers/ollama.py index 3dd1cb6f..192bd849 100644 --- 
a/llmstudio/engine/providers/ollama.py +++ b/llmstudio/engine/providers/ollama.py @@ -64,7 +64,6 @@ async def parse_response( if "error" in chunk: raise HTTPException(status_code=500, detail=chunk["error"]) if chunk.get("done"): - print("done") yield ChatCompletionChunk( id=str(uuid.uuid4()), choices=[ diff --git a/llmstudio/engine/providers/provider.py b/llmstudio/engine/providers/provider.py index 354906c5..fccd8f34 100644 --- a/llmstudio/engine/providers/provider.py +++ b/llmstudio/engine/providers/provider.py @@ -86,7 +86,6 @@ async def chat( else: raise e # Raise other HTTP exceptions except Exception as e: - print(e) raise HTTPException( status_code=500, detail=str(e) ) # Raise other exceptions as HTTP 500 From 41f7c116bc2b79cf2baf5d2d40ed69b2f468ceb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A1udio=20Lemos?= Date: Mon, 9 Sep 2024 16:23:07 -0500 Subject: [PATCH 11/11] [chore] up version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b4ff0348..943d61e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "llmstudio" -version = "0.3.11a1" +version = "0.3.11" description = "Prompt Perfection at Your Fingertips" authors = ["Cláudio Lemos "] license = "MIT"
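A closing note on the function-call convention the azure.py changes in this series rely on: the Llama tool/function prompt instructs the model to reply with exactly one line of the form §{"type": "function", "name": "FUNCTION_NAME", "parameters": {...}} with no prefix or suffix, and handle_tool_response watches the streamed content for that '§' marker before buffering the call. A minimal sketch of the same parsing idea applied to an already-buffered response; detect_function_call is a hypothetical helper for illustration, not the chunk-by-chunk stream handling in the provider:

import json

# Hypothetical helper: detect and parse a '§'-prefixed function call in a
# buffered model response, falling back to None for a normal text answer.
def detect_function_call(response_text: str):
    text = response_text.strip()
    if not text.startswith("§"):
        return None  # plain answer, no function call
    payload = json.loads(text[1:])  # drop the '§' marker, parse the JSON body
    if payload.get("type") != "function":
        return None
    return payload["name"], payload.get("parameters", {})

# Example using the exact format the prompt specifies:
call = detect_function_call(
    '§{"type": "function", "name": "python_repl_ast", "parameters": {"query": "print(df.shape)"}}'
)
print(call)  # -> ('python_repl_ast', {'query': 'print(df.shape)'})
print(detect_function_call("The dataframe has 3 columns."))  # -> None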