diff --git a/.github/workflows/upload-pypi-dev.yml b/.github/workflows/upload-pypi-dev.yml index 8304eb18..ee3d783e 100644 --- a/.github/workflows/upload-pypi-dev.yml +++ b/.github/workflows/upload-pypi-dev.yml @@ -1,4 +1,4 @@ -name: Upload Python package to PyPI as dev pre-release +name: Upload Python package to PyPI as dev release, build and push Docker image to hub. on: workflow_dispatch: @@ -39,3 +39,48 @@ jobs: git add pyproject.toml git commit -m "[fix] bump prerelease version in pyproject.toml" git push + + # Wait for PyPI to update + - name: Wait for PyPI to update + run: | + VERSION=$(poetry version --short) + echo "Checking for llmstudio==$VERSION on PyPI..." + for i in {1..10}; do + if python -m pip install llmstudio==${VERSION} --dry-run >/dev/null 2>&1; then + echo "Package llmstudio==${VERSION} is available on PyPI." + break + else + echo "Package llmstudio==${VERSION} not available yet. Waiting 15 seconds..." + sleep 15 + fi + if [ $i -eq 10 ]; then + echo "Package did not become available in time." + exit 1 + fi + done + + # Docker build and push section + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Log in to Docker Hub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Extract version for tagging Docker image + id: get_version + run: | + echo "VERSION=$(poetry version --short)" >> $GITHUB_ENV + + - name: Build and tag Docker image + run: | + docker build \ + --build-arg LLMSTUDIO_VERSION=${{ env.VERSION }} \ + -t tensoropsai/llmstudio:${{ env.VERSION }} \ + . + + - name: Push Docker image to Docker Hub + run: | + docker push tensoropsai/llmstudio:${{ env.VERSION }} \ No newline at end of file diff --git a/.github/workflows/upload-pypi.yml b/.github/workflows/upload-pypi.yml index ec06e30b..62eedaf0 100644 --- a/.github/workflows/upload-pypi.yml +++ b/.github/workflows/upload-pypi.yml @@ -1,4 +1,4 @@ -name: Upload Python package to PyPI +name: Upload Python package to PyPI and build/push Docker images on: push: @@ -11,23 +11,77 @@ jobs: deploy: runs-on: ubuntu-latest steps: + # Checkout the code - name: Checkout code uses: actions/checkout@v2 + # Set up Python environment - name: Set up Python uses: actions/setup-python@v2 with: python-version: "3.x" + # Install Poetry - name: Install Poetry run: | curl -sSL https://install.python-poetry.org | python3 - + # Configure Poetry with PyPI token - name: Configure Poetry run: | poetry config pypi-token.pypi ${{ secrets.PYPI_API_TOKEN }} + # Build and publish package to PyPI - name: Build and publish to PyPI run: | poetry build poetry publish + + # Extract the new version number from pyproject.toml + - name: Extract version for tagging Docker image + run: | + echo "VERSION=$(poetry version --short)" >> $GITHUB_ENV + + # Wait for the package to become available on PyPI + - name: Wait for PyPI to update + run: | + echo "Checking for llmstudio==${{ env.VERSION }} on PyPI..." + for i in {1..10}; do + if python -m pip install llmstudio==${{ env.VERSION }} --dry-run >/dev/null 2>&1; then + echo "Package llmstudio==${{ env.VERSION }} is available on PyPI." + break + else + echo "Package llmstudio==${{ env.VERSION }} not available yet. Waiting 15 seconds..." + sleep 15 + fi + if [ $i -eq 10 ]; then + echo "Package did not become available in time." 
+ exit 1 + fi + done + + # Set up Docker Buildx + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + # Log in to Docker Hub + - name: Log in to Docker Hub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + # Build and tag Docker images with both :latest and :[NEW_VERSION] + - name: Build and tag Docker images + run: | + docker build \ + --build-arg LLMSTUDIO_VERSION=${{ env.VERSION }} \ + -t tensoropsai/llmstudio:latest \ + -t tensoropsai/llmstudio:${{ env.VERSION }} \ + . + + # Push both Docker images to Docker Hub + - name: Push Docker images to Docker Hub + run: | + docker push tensoropsai/llmstudio:${{ env.VERSION }} + docker push tensoropsai/llmstudio:latest diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..c432cf77 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,17 @@ +# docker/Dockerfile + +FROM python:3.11-slim +ENV PYTHONUNBUFFERED=1 + +# Install tools +RUN apt-get clean && apt-get update + +# Install llmstudio +ARG LLMSTUDIO_VERSION +RUN pip install llmstudio==${LLMSTUDIO_VERSION} +RUN pip install psycopg2-binary + +# Expose Ports +EXPOSE 8001 8002 + +CMD ["llmstudio", "server"] diff --git a/docs/how-to/build-a-tool-agent.mdx b/docs/how-to/build-a-tool-agent.mdx new file mode 100644 index 00000000..59279084 --- /dev/null +++ b/docs/how-to/build-a-tool-agent.mdx @@ -0,0 +1,83 @@ +This guide outlines how to build a tool calling agent using Langchain + LLMstudio. + +## 1. Set up your tools +Start by defining the tools your agent is going to have access to. +```python +from langchain.tools import tool + +@tool +def buy_ticket(destination: str): + """Use this to buy a ticket""" + return "Bought ticket number 270924" + +@tool +def get_departure(ticket_number: str): + """Use this to fetch the departure time of a train""" + return "8:25 AM" +``` + +## 2. Setup your .env +Create a `.env` file on the root of your project with the the credentials for the providers you want to use. + + + + ``` + OPENAI_API_KEY="YOUR_API_KEY" + ``` + + + ``` + GOOGLE_API_KEY="YOUR_API_KEY" + ``` + + + ``` + AZURE_BASE_URL="YOUR_MODEL_ENDPOINT" + AZURE_API_KEY="YOUR_API_KEY" + ``` + + + +## 3. Set up your model using LLMstudio +Use LLMstudio to choose the provider and model you want to use. + + + ```python + model = ChatLLMstudio(model_id='openai/gpt-4o') + ``` + + + ```python + model = ChatLLMstudio(model_id='vertexai/gemini-1.5-flash') + ``` + + + ```python + model = ChatLLMstudio(model_id='azure/Meta-Llama-3.1-70B-Instruct') + ``` + + + +## 4. Build the agent +Set up your agent and agent executor using Langchain. + +```python +from langchain import hub +from langchain.agents import AgentExecutor, create_openai_tools_agent + +prompt = hub.pull("hwchase17/openai-tools-agent") +agent = create_openai_tools_agent(model, tools, prompt) +agent_executor = AgentExecutor(agent=agent, tools=tools) + +input = "Can you buy me a ticket to madrid?" + +# Using with chat history +agent_executor.invoke( + { + "input": input, + } +) +``` + + + diff --git a/docs/how-to/deploy-on-gcp/deploy-on-google-cloud-platform.mdx b/docs/how-to/deploy-on-gcp/deploy-on-google-cloud-platform.mdx new file mode 100644 index 00000000..6dbf80f2 --- /dev/null +++ b/docs/how-to/deploy-on-gcp/deploy-on-google-cloud-platform.mdx @@ -0,0 +1,157 @@ +Learn how to deploy LLMstudio as a containerized application on Google Kubernetes Engine and make calls from a local repository. 
+ + +## Prerequisites +To follow this guide you need to have the following set-up: + +- A **project** on google cloud platform. +- **Kubernetes Engine** API enabled on your project. +- **Kubernetes Engine Admin** role for the user performing the guide. + +## Deploy LLMstudio + +This example demonstrates a public deployment. For a private service accessible only within your enterprise infrastructure, deploy it within your own Virtual Private Cloud (VPC). + + + Begin by navigating to the Kubernetes Engine page. + + + Go to **Workloads** and **Create a new Deployment**. + + + + + + Rename your project. We will call the one in this guide **llmstudio-on-gcp**. + + + + + + Choose between **creating a new cluster** or **using an existing cluster**. + For this guide, we will create a new cluster and use the default region. + + + + + + Once done done with the **Deployment configuration**, proceed to **Container details**. + + + In the new container section, select **Existing container image**. + + + Copy the path to LLMstudio's image available on Docker Hub. + ```bash Image Path + tensoropsai/llmstudio:latest + ``` + Set it as the **Image path** to your container. + + + + + + Configure the following mandatory environment variables: +| Environment Variable | Value | +|----------------------------|-----------| +| `LLMSTUDIO_ENGINE_HOST` | 0.0.0.0 | +| `LLMSTUDIO_ENGINE_PORT` | 8001 | +| `LLMSTUDIO_TRACKING_HOST` | 0.0.0.0 | +| `LLMSTUDIO_TRACKING_PORT` | 8002 | + +Additionally, set the `GOOGLE_API_KEY` environment variable to enable calls to Google's Gemini models. +Refer to **SDK/LLM/Providers** for instructions on setting up other providers. + + + + + + + + After configuring your container, proceed to **Expose (Optional)**. + + + Select **Expose deployment as a new service** and leave the first item as is. + + + + + + Add two other items, and expose the ports defined in the **Set Environment Variables** step. + + + + + + + After setting up and exposing the ports, press **Deploy**. + You have successfully deployed **LLMstudio on Google Cloud Platform**! + + + + +## Make a Call +Now let's make a call to our LLMstudio instance on GCP! + + + + + + Setup a simple project with this two files: + 1. `simple-call.ipynb` + 2. `.env` + + + + + + + Go to your newly deployed **Workload**, scroll to the **Exposing services** section, and take note of the Host of your endpoint. + + + + + Create your `.env` file with the following: + + ```env .env + LLMSTUDIO_ENGINE_HOST = "YOUR_HOST" + LLMSTUDIO_ENGINE_PORT = "8001" + LLMSTUDIO_TRACKING_HOST = "YOUR_TRACKING_PORT" + LLMSTUDIO_TRACKING_PORT = "8002" + ``` + + You are done seting up you **.env** file! + + + + Start by importing llmstudio: + ```python 1st cell + from llmstudio import LLM + ``` + + Set up your LLM. We will be using `gemini-1.5-flash` for this guide. + ```python 2nd cell + llm = LLM('vertexai/gemini-1.5-flash') + ``` + + Chat with your model. + ```python 3rd cell + llm.chat('Hello!') + print(response.chat_output) + ``` + + + + + + + You are done calling llmstudio on GCP! 
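+
+   Putting the three cells together, here is a minimal end-to-end sketch. It assumes the `.env` file created above is available to your process, and it captures the response object before reading `chat_output`:
+
+   ```python
+   from llmstudio import LLM
+
+   # Assumes the LLMSTUDIO_* variables from the .env file above are visible to the process
+   llm = LLM('vertexai/gemini-1.5-flash')
+
+   # Capture the response so its fields can be inspected afterwards
+   response = llm.chat('Hello!')
+   print(response.chat_output)
+   ```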
+ + + + + + + + + diff --git a/docs/how-to/deploy-on-gcp/step-2.png b/docs/how-to/deploy-on-gcp/step-2.png new file mode 100644 index 00000000..4d2bb3ee Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-2.png differ diff --git a/docs/how-to/deploy-on-gcp/step-3.png b/docs/how-to/deploy-on-gcp/step-3.png new file mode 100644 index 00000000..adb4eac3 Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-3.png differ diff --git a/docs/how-to/deploy-on-gcp/step-4.png b/docs/how-to/deploy-on-gcp/step-4.png new file mode 100644 index 00000000..cc18c845 Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-4.png differ diff --git a/docs/how-to/deploy-on-gcp/step-6.png b/docs/how-to/deploy-on-gcp/step-6.png new file mode 100644 index 00000000..6ac8b8b1 Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-6.png differ diff --git a/docs/how-to/deploy-on-gcp/step-7-1.png b/docs/how-to/deploy-on-gcp/step-7-1.png new file mode 100644 index 00000000..e3e523a3 Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-7-1.png differ diff --git a/docs/how-to/deploy-on-gcp/step-7.png b/docs/how-to/deploy-on-gcp/step-7.png new file mode 100644 index 00000000..b8325854 Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-7.png differ diff --git a/docs/how-to/deploy-on-gcp/step-9-1.png b/docs/how-to/deploy-on-gcp/step-9-1.png new file mode 100644 index 00000000..328ca889 Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-9-1.png differ diff --git a/docs/how-to/deploy-on-gcp/step-9-2.png b/docs/how-to/deploy-on-gcp/step-9-2.png new file mode 100644 index 00000000..64d9ddb6 Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-9-2.png differ diff --git a/docs/how-to/deploy-on-gcp/step-9.png b/docs/how-to/deploy-on-gcp/step-9.png new file mode 100644 index 00000000..1953805a Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-9.png differ diff --git a/docs/how-to/deploy-on-gcp/step-env.png b/docs/how-to/deploy-on-gcp/step-env.png new file mode 100644 index 00000000..392a49ad Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-env.png differ diff --git a/docs/how-to/deploy-on-gcp/step-llmstudio-call.png b/docs/how-to/deploy-on-gcp/step-llmstudio-call.png new file mode 100644 index 00000000..e05609fb Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-llmstudio-call.png differ diff --git a/docs/mint.json b/docs/mint.json index 284081e2..0645334f 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -43,6 +43,32 @@ "group": "Get Started", "pages": ["quickstart", "support"] }, + { + "group": "How to", + "pages": [ + "how-to/build-a-tool-agent", + "how-to/deploy-on-gcp/deploy-on-google-cloud-platform" + ] + }, + { + "group": "SDK", + "pages": [ + { + "group": "LLM", + "pages":[{ + "group": "Providers", + "pages":[ + "sdk/llm/providers/openai", + "sdk/llm/providers/vertexai", + "sdk/llm/providers/anthropic", + "sdk/llm/providers/ollama", + "sdk/llm/providers/azure" + ] + }, + "sdk/llm/chat"] + } + ] + }, { "group": "Endpoint Examples", "pages": [ @@ -68,3 +94,4 @@ "baseUrl": "http://localhost:8000" } } + \ No newline at end of file diff --git a/docs/sdk/llm/chat.mdx b/docs/sdk/llm/chat.mdx new file mode 100644 index 00000000..cc8852f4 --- /dev/null +++ b/docs/sdk/llm/chat.mdx @@ -0,0 +1,86 @@ +Make chat calls using your LLM. + +## Parameters +The llm.chat method can have the following parameters. 
+| Parameter | Type | Description | +|-------------------|--------|-----------------------------------------------------------------------------| +| `input ` | str | The input message to send to the chat model. | +| `is_stream` | bool | The temperature parameter for the model. | +| `**kwargs` | dict | Additional parameters to pass to the chat model. | + +Refer to your provider-specific documentation for additional kwargs you can use. + +## Returns +| Output | Type | Description | +|-------------------|--------|-----------------------------------------------------------------------------| +| `ChatCompletion` | object | A chat completion object in the OpenAI format + metrics computed by LLMstudio.| + + +## Usage +Here's how to use `.chat()` to make calls to your LLM. + + + + Start by importing LLM. + ```python + from llmstudio import LLM + ``` + + + Set up an LLM from your desired provider. + ```python + llm = LLM('openai/gpt-4o') + ``` + + + Create your message. Your message can be a simple `string` or a message in the `OpenAI format`. + + + + ```python + message = "Hello, how are you today?" + ``` + + + ```python + message = [ + {"role": "system", "content": "You are a helpfull assistant."}, + {"role": "user", "content": "Hello, how are you today?"} + ] + ``` + + + + + + + + + Get your response. + ```python + response = llm.chat(message) + ``` + + Vizualize your response. + ```python + print(response) + ``` + + + Get your response. + ```python + response = llm.chat(message, is_stream = True) + ``` + + Vizualize your response. + ```python + for chunk in response: + print(chunk) + ``` + + + + You are done chating with your **LLMstudio LLM**! + + + \ No newline at end of file diff --git a/docs/sdk/llm/providers/anthropic.mdx b/docs/sdk/llm/providers/anthropic.mdx new file mode 100644 index 00000000..eeff4915 --- /dev/null +++ b/docs/sdk/llm/providers/anthropic.mdx @@ -0,0 +1,101 @@ +Interact with your Anthropic models using LLMstudios LLM. + +## Supported models +1. `claude-3-opus-20240229` +2. `claude-3-sonnet-2024022` +3. `claude-3-haiku-20240307` +4. `claude-2.1` +5. `claude-2` +6. `claude-instant-1.2` + +## Parameters +An Anthropic LLM interface can have the following parameters: +| Parameter | Type | Description | +|-------------------|--------|-----------------------------------------------------------------------------| +| `api_key` | str | The API key for authentication. | +| `temperature` | float | The temperature parameter for the model. | +| `top_p` | float | The top-p parameter for the model. | +| `max_tokens` | int | The maximum number of tokens for the model's output. | +| `top_k` | int | The top-k parameter for the model. | + + +## Usage +Here is how you setup an interface to interact with your Anthropic models. + + + + + + Create a `.env` file with you `ANTHROPIC_API_KEY` + + Make sure you call your environment variable ANTHROPIC_API_KEY + ```bash + ANTHROPIC_API_KEY="YOUR-KEY" + ``` + + + In your python code, import LLM from llmstudio. + ```python + from llmstudio import LLM + ``` + + + Create your **llm** instance. + ```python + llm = LLM('anthropic/{model}') + ``` + + + **Optional:** You can add your parameters as follows: + ```python + llm = LLM('anthropic/model', + temperature= ..., + max_tokens= ..., + top_p= ..., + frequency_penalty= ..., + presence_penalty= ...) + ``` + You are done setting up your **Anthropic LLM**! + + + + + + + In your python code, import LLM from llmstudio. + ```python + from llmstudio import LLM + ``` + + + Create your **llm** instance. 
+ ```python + llm = LLM('anthropic/{model}',api_key="YOUR_API_KEY") + ``` + + + **Optional:** You can add your parameters as follows: + ```python + llm = LLM('anthropic/model', + temperature= ..., + max_tokens= ..., + top_p= ..., + frequency_penalty= ..., + presence_penalty= ...) + ``` + You are done setting up your **Anthropic LLM**! + + + + + + +## What's next? + + + Learn how to send messeges and recieve responses next! + + + Learn how to build a tool calling agent using llmstudio. + + diff --git a/docs/sdk/llm/providers/azure.mdx b/docs/sdk/llm/providers/azure.mdx new file mode 100644 index 00000000..161a2491 --- /dev/null +++ b/docs/sdk/llm/providers/azure.mdx @@ -0,0 +1,126 @@ +Interact with your Azure models using LLM. + +## Parameters +An Azure LLM interface can have the following parameters: +| Parameter | Type | Description | +|---------------------|--------|-----------------------------------------------------------------------------| +| `temperature` | float | The temperature parameter for the model. | +| `max_tokens` | int | The maximum number of tokens to generate. | +| `top_p` | float | The top-p parameter for the model. | +| `frequency_penalty` | float | The frequency penalty parameter for the model. | +| `presence_penalty` | float | The presence penalty parameter for the model. | + + +## Usage +Here is how you setup an interface to interact with your Azure models. + + + + + Create a `config.yaml` file in the same directory as your code. + 1. πŸ“ src + 1. 🐍 PythonCode.py + 2. 🐍 PyNotebook.ipynb + 3. πŸ“„ **config.yaml** + + + Define your Azure OpenAI provider and models inside the `config.yaml` file. + ```yaml + providers: + azure: + id: azure + name: Azure + chat: true + embed: true + models: + YOUR_MODEL: <- Replace with your model name + mode: chat + max_tokens: ... + input_token_cost: ... + output_token_cost: ... + ``` + If you are not sure, you can leave `max_tokens`, `input_tokens` and the other parameters as **0** + + + Create your **llm** instance. + ```python + llm = LLM('azure/YOUR_MODEL', + api_key = YOUR_API_KEY, + api_endpoint = YOUR_ENDPOINT, + api_version = YOUR_API_VERSION) + ``` + + + **Optional:** You can add your parameters as follows: + ```python + llm = LLM('azure/model', + temperature= ..., + max_tokens= ..., + top_p= ..., + frequency_penalty= ..., + presence_penalty= ...) + ``` + You are done setting up your **Azure LLM**! + + + + + + + Create a `config.yaml` file in the same directory as your code. + 1. πŸ“ src + 1. 🐍 PythonCode.py + 2. 🐍 PyNotebook.ipynb + 3. πŸ“„ **config.yaml** + + + Define your Azure provider and models inside the `config.yaml` file. + ```yaml + providers: + azure: + id: azure + name: Azure + chat: true + embed: true + models: + YOUR_MODEL: <- Replace with your model name + mode: chat + max_tokens: ... + input_token_cost: ... + output_token_cost: ... + ``` + If you are not sure, you can leave `max_tokens`, `input_tokens` and the other parameters as **0** + + + Create your **llm** instance. + ```python + llm = LLM('azure/YOUR_MODEL', + api_key = YOUR_API_KEY, + base_url = YOUR_ENDPOINT) + ``` + + + **Optional:** You can add your parameters as follows: + ```python + llm = LLM('azure/model', + temperature= ..., + max_tokens= ..., + top_p= ..., + frequency_penalty= ..., + presence_penalty= ...) + ``` + You are done setting up your **Azure LLM**! + + + + + +## What's next? + + + Learn how to send messeges and recieve responses next! + + + Learn how to build a tool calling agent using llmstudio. 
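+
+For reference, here is a minimal sketch that ties the Azure OpenAI setup above to a chat call. It assumes the `config.yaml` described earlier is in place and uses placeholder credentials you must replace with your own:
+
+```python
+from llmstudio import LLM
+
+# Azure OpenAI credentials, as described in the setup steps above
+llm = LLM('azure/YOUR_MODEL',
+          api_key='YOUR_API_KEY',
+          api_endpoint='YOUR_ENDPOINT',
+          api_version='YOUR_API_VERSION')
+
+response = llm.chat('Hello, how are you today?')
+print(response)
+```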
+ + diff --git a/docs/sdk/llm/providers/ollama.mdx b/docs/sdk/llm/providers/ollama.mdx new file mode 100644 index 00000000..eae25f25 --- /dev/null +++ b/docs/sdk/llm/providers/ollama.mdx @@ -0,0 +1,70 @@ +Interact with your Ollama models using LLM. + +## Parameters +An Ollama LLM interface can have the following parameters: +| Parameter | Type | Description | +|-------------------|--------|-----------------------------------------------------------------------------| +| `temperature` | float | The temperature parameter for the model. | +| `top_p` | float | The top-p parameter for the model. | +| `num_predict` | int | The number of tokens to predict. | +| `top_k` | int | The top-k parameter for the model. | + + +## Usage +Here is how you setup an interface to interact with your Ollama models. + + + + Create a `config.yaml` in the same directory your code is in. + 1. src + 1. yourPythonCode.py + 2. yourPyNotebook.py + 3. **config.yaml** + + + Define your Ollama provider and models inside the `config.yaml` file. + ```yaml + providers: + ollama: + id: ollama + name: Ollama + chat: true + embed: true + keys: + models: + YOUR_MODEL: <- Replace with your model name + mode: chat + max_tokens: ... + input_token_cost: ... + output_token_cost: ... + ``` + If you are not sure about any of these parameters, you can just leave them as **0** + + + Create your **llm** instance. + ```python + llm = LLM('ollama/{YOUR_MODEL}') + ``` + + + **Optional:** You can add your parameters as follows: + ```python + llm = LLM('ollama/model', + temperature= ..., + num_predict= ..., + top_p= ..., + top_k= ...,) + ``` + You are done setting up your **Ollama LLM**! + + + +## What's next? + + + Learn how to send messeges and recieve responses next! + + + Learn how to build a tool calling agent using llmstudio. + + diff --git a/docs/sdk/llm/providers/openai.mdx b/docs/sdk/llm/providers/openai.mdx new file mode 100644 index 00000000..4c6d3528 --- /dev/null +++ b/docs/sdk/llm/providers/openai.mdx @@ -0,0 +1,100 @@ +Interact with your OpenAI models using LLM. + +## Supported models +1. `gpt-4o` +2. `gpt-4-turbo` +3. `gpt-4` +4. `gpt-3.5-turbo` +5. `gpt-3.5-turbo-instruct` + +## Parameters +An OpenAI LLM interface can have the following parameters: +| Parameter | Type | Description | +|-------------------|--------|-----------------------------------------------------------------------------| +| `api_key` | str | The API key for authentication. | +| `temperature` | float | The temperature parameter for the model. | +| `top_p` | float | The top-p parameter for the model. | +| `max_tokens` | int | The maximum number of tokens for the model's output. | +| `frequency_penalty` | float | The frequency penalty parameter for the model. | +| `presence_penalty` | float | The presence penalty parameter for the model. | + + +## Usage +Here is how you setup an interface to interact with your OpenAI models. + + + + + + Create a `.env` file with you `OPENAI_API_KEY` + + Make sure you call your environment variable OPENAI_API_KEY + ```bash + OPENAI_API_KEY="YOUR-KEY" + ``` + + + In your python code, import LLM from llmstudio. + ```python + from llmstudio import LLM + ``` + + + Create your **llm** instance. + ```python + llm = LLM('openai/{model}') + ``` + + + **Optional:** You can add your parameters as follows: + ```python + llm = LLM('openai/model', + temperature= ..., + max_tokens= ..., + top_p= ..., + frequency_penalty= ..., + presence_penalty= ...) + ``` + You are done setting up your **OpenAI LLM**! 
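+
+   As a quick check, here is a minimal sketch that chats with the model configured above, assuming the `.env` file with `OPENAI_API_KEY` is in place and `gpt-4o` is the chosen model:
+
+   ```python
+   from llmstudio import LLM
+
+   # No api_key argument needed here: the key is taken from OPENAI_API_KEY
+   llm = LLM('openai/gpt-4o')
+
+   response = llm.chat('Hello, how are you today?')
+   print(response)
+   ```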
+ + + + + + + In your python code, import LLM from llmstudio. + ```python + from llmstudio import LLM + ``` + + + Create your **llm** instance. + ```python + llm = LLM('openai/{model}',api_key="YOUR_API_KEY") + ``` + + + **Optional:** You can add your parameters as follows: + ```python + llm = LLM('openai/model', + temperature= ..., + max_tokens= ..., + top_p= ..., + frequency_penalty= ..., + presence_penalty= ...) + ``` + You are done setting up your **OpenAI LLM**! + + + + + +## What's next? + + + Learn how to send messeges and recieve responses next! + + + Learn how to build a tool calling agent using llmstudio. + + diff --git a/docs/sdk/llm/providers/vertexai.mdx b/docs/sdk/llm/providers/vertexai.mdx new file mode 100644 index 00000000..1f5a1c81 --- /dev/null +++ b/docs/sdk/llm/providers/vertexai.mdx @@ -0,0 +1,98 @@ +Interact with your VertexAI models using LLM. + +## Supported models +1. `gemini-1.5-flash` +2. `gemini-1.5-pro` +3. `gemini-1.0-pro` + +## Parameters +A VertexAI LLM interface can have the following parameters: +| Parameter | Type | Description | +|-------------------|--------|-----------------------------------------------------------------------------| +| `api_key` | str | The API key for authentication. | +| `temperature` | float | The temperature parameter for the model. | +| `top_p` | float | The top-p parameter for the model. | +| `max_tokens` | int | The maximum number of tokens for the model's output. | +| `frequency_penalty` | float | The frequency penalty parameter for the model. | +| `presence_penalty` | float | The presence penalty parameter for the model. | + + +## Usage +Here is how you setup an interface to interact with your VertexAI models. + + + + + + Create a `.env` file with you `GOOGLE_API_KEY` + + Make sure you call your environment variable GOOGLE_API_KEY + ```bash + GOOGLE_API_KEY="YOUR-KEY" + ``` + + + In your python code, import LLM from llmstudio. + ```python + from llmstudio import LLM + ``` + + + Create your **llm** instance. + ```python + llm = LLM('vertexai/{model}') + ``` + + + **Optional:** You can add your parameters as follows: + ```python + llm = LLM('vertexai/model', + temperature= ..., + max_tokens= ..., + top_p= ..., + frequency_penalty= ..., + presence_penalty= ...) + ``` + You are done setting up your **VertexAI LLM**! + + + + + + + In your python code, import LLM from llmstudio. + ```python + from llmstudio import LLM + ``` + + + Create your **llm** instance. + ```python + llm = LLM('vertexai/{model}',api_key="YOUR_API_KEY") + ``` + + + **Optional:** You can add your parameters as follows: + ```python + llm = LLM('vertexai/model', + temperature= ..., + max_tokens= ..., + top_p= ..., + frequency_penalty= ..., + presence_penalty= ...) + ``` + You are done setting up your **VertexAI LLM**! + + + + + +## What's next? + + + Learn how to send messeges and recieve responses next! + + + Learn how to build a tool calling agent using llmstudio. 
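+
+For reference, here is a minimal streaming sketch using one of the supported models (`gemini-1.5-flash`). It assumes `GOOGLE_API_KEY` is set as described above; `is_stream` is documented on the Chat page:
+
+```python
+from llmstudio import LLM
+
+llm = LLM('vertexai/gemini-1.5-flash')
+
+# Stream the response and print each chunk as it arrives
+response = llm.chat('Hello, how are you today?', is_stream=True)
+for chunk in response:
+    print(chunk)
+```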
+ + \ No newline at end of file diff --git a/examples/06_gcloud_guide.ipynb b/examples/06_gcloud_guide.ipynb new file mode 100644 index 00000000..fa29d234 --- /dev/null +++ b/examples/06_gcloud_guide.ipynb @@ -0,0 +1,108 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Engine server already running on 0.0.0.0:8001\n", + "Tracking server already running on 0.0.0.0:8002\n" + ] + } + ], + "source": [ + "from llmstudio import LLM" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "llm = LLM('openai/gpt-3.5-turbo')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "ename": "ConnectionError", + "evalue": "('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRemoteDisconnected\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:793\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 792\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 793\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 799\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 800\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 801\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 802\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 803\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 805\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 806\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 808\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:537\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 537\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connection.py:466\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 465\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 466\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 468\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:1395\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1394\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1395\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1396\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:325\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 325\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:294\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m line:\n\u001b[1;32m 292\u001b[0m \u001b[38;5;66;03m# Presumably, the server closed the connection before\u001b[39;00m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;66;03m# sending a valid response.\u001b[39;00m\n\u001b[0;32m--> 294\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RemoteDisconnected(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRemote end closed connection without\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 295\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m 
response\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 296\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", + "\u001b[0;31mRemoteDisconnected\u001b[0m: Remote end closed connection without response", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mProtocolError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/requests/adapters.py:589\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 596\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 598\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 599\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 600\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 601\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 603\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:847\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 845\u001b[0m new_e \u001b[38;5;241m=\u001b[39m 
ProtocolError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mConnection aborted.\u001b[39m\u001b[38;5;124m\"\u001b[39m, new_e)\n\u001b[0;32m--> 847\u001b[0m retries \u001b[38;5;241m=\u001b[39m \u001b[43mretries\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mincrement\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 848\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merror\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnew_e\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_pool\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_stacktrace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msys\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexc_info\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[1;32m 849\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 850\u001b[0m retries\u001b[38;5;241m.\u001b[39msleep()\n", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/util/retry.py:470\u001b[0m, in \u001b[0;36mRetry.increment\u001b[0;34m(self, method, url, response, error, _pool, _stacktrace)\u001b[0m\n\u001b[1;32m 469\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m read \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m method \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_is_method_retryable(method):\n\u001b[0;32m--> 470\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[43mreraise\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mtype\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43merror\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merror\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_stacktrace\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 471\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m read \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/util/util.py:38\u001b[0m, in \u001b[0;36mreraise\u001b[0;34m(tp, value, tb)\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m value\u001b[38;5;241m.\u001b[39m__traceback__ \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m tb:\n\u001b[0;32m---> 38\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m value\u001b[38;5;241m.\u001b[39mwith_traceback(tb)\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m value\n", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:793\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 792\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 793\u001b[0m response \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 799\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 800\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 801\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 802\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 803\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 805\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 806\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 808\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:537\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 537\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connection.py:466\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 465\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 466\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 468\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:1395\u001b[0m, in 
\u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1394\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1395\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1396\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:325\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 325\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n", + "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:294\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m line:\n\u001b[1;32m 292\u001b[0m \u001b[38;5;66;03m# Presumably, the server closed the connection before\u001b[39;00m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;66;03m# sending a valid response.\u001b[39;00m\n\u001b[0;32m--> 294\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RemoteDisconnected(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRemote end closed connection without\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 295\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m response\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 296\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", + "\u001b[0;31mProtocolError\u001b[0m: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mConnectionError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mllm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mHi\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/GitHub/LLMstudio/llmstudio/llm/__init__.py:31\u001b[0m, in \u001b[0;36mLLM.chat\u001b[0;34m(self, input, is_stream, retries, **kwargs)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mchat\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: \u001b[38;5;28mstr\u001b[39m, is_stream: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, retries: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m---> 31\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mrequests\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpost\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 32\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhttp://\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mENGINE_HOST\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m:\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mENGINE_PORT\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/api/engine/chat/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprovider\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 33\u001b[0m \u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m 34\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 35\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msession_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msession_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_key\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapi_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 37\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_endpoint\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapi_endpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_version\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapi_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mbase_url\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_url\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mchat_input\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 41\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mis_stream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mis_stream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 42\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mretries\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 43\u001b[0m \u001b[43m 
 [... ANSI-colored traceback frames elided: llmstudio request construction (temperature, top_p, top_k, max_tokens, max_output_tokens, frequency_penalty, presence_penalty), then requests/api.py post, requests/sessions.py Session.request and Session.send, requests/adapters.py HTTPAdapter.send ...]",
+      "ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))"
+     ]
+    }
+   ],
+   "source": [
+    "llm.chat('Hi')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "llmstudiodev",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/llmstudio/cli.py b/llmstudio/cli.py
index 2c6197ec..57944402 100644
--- a/llmstudio/cli.py
+++ b/llmstudio/cli.py
@@ -1,5 +1,6 @@
 import os
 import signal
+import threading
 
 import click
@@ -25,8 +26,9 @@ def server(ui):
 
     print("Servers are running. Press CTRL+C to stop.")
 
+    stop_event = threading.Event()
     try:
-        signal.pause()
+        stop_event.wait()  # Wait indefinitely until the event is set
     except KeyboardInterrupt:
         print("Shutting down servers...")
diff --git a/llmstudio/engine/providers/azure.py b/llmstudio/engine/providers/azure.py
index 79afa2fe..35fa8ecc 100644
--- a/llmstudio/engine/providers/azure.py
+++ b/llmstudio/engine/providers/azure.py
@@ -153,6 +153,7 @@ def prepare_messages(self, request: AzureRequest):
     async def parse_response(
         self, response: AsyncGenerator, **kwargs
     ) -> AsyncGenerator[str, None]:
+
         if self.is_llama and (self.has_tools or self.has_functions):
             async for chunk in self.handle_tool_response(response, **kwargs):
                 yield chunk
diff --git a/llmstudio/llm/__init__.py b/llmstudio/llm/__init__.py
index 8f42086b..ac086dc2 100644
--- a/llmstudio/llm/__init__.py
+++ b/llmstudio/llm/__init__.py
@@ -14,6 +14,7 @@ class LLM:
     def __init__(self, model_id: str, **kwargs):
+        self.provider, self.model = model_id.split("/")
         self.session_id = kwargs.get("session_id")
         self.api_key = kwargs.get("api_key")
diff --git a/pyproject.toml b/pyproject.toml
index 943d61e6..09c90d39 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "llmstudio"
-version = "0.3.11"
+version = "0.3.12a10"
 description = "Prompt Perfection at Your Fingertips"
 authors = ["Cláudio Lemos"]
 license = "MIT"
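
For context on the `llmstudio/cli.py` hunk above: `signal.pause()` exists only on POSIX systems, so the patch replaces it with a `threading.Event` that the main thread waits on until interrupted. A minimal standalone sketch of that pattern follows; the function name and the `__main__` guard are illustrative only and not taken from the llmstudio codebase.

```python
import threading

# Sketch of the shutdown pattern adopted in cli.py: block the main thread
# until Ctrl+C (or until another thread calls stop_event.set()), instead of
# relying on signal.pause(), which is unavailable on Windows.
stop_event = threading.Event()


def run_until_interrupted() -> None:
    print("Servers are running. Press CTRL+C to stop.")
    try:
        stop_event.wait()  # blocks indefinitely until the event is set
    except KeyboardInterrupt:
        print("Shutting down servers...")


if __name__ == "__main__":
    run_until_interrupted()
```

Note that on some platforms an `Event.wait()` with no timeout can delay Ctrl+C handling; looping over `wait()` with a short timeout is a common variant when prompt interruption matters.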