diff --git a/.github/workflows/upload-pypi-dev.yml b/.github/workflows/upload-pypi-dev.yml
index 8304eb18..ee3d783e 100644
--- a/.github/workflows/upload-pypi-dev.yml
+++ b/.github/workflows/upload-pypi-dev.yml
@@ -1,4 +1,4 @@
-name: Upload Python package to PyPI as dev pre-release
+name: Upload Python package to PyPI as dev pre-release, build and push Docker image to Docker Hub
on:
workflow_dispatch:
@@ -39,3 +39,48 @@ jobs:
git add pyproject.toml
git commit -m "[fix] bump prerelease version in pyproject.toml"
git push
+
+ # Wait for PyPI to update
+ - name: Wait for PyPI to update
+ run: |
+ VERSION=$(poetry version --short)
+ echo "Checking for llmstudio==$VERSION on PyPI..."
+ for i in {1..10}; do
+ if python -m pip install llmstudio==${VERSION} --dry-run >/dev/null 2>&1; then
+ echo "Package llmstudio==${VERSION} is available on PyPI."
+ break
+ else
+ echo "Package llmstudio==${VERSION} not available yet. Waiting 15 seconds..."
+ sleep 15
+ fi
+ if [ $i -eq 10 ]; then
+ echo "Package did not become available in time."
+ exit 1
+ fi
+ done
+
+ # Docker build and push section
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v2
+
+ - name: Log in to Docker Hub
+ uses: docker/login-action@v2
+ with:
+ username: ${{ secrets.DOCKER_USERNAME }}
+ password: ${{ secrets.DOCKER_PASSWORD }}
+
+ - name: Extract version for tagging Docker image
+ id: get_version
+ run: |
+ echo "VERSION=$(poetry version --short)" >> $GITHUB_ENV
+
+ - name: Build and tag Docker image
+ run: |
+ docker build \
+ --build-arg LLMSTUDIO_VERSION=${{ env.VERSION }} \
+ -t tensoropsai/llmstudio:${{ env.VERSION }} \
+ .
+
+ - name: Push Docker image to Docker Hub
+ run: |
+ docker push tensoropsai/llmstudio:${{ env.VERSION }}
\ No newline at end of file
diff --git a/.github/workflows/upload-pypi.yml b/.github/workflows/upload-pypi.yml
index ec06e30b..62eedaf0 100644
--- a/.github/workflows/upload-pypi.yml
+++ b/.github/workflows/upload-pypi.yml
@@ -1,4 +1,4 @@
-name: Upload Python package to PyPI
+name: Upload Python package to PyPI and build/push Docker images
on:
push:
@@ -11,23 +11,77 @@ jobs:
deploy:
runs-on: ubuntu-latest
steps:
+ # Checkout the code
- name: Checkout code
uses: actions/checkout@v2
+ # Set up Python environment
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: "3.x"
+ # Install Poetry
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python3 -
+ # Configure Poetry with PyPI token
- name: Configure Poetry
run: |
poetry config pypi-token.pypi ${{ secrets.PYPI_API_TOKEN }}
+ # Build and publish package to PyPI
- name: Build and publish to PyPI
run: |
poetry build
poetry publish
+
+ # Extract the new version number from pyproject.toml
+ - name: Extract version for tagging Docker image
+ run: |
+ echo "VERSION=$(poetry version --short)" >> $GITHUB_ENV
+
+ # Wait for the package to become available on PyPI
+ - name: Wait for PyPI to update
+ run: |
+ echo "Checking for llmstudio==${{ env.VERSION }} on PyPI..."
+ for i in {1..10}; do
+ if python -m pip install llmstudio==${{ env.VERSION }} --dry-run >/dev/null 2>&1; then
+ echo "Package llmstudio==${{ env.VERSION }} is available on PyPI."
+ break
+ else
+ echo "Package llmstudio==${{ env.VERSION }} not available yet. Waiting 15 seconds..."
+ sleep 15
+ fi
+ if [ $i -eq 10 ]; then
+ echo "Package did not become available in time."
+ exit 1
+ fi
+ done
+
+ # Set up Docker Buildx
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v2
+
+ # Log in to Docker Hub
+ - name: Log in to Docker Hub
+ uses: docker/login-action@v2
+ with:
+ username: ${{ secrets.DOCKER_USERNAME }}
+ password: ${{ secrets.DOCKER_PASSWORD }}
+
+ # Build and tag Docker images with both :latest and :[NEW_VERSION]
+ - name: Build and tag Docker images
+ run: |
+ docker build \
+ --build-arg LLMSTUDIO_VERSION=${{ env.VERSION }} \
+ -t tensoropsai/llmstudio:latest \
+ -t tensoropsai/llmstudio:${{ env.VERSION }} \
+ .
+
+ # Push both Docker images to Docker Hub
+ - name: Push Docker images to Docker Hub
+ run: |
+ docker push tensoropsai/llmstudio:${{ env.VERSION }}
+ docker push tensoropsai/llmstudio:latest
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..c432cf77
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+# Dockerfile
+
+FROM python:3.11-slim
+ENV PYTHONUNBUFFERED=1
+
+# Refresh package index
+RUN apt-get clean && apt-get update
+
+# Install llmstudio
+ARG LLMSTUDIO_VERSION
+RUN pip install llmstudio==${LLMSTUDIO_VERSION}
+RUN pip install psycopg2-binary
+
+# Expose Ports
+EXPOSE 8001 8002
+
+CMD ["llmstudio", "server"]
diff --git a/docs/how-to/build-a-tool-agent.mdx b/docs/how-to/build-a-tool-agent.mdx
new file mode 100644
index 00000000..59279084
--- /dev/null
+++ b/docs/how-to/build-a-tool-agent.mdx
@@ -0,0 +1,83 @@
+This guide outlines how to build a tool calling agent using Langchain + LLMstudio.
+
+## 1. Set up your tools
+Start by defining the tools your agent is going to have access to.
+```python
+from langchain.tools import tool
+
+@tool
+def buy_ticket(destination: str):
+ """Use this to buy a ticket"""
+ return "Bought ticket number 270924"
+
+@tool
+def get_departure(ticket_number: str):
+ """Use this to fetch the departure time of a train"""
+ return "8:25 AM"
+```
+
+## 2. Setup your .env
+Create a `.env` file at the root of your project with the credentials for the providers you want to use.
+
+
+
+ ```
+ OPENAI_API_KEY="YOUR_API_KEY"
+ ```
+
+
+ ```
+ GOOGLE_API_KEY="YOUR_API_KEY"
+ ```
+
+
+ ```
+ AZURE_BASE_URL="YOUR_MODEL_ENDPOINT"
+ AZURE_API_KEY="YOUR_API_KEY"
+ ```
+
+
+
+## 3. Set up your model using LLMstudio
+Use LLMstudio to choose the provider and model you want to use.
+
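+The snippets below assume `ChatLLMstudio` has been imported from LLMstudio's Langchain integration. This import path is the usual one, but it may differ between versions:
+
+```python
+# Assumed import path for the Langchain-compatible chat model
+from llmstudio.langchain import ChatLLMstudio
+```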
+
+ ```python
+ model = ChatLLMstudio(model_id='openai/gpt-4o')
+ ```
+
+
+ ```python
+ model = ChatLLMstudio(model_id='vertexai/gemini-1.5-flash')
+ ```
+
+
+ ```python
+ model = ChatLLMstudio(model_id='azure/Meta-Llama-3.1-70B-Instruct')
+ ```
+
+
+
+## 4. Build the agent
+Set up your agent and agent executor using Langchain.
+
+```python
+from langchain import hub
+from langchain.agents import AgentExecutor, create_openai_tools_agent
+
+prompt = hub.pull("hwchase17/openai-tools-agent")
+tools = [buy_ticket, get_departure]
+
+agent = create_openai_tools_agent(model, tools, prompt)
+agent_executor = AgentExecutor(agent=agent, tools=tools)
+
+input = "Can you buy me a ticket to Madrid?"
+
+# Invoke the agent
+agent_executor.invoke(
+ {
+ "input": input,
+ }
+)
+```
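+
+The agent can also call the second tool from step 1. A minimal follow-up sketch (the exact wording of the model's answer will vary):
+
+```python
+# Ask a question that should route to the get_departure tool
+result = agent_executor.invoke(
+    {
+        "input": "What time does the train with ticket number 270924 depart?",
+    }
+)
+
+# AgentExecutor.invoke returns a dict containing the final "output"
+print(result["output"])
+```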
+
+
+
diff --git a/docs/how-to/deploy-on-gcp/deploy-on-google-cloud-platform.mdx b/docs/how-to/deploy-on-gcp/deploy-on-google-cloud-platform.mdx
new file mode 100644
index 00000000..6dbf80f2
--- /dev/null
+++ b/docs/how-to/deploy-on-gcp/deploy-on-google-cloud-platform.mdx
@@ -0,0 +1,157 @@
+Learn how to deploy LLMstudio as a containerized application on Google Kubernetes Engine and make calls from a local repository.
+
+
+## Prerequisites
+To follow this guide, you need the following:
+
+- A **project** on Google Cloud Platform.
+- The **Kubernetes Engine** API enabled on your project.
+- The **Kubernetes Engine Admin** role for the user following this guide.
+
+## Deploy LLMstudio
+
+This example demonstrates a public deployment. For a private service accessible only within your enterprise infrastructure, deploy it within your own Virtual Private Cloud (VPC).
+
+
+ Begin by navigating to the Kubernetes Engine page.
+
+
+ Go to **Workloads** and **Create a new Deployment**.
+
+
+
+
+
+    Name your deployment. We will call the one in this guide **llmstudio-on-gcp**.
+
+
+
+
+
+ Choose between **creating a new cluster** or **using an existing cluster**.
+ For this guide, we will create a new cluster and use the default region.
+
+
+
+
+
+    Once done with the **Deployment configuration**, proceed to **Container details**.
+
+
+ In the new container section, select **Existing container image**.
+
+
+ Copy the path to LLMstudio's image available on Docker Hub.
+ ```bash Image Path
+ tensoropsai/llmstudio:latest
+ ```
+ Set it as the **Image path** to your container.
+
+
+
+
+
+ Configure the following mandatory environment variables:
+| Environment Variable | Value |
+|----------------------------|-----------|
+| `LLMSTUDIO_ENGINE_HOST` | 0.0.0.0 |
+| `LLMSTUDIO_ENGINE_PORT` | 8001 |
+| `LLMSTUDIO_TRACKING_HOST` | 0.0.0.0 |
+| `LLMSTUDIO_TRACKING_PORT` | 8002 |
+
+Additionally, set the `GOOGLE_API_KEY` environment variable to enable calls to Google's Gemini models.
+Refer to **SDK/LLM/Providers** for instructions on setting up other providers.
+
+
+
+
+
+
+
+ After configuring your container, proceed to **Expose (Optional)**.
+
+
+ Select **Expose deployment as a new service** and leave the first item as is.
+
+
+
+
+
+ Add two other items, and expose the ports defined in the **Set Environment Variables** step.
+
+
+
+
+
+
+ After setting up and exposing the ports, press **Deploy**.
+ You have successfully deployed **LLMstudio on Google Cloud Platform**!
+
+
+
+
+## Make a Call
+Now let's make a call to our LLMstudio instance on GCP!
+
+
+
+
+
+    Set up a simple project with these two files:
+ 1. `simple-call.ipynb`
+ 2. `.env`
+
+
+
+
+
+
+ Go to your newly deployed **Workload**, scroll to the **Exposing services** section, and take note of the Host of your endpoint.
+
+
+
+
+ Create your `.env` file with the following:
+
+ ```env .env
+ LLMSTUDIO_ENGINE_HOST = "YOUR_HOST"
+ LLMSTUDIO_ENGINE_PORT = "8001"
+    LLMSTUDIO_TRACKING_HOST = "YOUR_HOST"
+ LLMSTUDIO_TRACKING_PORT = "8002"
+ ```
+
+    You are done setting up your **.env** file!
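+
+    LLMstudio will usually pick these variables up from the `.env` file automatically. If they are not being picked up in your setup, a minimal sketch (assuming the `python-dotenv` package is installed) loads them explicitly before importing llmstudio:
+
+    ```python
+    # Assumption: python-dotenv is installed (pip install python-dotenv)
+    from dotenv import load_dotenv
+
+    load_dotenv()  # loads the LLMSTUDIO_* variables from .env
+    ```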
+
+
+
+ Start by importing llmstudio:
+ ```python 1st cell
+ from llmstudio import LLM
+ ```
+
+ Set up your LLM. We will be using `gemini-1.5-flash` for this guide.
+ ```python 2nd cell
+ llm = LLM('vertexai/gemini-1.5-flash')
+ ```
+
+ Chat with your model.
+ ```python 3rd cell
+    response = llm.chat('Hello!')
+ print(response.chat_output)
+ ```
+
+
+
+
+
+
+ You are done calling llmstudio on GCP!
+
+
+
+
+
+
+
+
+
diff --git a/docs/how-to/deploy-on-gcp/step-2.png b/docs/how-to/deploy-on-gcp/step-2.png
new file mode 100644
index 00000000..4d2bb3ee
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-2.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-3.png b/docs/how-to/deploy-on-gcp/step-3.png
new file mode 100644
index 00000000..adb4eac3
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-3.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-4.png b/docs/how-to/deploy-on-gcp/step-4.png
new file mode 100644
index 00000000..cc18c845
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-4.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-6.png b/docs/how-to/deploy-on-gcp/step-6.png
new file mode 100644
index 00000000..6ac8b8b1
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-6.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-7-1.png b/docs/how-to/deploy-on-gcp/step-7-1.png
new file mode 100644
index 00000000..e3e523a3
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-7-1.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-7.png b/docs/how-to/deploy-on-gcp/step-7.png
new file mode 100644
index 00000000..b8325854
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-7.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-9-1.png b/docs/how-to/deploy-on-gcp/step-9-1.png
new file mode 100644
index 00000000..328ca889
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-9-1.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-9-2.png b/docs/how-to/deploy-on-gcp/step-9-2.png
new file mode 100644
index 00000000..64d9ddb6
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-9-2.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-9.png b/docs/how-to/deploy-on-gcp/step-9.png
new file mode 100644
index 00000000..1953805a
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-9.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-env.png b/docs/how-to/deploy-on-gcp/step-env.png
new file mode 100644
index 00000000..392a49ad
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-env.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-llmstudio-call.png b/docs/how-to/deploy-on-gcp/step-llmstudio-call.png
new file mode 100644
index 00000000..e05609fb
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-llmstudio-call.png differ
diff --git a/docs/mint.json b/docs/mint.json
index 284081e2..0645334f 100644
--- a/docs/mint.json
+++ b/docs/mint.json
@@ -43,6 +43,32 @@
"group": "Get Started",
"pages": ["quickstart", "support"]
},
+ {
+ "group": "How to",
+ "pages": [
+ "how-to/build-a-tool-agent",
+ "how-to/deploy-on-gcp/deploy-on-google-cloud-platform"
+ ]
+ },
+ {
+ "group": "SDK",
+ "pages": [
+ {
+ "group": "LLM",
+          "pages": [
+            {
+              "group": "Providers",
+              "pages": [
+                "sdk/llm/providers/openai",
+                "sdk/llm/providers/vertexai",
+                "sdk/llm/providers/anthropic",
+                "sdk/llm/providers/ollama",
+                "sdk/llm/providers/azure"
+              ]
+            },
+            "sdk/llm/chat"
+          ]
+        }
+ ]
+ },
{
"group": "Endpoint Examples",
"pages": [
@@ -68,3 +94,4 @@
"baseUrl": "http://localhost:8000"
}
}
+
\ No newline at end of file
diff --git a/docs/sdk/llm/chat.mdx b/docs/sdk/llm/chat.mdx
new file mode 100644
index 00000000..cc8852f4
--- /dev/null
+++ b/docs/sdk/llm/chat.mdx
@@ -0,0 +1,86 @@
+Make chat calls using your LLM.
+
+## Parameters
+The `llm.chat` method accepts the following parameters.
+| Parameter         | Type   | Description                                                                   |
+|-------------------|--------|-------------------------------------------------------------------------------|
+| `input`           | str    | The input message to send to the chat model.                                  |
+| `is_stream`       | bool   | Whether to stream the response.                                                |
+| `**kwargs`        | dict   | Additional parameters to pass to the chat model.                               |
+
+Refer to your provider-specific documentation for additional kwargs you can use.
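+
+For example, sampling parameters can be passed per call through `**kwargs`. A minimal sketch, assuming an `llm` instance created as shown in the Usage section below (parameter support varies by provider):
+
+```python
+response = llm.chat(
+    "Write a haiku about trains.",
+    temperature=0.2,  # forwarded to the underlying model
+    max_tokens=64,
+)
+```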
+
+## Returns
+| Output | Type | Description |
+|-------------------|--------|-----------------------------------------------------------------------------|
+| `ChatCompletion` | object | A chat completion object in the OpenAI format + metrics computed by LLMstudio.|
+
+
+## Usage
+Here's how to use `.chat()` to make calls to your LLM.
+
+
+
+ Start by importing LLM.
+ ```python
+ from llmstudio import LLM
+ ```
+
+
+ Set up an LLM from your desired provider.
+ ```python
+ llm = LLM('openai/gpt-4o')
+ ```
+
+
+ Create your message. Your message can be a simple `string` or a message in the `OpenAI format`.
+
+
+
+ ```python
+ message = "Hello, how are you today?"
+ ```
+
+
+ ```python
+ message = [
+        {"role": "system", "content": "You are a helpful assistant."},
+ {"role": "user", "content": "Hello, how are you today?"}
+ ]
+ ```
+
+
+
+
+
+
+
+
+ Get your response.
+ ```python
+ response = llm.chat(message)
+ ```
+
+    Visualize your response.
+ ```python
+ print(response)
+ ```
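+
+    The response is a `ChatCompletion`-style object. A quick sketch of reading it; `chat_output` is used elsewhere in these docs, while the metrics attribute name is an assumption and may differ between versions:
+    ```python
+    # The model's reply
+    print(response.chat_output)
+
+    # Metrics computed by LLMstudio (assumed attribute name)
+    print(response.metrics)
+    ```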
+
+
+ Get your response.
+ ```python
+ response = llm.chat(message, is_stream = True)
+ ```
+
+    Visualize your response.
+ ```python
+ for chunk in response:
+ print(chunk)
+ ```
+
+
+
+    You are done chatting with your **LLMstudio LLM**!
+
+
+
\ No newline at end of file
diff --git a/docs/sdk/llm/providers/anthropic.mdx b/docs/sdk/llm/providers/anthropic.mdx
new file mode 100644
index 00000000..eeff4915
--- /dev/null
+++ b/docs/sdk/llm/providers/anthropic.mdx
@@ -0,0 +1,101 @@
+Interact with your Anthropic models using LLMstudio's LLM.
+
+## Supported models
+1. `claude-3-opus-20240229`
+2. `claude-3-sonnet-20240229`
+3. `claude-3-haiku-20240307`
+4. `claude-2.1`
+5. `claude-2`
+6. `claude-instant-1.2`
+
+## Parameters
+An Anthropic LLM interface can have the following parameters:
+| Parameter | Type | Description |
+|-------------------|--------|-----------------------------------------------------------------------------|
+| `api_key` | str | The API key for authentication. |
+| `temperature` | float | The temperature parameter for the model. |
+| `top_p` | float | The top-p parameter for the model. |
+| `max_tokens` | int | The maximum number of tokens for the model's output. |
+| `top_k` | int | The top-k parameter for the model. |
+
+
+## Usage
+Here is how you set up an interface to interact with your Anthropic models.
+
+
+
+
+
+    Create a `.env` file with your `ANTHROPIC_API_KEY`.
+
+    Make sure you name your environment variable `ANTHROPIC_API_KEY`.
+ ```bash
+ ANTHROPIC_API_KEY="YOUR-KEY"
+ ```
+
+
+ In your python code, import LLM from llmstudio.
+ ```python
+ from llmstudio import LLM
+ ```
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('anthropic/{model}')
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('anthropic/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+            top_k= ...)
+ ```
+ You are done setting up your **Anthropic LLM**!
+
+
+
+
+
+
+ In your python code, import LLM from llmstudio.
+ ```python
+ from llmstudio import LLM
+ ```
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('anthropic/{model}',api_key="YOUR_API_KEY")
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('anthropic/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+            top_k= ...)
+ ```
+ You are done setting up your **Anthropic LLM**!
+
+
+
+
+
+
+## What's next?
+
+
+    Learn how to send messages and receive responses next!
+
+
+ Learn how to build a tool calling agent using llmstudio.
+
+
diff --git a/docs/sdk/llm/providers/azure.mdx b/docs/sdk/llm/providers/azure.mdx
new file mode 100644
index 00000000..161a2491
--- /dev/null
+++ b/docs/sdk/llm/providers/azure.mdx
@@ -0,0 +1,126 @@
+Interact with your Azure models using LLM.
+
+## Parameters
+An Azure LLM interface can have the following parameters:
+| Parameter | Type | Description |
+|---------------------|--------|-----------------------------------------------------------------------------|
+| `temperature` | float | The temperature parameter for the model. |
+| `max_tokens` | int | The maximum number of tokens to generate. |
+| `top_p` | float | The top-p parameter for the model. |
+| `frequency_penalty` | float | The frequency penalty parameter for the model. |
+| `presence_penalty` | float | The presence penalty parameter for the model. |
+
+
+## Usage
+Here is how you set up an interface to interact with your Azure models.
+
+
+
+
+ Create a `config.yaml` file in the same directory as your code.
+    1. src
+        1. PythonCode.py
+        2. PyNotebook.ipynb
+        3. **config.yaml**
+
+
+ Define your Azure OpenAI provider and models inside the `config.yaml` file.
+ ```yaml
+ providers:
+ azure:
+ id: azure
+ name: Azure
+ chat: true
+ embed: true
+ models:
+ YOUR_MODEL: <- Replace with your model name
+ mode: chat
+ max_tokens: ...
+ input_token_cost: ...
+ output_token_cost: ...
+ ```
+    If you are not sure, you can leave `max_tokens`, `input_token_cost`, and the other parameters as **0**
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('azure/YOUR_MODEL',
+ api_key = YOUR_API_KEY,
+ api_endpoint = YOUR_ENDPOINT,
+ api_version = YOUR_API_VERSION)
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('azure/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+ frequency_penalty= ...,
+ presence_penalty= ...)
+ ```
+ You are done setting up your **Azure LLM**!
+
+
+
+
+
+
+ Create a `config.yaml` file in the same directory as your code.
+    1. src
+        1. PythonCode.py
+        2. PyNotebook.ipynb
+        3. **config.yaml**
+
+
+ Define your Azure provider and models inside the `config.yaml` file.
+ ```yaml
+ providers:
+ azure:
+ id: azure
+ name: Azure
+ chat: true
+ embed: true
+ models:
+ YOUR_MODEL: <- Replace with your model name
+ mode: chat
+ max_tokens: ...
+ input_token_cost: ...
+ output_token_cost: ...
+ ```
+    If you are not sure, you can leave `max_tokens`, `input_token_cost`, and the other parameters as **0**
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('azure/YOUR_MODEL',
+ api_key = YOUR_API_KEY,
+ base_url = YOUR_ENDPOINT)
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('azure/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+ frequency_penalty= ...,
+ presence_penalty= ...)
+ ```
+ You are done setting up your **Azure LLM**!
+
+
+
+
+
+## What's next?
+
+
+    Learn how to send messages and receive responses next!
+
+
+ Learn how to build a tool calling agent using llmstudio.
+
+
diff --git a/docs/sdk/llm/providers/ollama.mdx b/docs/sdk/llm/providers/ollama.mdx
new file mode 100644
index 00000000..eae25f25
--- /dev/null
+++ b/docs/sdk/llm/providers/ollama.mdx
@@ -0,0 +1,70 @@
+Interact with your Ollama models using LLM.
+
+## Parameters
+An Ollama LLM interface can have the following parameters:
+| Parameter | Type | Description |
+|-------------------|--------|-----------------------------------------------------------------------------|
+| `temperature` | float | The temperature parameter for the model. |
+| `top_p` | float | The top-p parameter for the model. |
+| `num_predict` | int | The number of tokens to predict. |
+| `top_k` | int | The top-k parameter for the model. |
+
+
+## Usage
+Here is how you set up an interface to interact with your Ollama models.
+
+
+
+ Create a `config.yaml` in the same directory your code is in.
+ 1. src
+ 1. yourPythonCode.py
+        2. yourPyNotebook.ipynb
+ 3. **config.yaml**
+
+
+ Define your Ollama provider and models inside the `config.yaml` file.
+ ```yaml
+ providers:
+ ollama:
+ id: ollama
+ name: Ollama
+ chat: true
+ embed: true
+ keys:
+ models:
+ YOUR_MODEL: <- Replace with your model name
+ mode: chat
+ max_tokens: ...
+ input_token_cost: ...
+ output_token_cost: ...
+ ```
+ If you are not sure about any of these parameters, you can just leave them as **0**
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('ollama/{YOUR_MODEL}')
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('ollama/model',
+ temperature= ...,
+ num_predict= ...,
+ top_p= ...,
+            top_k= ...)
+ ```
+ You are done setting up your **Ollama LLM**!
+
+
+
+## What's next?
+
+
+    Learn how to send messages and receive responses next!
+
+
+ Learn how to build a tool calling agent using llmstudio.
+
+
diff --git a/docs/sdk/llm/providers/openai.mdx b/docs/sdk/llm/providers/openai.mdx
new file mode 100644
index 00000000..4c6d3528
--- /dev/null
+++ b/docs/sdk/llm/providers/openai.mdx
@@ -0,0 +1,100 @@
+Interact with your OpenAI models using LLM.
+
+## Supported models
+1. `gpt-4o`
+2. `gpt-4-turbo`
+3. `gpt-4`
+4. `gpt-3.5-turbo`
+5. `gpt-3.5-turbo-instruct`
+
+## Parameters
+An OpenAI LLM interface can have the following parameters:
+| Parameter | Type | Description |
+|-------------------|--------|-----------------------------------------------------------------------------|
+| `api_key` | str | The API key for authentication. |
+| `temperature` | float | The temperature parameter for the model. |
+| `top_p` | float | The top-p parameter for the model. |
+| `max_tokens` | int | The maximum number of tokens for the model's output. |
+| `frequency_penalty` | float | The frequency penalty parameter for the model. |
+| `presence_penalty` | float | The presence penalty parameter for the model. |
+
+
+## Usage
+Here is how you set up an interface to interact with your OpenAI models.
+
+
+
+
+
+    Create a `.env` file with your `OPENAI_API_KEY`.
+
+    Make sure you name your environment variable `OPENAI_API_KEY`.
+ ```bash
+ OPENAI_API_KEY="YOUR-KEY"
+ ```
+
+
+ In your python code, import LLM from llmstudio.
+ ```python
+ from llmstudio import LLM
+ ```
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('openai/{model}')
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('openai/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+ frequency_penalty= ...,
+ presence_penalty= ...)
+ ```
+ You are done setting up your **OpenAI LLM**!
+
+
+
+
+
+
+ In your python code, import LLM from llmstudio.
+ ```python
+ from llmstudio import LLM
+ ```
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('openai/{model}',api_key="YOUR_API_KEY")
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('openai/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+ frequency_penalty= ...,
+ presence_penalty= ...)
+ ```
+ You are done setting up your **OpenAI LLM**!
+
+
+
+
+
+## What's next?
+
+
+    Learn how to send messages and receive responses next!
+
+
+ Learn how to build a tool calling agent using llmstudio.
+
+
diff --git a/docs/sdk/llm/providers/vertexai.mdx b/docs/sdk/llm/providers/vertexai.mdx
new file mode 100644
index 00000000..1f5a1c81
--- /dev/null
+++ b/docs/sdk/llm/providers/vertexai.mdx
@@ -0,0 +1,98 @@
+Interact with your VertexAI models using LLM.
+
+## Supported models
+1. `gemini-1.5-flash`
+2. `gemini-1.5-pro`
+3. `gemini-1.0-pro`
+
+## Parameters
+A VertexAI LLM interface can have the following parameters:
+| Parameter | Type | Description |
+|-------------------|--------|-----------------------------------------------------------------------------|
+| `api_key` | str | The API key for authentication. |
+| `temperature` | float | The temperature parameter for the model. |
+| `top_p` | float | The top-p parameter for the model. |
+| `max_tokens` | int | The maximum number of tokens for the model's output. |
+| `frequency_penalty` | float | The frequency penalty parameter for the model. |
+| `presence_penalty` | float | The presence penalty parameter for the model. |
+
+
+## Usage
+Here is how you set up an interface to interact with your VertexAI models.
+
+
+
+
+
+    Create a `.env` file with your `GOOGLE_API_KEY`.
+
+    Make sure you name your environment variable `GOOGLE_API_KEY`.
+ ```bash
+ GOOGLE_API_KEY="YOUR-KEY"
+ ```
+
+
+ In your python code, import LLM from llmstudio.
+ ```python
+ from llmstudio import LLM
+ ```
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('vertexai/{model}')
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('vertexai/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+ frequency_penalty= ...,
+ presence_penalty= ...)
+ ```
+ You are done setting up your **VertexAI LLM**!
+
+
+
+
+
+
+ In your python code, import LLM from llmstudio.
+ ```python
+ from llmstudio import LLM
+ ```
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('vertexai/{model}',api_key="YOUR_API_KEY")
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('vertexai/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+ frequency_penalty= ...,
+ presence_penalty= ...)
+ ```
+ You are done setting up your **VertexAI LLM**!
+
+
+
+
+
+## What's next?
+
+
+    Learn how to send messages and receive responses next!
+
+
+ Learn how to build a tool calling agent using llmstudio.
+
+
\ No newline at end of file
diff --git a/examples/06_gcloud_guide.ipynb b/examples/06_gcloud_guide.ipynb
new file mode 100644
index 00000000..fa29d234
--- /dev/null
+++ b/examples/06_gcloud_guide.ipynb
@@ -0,0 +1,108 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Engine server already running on 0.0.0.0:8001\n",
+ "Tracking server already running on 0.0.0.0:8002\n"
+ ]
+ }
+ ],
+ "source": [
+ "from llmstudio import LLM"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "llm = LLM('openai/gpt-3.5-turbo')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ConnectionError",
+ "evalue": "('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mRemoteDisconnected\u001b[0m Traceback (most recent call last)",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:793\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 792\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 793\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 799\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 800\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 801\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 802\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 803\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 805\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 806\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 808\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:537\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 537\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connection.py:466\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 465\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 466\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 468\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:1395\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1394\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1395\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1396\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:325\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 325\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:294\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m line:\n\u001b[1;32m 292\u001b[0m \u001b[38;5;66;03m# Presumably, the server closed the connection before\u001b[39;00m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;66;03m# sending a valid response.\u001b[39;00m\n\u001b[0;32m--> 294\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RemoteDisconnected(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRemote end closed connection without\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 295\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m response\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 296\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
+ "\u001b[0;31mRemoteDisconnected\u001b[0m: Remote end closed connection without response",
+ "\nDuring handling of the above exception, another exception occurred:\n",
+ "\u001b[0;31mProtocolError\u001b[0m Traceback (most recent call last)",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/requests/adapters.py:589\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 596\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 598\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 599\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 600\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 601\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 603\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:847\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 845\u001b[0m new_e \u001b[38;5;241m=\u001b[39m ProtocolError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mConnection aborted.\u001b[39m\u001b[38;5;124m\"\u001b[39m, new_e)\n\u001b[0;32m--> 847\u001b[0m retries \u001b[38;5;241m=\u001b[39m \u001b[43mretries\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mincrement\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 848\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merror\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnew_e\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_pool\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_stacktrace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msys\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexc_info\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[1;32m 849\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 850\u001b[0m retries\u001b[38;5;241m.\u001b[39msleep()\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/util/retry.py:470\u001b[0m, in \u001b[0;36mRetry.increment\u001b[0;34m(self, method, url, response, error, _pool, _stacktrace)\u001b[0m\n\u001b[1;32m 469\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m read \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m method \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_is_method_retryable(method):\n\u001b[0;32m--> 470\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[43mreraise\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mtype\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43merror\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merror\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_stacktrace\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 471\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m read \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/util/util.py:38\u001b[0m, in \u001b[0;36mreraise\u001b[0;34m(tp, value, tb)\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m value\u001b[38;5;241m.\u001b[39m__traceback__ \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m tb:\n\u001b[0;32m---> 38\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m value\u001b[38;5;241m.\u001b[39mwith_traceback(tb)\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m value\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:793\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 792\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 793\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 799\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 800\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 801\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 802\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 803\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 805\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 806\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 808\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:537\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 537\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connection.py:466\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 465\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 466\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 468\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:1395\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1394\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1395\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1396\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:325\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 325\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:294\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m line:\n\u001b[1;32m 292\u001b[0m \u001b[38;5;66;03m# Presumably, the server closed the connection before\u001b[39;00m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;66;03m# sending a valid response.\u001b[39;00m\n\u001b[0;32m--> 294\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RemoteDisconnected(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRemote end closed connection without\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 295\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m response\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 296\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
+ "\u001b[0;31mProtocolError\u001b[0m: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))",
+ "\nDuring handling of the above exception, another exception occurred:\n",
+ "\u001b[0;31mConnectionError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mllm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mHi\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Documents/GitHub/LLMstudio/llmstudio/llm/__init__.py:31\u001b[0m, in \u001b[0;36mLLM.chat\u001b[0;34m(self, input, is_stream, retries, **kwargs)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mchat\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: \u001b[38;5;28mstr\u001b[39m, is_stream: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, retries: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m---> 31\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mrequests\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpost\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 32\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhttp://\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mENGINE_HOST\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m:\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mENGINE_PORT\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/api/engine/chat/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprovider\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 33\u001b[0m \u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m 34\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 35\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msession_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msession_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_key\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapi_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 37\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_endpoint\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapi_endpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_version\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapi_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mbase_url\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_url\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[43m 
+ "... [traceback frames omitted: ANSI-coloured requests call stack through api.py, sessions.py, and adapters.py] ...",
+ "ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))"
+ ]
+ }
+ ],
+ "source": [
+ "llm.chat('Hi')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "llmstudiodev",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
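The notebook cell committed above fails because `llm.chat('Hi')` could not reach the local LLMstudio engine server, so `requests` raised the ConnectionError shown in the output. A minimal sketch of guarding such a call with a retry loop follows; the import path and constructor arguments are taken from this diff, while the model id, retry count, and sleep interval are illustrative assumptions, not the project's documented usage.

    import time

    import requests

    from llmstudio.llm import LLM  # class defined in llmstudio/llm/__init__.py in this diff

    llm = LLM("openai/gpt-4o", api_key="...")  # hypothetical "provider/model" id

    # Retry a few times in case the local engine server is still starting up.
    for attempt in range(5):
        try:
            print(llm.chat("Hi"))
            break
        except requests.exceptions.ConnectionError:
            print(f"Engine not reachable yet (attempt {attempt + 1}); retrying in 3s...")
            time.sleep(3)
    else:
        raise RuntimeError("Could not reach the LLMstudio engine server.")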
diff --git a/llmstudio/cli.py b/llmstudio/cli.py
index 2c6197ec..57944402 100644
--- a/llmstudio/cli.py
+++ b/llmstudio/cli.py
@@ -1,5 +1,6 @@
import os
import signal
+import threading
import click
@@ -25,8 +26,9 @@ def server(ui):
print("Servers are running. Press CTRL+C to stop.")
+ stop_event = threading.Event()
try:
- signal.pause()
+ stop_event.wait() # Wait indefinitely until the event is set
except KeyboardInterrupt:
print("Shutting down servers...")
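The `signal.pause()` call replaced in this hunk is only available on Unix, which is presumably why the CLI now blocks on a `threading.Event` instead. A standalone sketch of the same pattern, with a placeholder server thread standing in for the real servers started by the CLI, might look like this:

    import threading
    import time

    def run_server():
        # Placeholder for the real server loop.
        while True:
            time.sleep(1)

    threading.Thread(target=run_server, daemon=True).start()
    print("Servers are running. Press CTRL+C to stop.")

    stop_event = threading.Event()
    try:
        # Blocks the main thread indefinitely without relying on the Unix-only signal.pause().
        stop_event.wait()
    except KeyboardInterrupt:
        print("Shutting down servers...")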
diff --git a/llmstudio/engine/providers/azure.py b/llmstudio/engine/providers/azure.py
index 79afa2fe..35fa8ecc 100644
--- a/llmstudio/engine/providers/azure.py
+++ b/llmstudio/engine/providers/azure.py
@@ -153,6 +153,7 @@ def prepare_messages(self, request: AzureRequest):
async def parse_response(
self, response: AsyncGenerator, **kwargs
) -> AsyncGenerator[str, None]:
+
if self.is_llama and (self.has_tools or self.has_functions):
async for chunk in self.handle_tool_response(response, **kwargs):
yield chunk
diff --git a/llmstudio/llm/__init__.py b/llmstudio/llm/__init__.py
index 8f42086b..ac086dc2 100644
--- a/llmstudio/llm/__init__.py
+++ b/llmstudio/llm/__init__.py
@@ -14,6 +14,7 @@
class LLM:
def __init__(self, model_id: str, **kwargs):
+
self.provider, self.model = model_id.split("/")
self.session_id = kwargs.get("session_id")
self.api_key = kwargs.get("api_key")
diff --git a/pyproject.toml b/pyproject.toml
index 943d61e6..09c90d39 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "llmstudio"
-version = "0.3.11"
+version = "0.3.12a10"
description = "Prompt Perfection at Your Fingertips"
 authors = ["Cláudio Lemos "]
license = "MIT"