diff --git a/.github/workflows/upload-pypi-dev.yml b/.github/workflows/upload-pypi-dev.yml
index 8304eb18..ee3d783e 100644
--- a/.github/workflows/upload-pypi-dev.yml
+++ b/.github/workflows/upload-pypi-dev.yml
@@ -1,4 +1,4 @@
-name: Upload Python package to PyPI as dev pre-release
+name: Upload Python package to PyPI as dev pre-release, build and push Docker image to Docker Hub
on:
workflow_dispatch:
@@ -39,3 +39,48 @@ jobs:
git add pyproject.toml
git commit -m "[fix] bump prerelease version in pyproject.toml"
git push
+
+ # Wait for PyPI to update
+ - name: Wait for PyPI to update
+ run: |
+ VERSION=$(poetry version --short)
+ echo "Checking for llmstudio==$VERSION on PyPI..."
+ for i in {1..10}; do
+ if python -m pip install llmstudio==${VERSION} --dry-run >/dev/null 2>&1; then
+ echo "Package llmstudio==${VERSION} is available on PyPI."
+ break
+ else
+ echo "Package llmstudio==${VERSION} not available yet. Waiting 15 seconds..."
+ sleep 15
+ fi
+ if [ $i -eq 10 ]; then
+ echo "Package did not become available in time."
+ exit 1
+ fi
+ done
+
+ # Docker build and push section
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v2
+
+ - name: Log in to Docker Hub
+ uses: docker/login-action@v2
+ with:
+ username: ${{ secrets.DOCKER_USERNAME }}
+ password: ${{ secrets.DOCKER_PASSWORD }}
+
+ - name: Extract version for tagging Docker image
+ id: get_version
+ run: |
+ echo "VERSION=$(poetry version --short)" >> $GITHUB_ENV
+
+ - name: Build and tag Docker image
+ run: |
+ docker build \
+ --build-arg LLMSTUDIO_VERSION=${{ env.VERSION }} \
+ -t tensoropsai/llmstudio:${{ env.VERSION }} \
+ .
+
+ - name: Push Docker image to Docker Hub
+ run: |
+ docker push tensoropsai/llmstudio:${{ env.VERSION }}
\ No newline at end of file
diff --git a/.github/workflows/upload-pypi.yml b/.github/workflows/upload-pypi.yml
index ec06e30b..62eedaf0 100644
--- a/.github/workflows/upload-pypi.yml
+++ b/.github/workflows/upload-pypi.yml
@@ -1,4 +1,4 @@
-name: Upload Python package to PyPI
+name: Upload Python package to PyPI and build/push Docker images
on:
push:
@@ -11,23 +11,77 @@ jobs:
deploy:
runs-on: ubuntu-latest
steps:
+ # Checkout the code
- name: Checkout code
uses: actions/checkout@v2
+ # Set up Python environment
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: "3.x"
+ # Install Poetry
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python3 -
+ # Configure Poetry with PyPI token
- name: Configure Poetry
run: |
poetry config pypi-token.pypi ${{ secrets.PYPI_API_TOKEN }}
+ # Build and publish package to PyPI
- name: Build and publish to PyPI
run: |
poetry build
poetry publish
+
+ # Extract the new version number from pyproject.toml
+ - name: Extract version for tagging Docker image
+ run: |
+ echo "VERSION=$(poetry version --short)" >> $GITHUB_ENV
+
+ # Wait for the package to become available on PyPI
+ - name: Wait for PyPI to update
+ run: |
+ echo "Checking for llmstudio==${{ env.VERSION }} on PyPI..."
+ for i in {1..10}; do
+ if python -m pip install llmstudio==${{ env.VERSION }} --dry-run >/dev/null 2>&1; then
+ echo "Package llmstudio==${{ env.VERSION }} is available on PyPI."
+ break
+ else
+ echo "Package llmstudio==${{ env.VERSION }} not available yet. Waiting 15 seconds..."
+ sleep 15
+ fi
+ if [ $i -eq 10 ]; then
+ echo "Package did not become available in time."
+ exit 1
+ fi
+ done
+
+ # Set up Docker Buildx
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v2
+
+ # Log in to Docker Hub
+ - name: Log in to Docker Hub
+ uses: docker/login-action@v2
+ with:
+ username: ${{ secrets.DOCKER_USERNAME }}
+ password: ${{ secrets.DOCKER_PASSWORD }}
+
+ # Build and tag Docker images with both :latest and :[NEW_VERSION]
+ - name: Build and tag Docker images
+ run: |
+ docker build \
+ --build-arg LLMSTUDIO_VERSION=${{ env.VERSION }} \
+ -t tensoropsai/llmstudio:latest \
+ -t tensoropsai/llmstudio:${{ env.VERSION }} \
+ .
+
+ # Push both Docker images to Docker Hub
+ - name: Push Docker images to Docker Hub
+ run: |
+ docker push tensoropsai/llmstudio:${{ env.VERSION }}
+ docker push tensoropsai/llmstudio:latest
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..c432cf77
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+# Dockerfile
+
+FROM python:3.11-slim
+ENV PYTHONUNBUFFERED=1
+
+# Refresh package index
+RUN apt-get clean && apt-get update
+
+# Install llmstudio
+ARG LLMSTUDIO_VERSION
+RUN pip install llmstudio==${LLMSTUDIO_VERSION}
+RUN pip install psycopg2-binary
+
+# Expose Ports
+EXPOSE 8001 8002
+
+CMD ["llmstudio", "server"]
diff --git a/docs/how-to/build-a-tool-agent.mdx b/docs/how-to/build-a-tool-agent.mdx
new file mode 100644
index 00000000..59279084
--- /dev/null
+++ b/docs/how-to/build-a-tool-agent.mdx
@@ -0,0 +1,83 @@
+This guide outlines how to build a tool calling agent using Langchain + LLMstudio.
+
+## 1. Set up your tools
+Start by defining the tools your agent is going to have access to.
+```python
+from langchain.tools import tool
+
+@tool
+def buy_ticket(destination: str):
+ """Use this to buy a ticket"""
+ return "Bought ticket number 270924"
+
+@tool
+def get_departure(ticket_number: str):
+ """Use this to fetch the departure time of a train"""
+ return "8:25 AM"
+```
+
+## 2. Setup your .env
+Create a `.env` file at the root of your project with the credentials for the providers you want to use.
+
+
+
+ ```
+ OPENAI_API_KEY="YOUR_API_KEY"
+ ```
+
+
+ ```
+ GOOGLE_API_KEY="YOUR_API_KEY"
+ ```
+
+
+ ```
+ AZURE_BASE_URL="YOUR_MODEL_ENDPOINT"
+ AZURE_API_KEY="YOUR_API_KEY"
+ ```
+
+
+
+## 3. Set up your model using LLMstudio
+Use LLMstudio to choose the provider and model you want to use.
+
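+The snippets below assume `ChatLLMstudio` has been imported from LLMstudio's Langchain integration. This import path is the usual one, but it may differ between versions:
+
+```python
+# Assumed import path for the Langchain-compatible chat model
+from llmstudio.langchain import ChatLLMstudio
+```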
+
+ ```python
+ model = ChatLLMstudio(model_id='openai/gpt-4o')
+ ```
+
+
+ ```python
+ model = ChatLLMstudio(model_id='vertexai/gemini-1.5-flash')
+ ```
+
+
+ ```python
+ model = ChatLLMstudio(model_id='azure/Meta-Llama-3.1-70B-Instruct')
+ ```
+
+
+
+## 4. Build the agent
+Set up your agent and agent executor using Langchain.
+
+```python
+from langchain import hub
+from langchain.agents import AgentExecutor, create_openai_tools_agent
+
+prompt = hub.pull("hwchase17/openai-tools-agent")
+tools = [buy_ticket, get_departure]
+
+agent = create_openai_tools_agent(model, tools, prompt)
+agent_executor = AgentExecutor(agent=agent, tools=tools)
+
+input = "Can you buy me a ticket to Madrid?"
+
+# Invoke the agent
+agent_executor.invoke(
+ {
+ "input": input,
+ }
+)
+```
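+
+The agent can also call the second tool from step 1. A minimal follow-up sketch (the exact wording of the model's answer will vary):
+
+```python
+# Ask a question that should route to the get_departure tool
+result = agent_executor.invoke(
+    {
+        "input": "What time does the train with ticket number 270924 depart?",
+    }
+)
+
+# AgentExecutor.invoke returns a dict containing the final "output"
+print(result["output"])
+```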
+
+
+
diff --git a/docs/how-to/deploy-on-gcp/deploy-on-google-cloud-platform.mdx b/docs/how-to/deploy-on-gcp/deploy-on-google-cloud-platform.mdx
new file mode 100644
index 00000000..6dbf80f2
--- /dev/null
+++ b/docs/how-to/deploy-on-gcp/deploy-on-google-cloud-platform.mdx
@@ -0,0 +1,157 @@
+Learn how to deploy LLMstudio as a containerized application on Google Kubernetes Engine and make calls from a local repository.
+
+
+## Prerequisites
+To follow this guide, you need the following:
+
+- A **project** on Google Cloud Platform.
+- The **Kubernetes Engine** API enabled on your project.
+- The **Kubernetes Engine Admin** role for the user following this guide.
+
+## Deploy LLMstudio
+
+This example demonstrates a public deployment. For a private service accessible only within your enterprise infrastructure, deploy it within your own Virtual Private Cloud (VPC).
+
+
+ Begin by navigating to the Kubernetes Engine page.
+
+
+ Go to **Workloads** and **Create a new Deployment**.
+
+
+
+
+
+    Name your deployment. We will call the one in this guide **llmstudio-on-gcp**.
+
+
+
+
+
+ Choose between **creating a new cluster** or **using an existing cluster**.
+ For this guide, we will create a new cluster and use the default region.
+
+
+
+
+
+    Once done with the **Deployment configuration**, proceed to **Container details**.
+
+
+ In the new container section, select **Existing container image**.
+
+
+ Copy the path to LLMstudio's image available on Docker Hub.
+ ```bash Image Path
+ tensoropsai/llmstudio:latest
+ ```
+ Set it as the **Image path** to your container.
+
+
+
+
+
+ Configure the following mandatory environment variables:
+| Environment Variable | Value |
+|----------------------------|-----------|
+| `LLMSTUDIO_ENGINE_HOST` | 0.0.0.0 |
+| `LLMSTUDIO_ENGINE_PORT` | 8001 |
+| `LLMSTUDIO_TRACKING_HOST` | 0.0.0.0 |
+| `LLMSTUDIO_TRACKING_PORT` | 8002 |
+
+Additionally, set the `GOOGLE_API_KEY` environment variable to enable calls to Google's Gemini models.
+Refer to **SDK/LLM/Providers** for instructions on setting up other providers.
+
+
+
+
+
+
+
+ After configuring your container, proceed to **Expose (Optional)**.
+
+
+ Select **Expose deployment as a new service** and leave the first item as is.
+
+
+
+
+
+ Add two other items, and expose the ports defined in the **Set Environment Variables** step.
+
+
+
+
+
+
+ After setting up and exposing the ports, press **Deploy**.
+ You have successfully deployed **LLMstudio on Google Cloud Platform**!
+
+
+
+
+## Make a Call
+Now let's make a call to our LLMstudio instance on GCP!
+
+
+
+
+
+    Set up a simple project with these two files:
+ 1. `simple-call.ipynb`
+ 2. `.env`
+
+
+
+
+
+
+ Go to your newly deployed **Workload**, scroll to the **Exposing services** section, and take note of the Host of your endpoint.
+
+
+
+
+ Create your `.env` file with the following:
+
+ ```env .env
+ LLMSTUDIO_ENGINE_HOST = "YOUR_HOST"
+ LLMSTUDIO_ENGINE_PORT = "8001"
+    LLMSTUDIO_TRACKING_HOST = "YOUR_HOST"
+ LLMSTUDIO_TRACKING_PORT = "8002"
+ ```
+
+    You are done setting up your **.env** file!
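+
+    LLMstudio will usually pick these variables up from the `.env` file automatically. If they are not being picked up in your setup, a minimal sketch (assuming the `python-dotenv` package is installed) loads them explicitly before importing llmstudio:
+
+    ```python
+    # Assumption: python-dotenv is installed (pip install python-dotenv)
+    from dotenv import load_dotenv
+
+    load_dotenv()  # loads the LLMSTUDIO_* variables from .env
+    ```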
+
+
+
+ Start by importing llmstudio:
+ ```python 1st cell
+ from llmstudio import LLM
+ ```
+
+ Set up your LLM. We will be using `gemini-1.5-flash` for this guide.
+ ```python 2nd cell
+ llm = LLM('vertexai/gemini-1.5-flash')
+ ```
+
+ Chat with your model.
+ ```python 3rd cell
+    response = llm.chat('Hello!')
+ print(response.chat_output)
+ ```
+
+
+
+
+
+
+ You are done calling llmstudio on GCP!
+
+
+
+
+
+
+
+
+
diff --git a/docs/how-to/deploy-on-gcp/step-2.png b/docs/how-to/deploy-on-gcp/step-2.png
new file mode 100644
index 00000000..4d2bb3ee
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-2.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-3.png b/docs/how-to/deploy-on-gcp/step-3.png
new file mode 100644
index 00000000..adb4eac3
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-3.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-4.png b/docs/how-to/deploy-on-gcp/step-4.png
new file mode 100644
index 00000000..cc18c845
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-4.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-6.png b/docs/how-to/deploy-on-gcp/step-6.png
new file mode 100644
index 00000000..6ac8b8b1
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-6.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-7-1.png b/docs/how-to/deploy-on-gcp/step-7-1.png
new file mode 100644
index 00000000..e3e523a3
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-7-1.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-7.png b/docs/how-to/deploy-on-gcp/step-7.png
new file mode 100644
index 00000000..b8325854
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-7.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-9-1.png b/docs/how-to/deploy-on-gcp/step-9-1.png
new file mode 100644
index 00000000..328ca889
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-9-1.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-9-2.png b/docs/how-to/deploy-on-gcp/step-9-2.png
new file mode 100644
index 00000000..64d9ddb6
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-9-2.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-9.png b/docs/how-to/deploy-on-gcp/step-9.png
new file mode 100644
index 00000000..1953805a
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-9.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-env.png b/docs/how-to/deploy-on-gcp/step-env.png
new file mode 100644
index 00000000..392a49ad
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-env.png differ
diff --git a/docs/how-to/deploy-on-gcp/step-llmstudio-call.png b/docs/how-to/deploy-on-gcp/step-llmstudio-call.png
new file mode 100644
index 00000000..e05609fb
Binary files /dev/null and b/docs/how-to/deploy-on-gcp/step-llmstudio-call.png differ
diff --git a/docs/mint.json b/docs/mint.json
index 284081e2..0645334f 100644
--- a/docs/mint.json
+++ b/docs/mint.json
@@ -43,6 +43,32 @@
"group": "Get Started",
"pages": ["quickstart", "support"]
},
+ {
+ "group": "How to",
+ "pages": [
+ "how-to/build-a-tool-agent",
+ "how-to/deploy-on-gcp/deploy-on-google-cloud-platform"
+ ]
+ },
+ {
+ "group": "SDK",
+ "pages": [
+ {
+ "group": "LLM",
+          "pages": [
+            {
+              "group": "Providers",
+              "pages": [
+                "sdk/llm/providers/openai",
+                "sdk/llm/providers/vertexai",
+                "sdk/llm/providers/anthropic",
+                "sdk/llm/providers/ollama",
+                "sdk/llm/providers/azure"
+              ]
+            },
+            "sdk/llm/chat"
+          ]
+        }
+ ]
+ },
{
"group": "Endpoint Examples",
"pages": [
@@ -68,3 +94,4 @@
"baseUrl": "http://localhost:8000"
}
}
+
\ No newline at end of file
diff --git a/docs/sdk/llm/chat.mdx b/docs/sdk/llm/chat.mdx
new file mode 100644
index 00000000..cc8852f4
--- /dev/null
+++ b/docs/sdk/llm/chat.mdx
@@ -0,0 +1,86 @@
+Make chat calls using your LLM.
+
+## Parameters
+The `llm.chat` method accepts the following parameters.
+| Parameter         | Type   | Description                                                                   |
+|-------------------|--------|-------------------------------------------------------------------------------|
+| `input`           | str    | The input message to send to the chat model.                                  |
+| `is_stream`       | bool   | Whether to stream the response.                                                |
+| `**kwargs`        | dict   | Additional parameters to pass to the chat model.                               |
+
+Refer to your provider-specific documentation for additional kwargs you can use.
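+
+For example, sampling parameters can be passed per call through `**kwargs`. A minimal sketch, assuming an `llm` instance created as shown in the Usage section below (parameter support varies by provider):
+
+```python
+response = llm.chat(
+    "Write a haiku about trains.",
+    temperature=0.2,  # forwarded to the underlying model
+    max_tokens=64,
+)
+```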
+
+## Returns
+| Output | Type | Description |
+|-------------------|--------|-----------------------------------------------------------------------------|
+| `ChatCompletion` | object | A chat completion object in the OpenAI format + metrics computed by LLMstudio.|
+
+
+## Usage
+Here's how to use `.chat()` to make calls to your LLM.
+
+
+
+ Start by importing LLM.
+ ```python
+ from llmstudio import LLM
+ ```
+
+
+ Set up an LLM from your desired provider.
+ ```python
+ llm = LLM('openai/gpt-4o')
+ ```
+
+
+ Create your message. Your message can be a simple `string` or a message in the `OpenAI format`.
+
+
+
+ ```python
+ message = "Hello, how are you today?"
+ ```
+
+
+ ```python
+ message = [
+        {"role": "system", "content": "You are a helpful assistant."},
+ {"role": "user", "content": "Hello, how are you today?"}
+ ]
+ ```
+
+
+
+
+
+
+
+
+ Get your response.
+ ```python
+ response = llm.chat(message)
+ ```
+
+    Visualize your response.
+ ```python
+ print(response)
+ ```
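+
+    The response is a `ChatCompletion`-style object. A quick sketch of reading it; `chat_output` is used elsewhere in these docs, while the metrics attribute name is an assumption and may differ between versions:
+    ```python
+    # The model's reply
+    print(response.chat_output)
+
+    # Metrics computed by LLMstudio (assumed attribute name)
+    print(response.metrics)
+    ```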
+
+
+ Get your response.
+ ```python
+ response = llm.chat(message, is_stream = True)
+ ```
+
+    Visualize your response.
+ ```python
+ for chunk in response:
+ print(chunk)
+ ```
+
+
+
+    You are done chatting with your **LLMstudio LLM**!
+
+
+
\ No newline at end of file
diff --git a/docs/sdk/llm/providers/anthropic.mdx b/docs/sdk/llm/providers/anthropic.mdx
new file mode 100644
index 00000000..eeff4915
--- /dev/null
+++ b/docs/sdk/llm/providers/anthropic.mdx
@@ -0,0 +1,101 @@
+Interact with your Anthropic models using LLMstudio's LLM.
+
+## Supported models
+1. `claude-3-opus-20240229`
+2. `claude-3-sonnet-20240229`
+3. `claude-3-haiku-20240307`
+4. `claude-2.1`
+5. `claude-2`
+6. `claude-instant-1.2`
+
+## Parameters
+An Anthropic LLM interface can have the following parameters:
+| Parameter | Type | Description |
+|-------------------|--------|-----------------------------------------------------------------------------|
+| `api_key` | str | The API key for authentication. |
+| `temperature` | float | The temperature parameter for the model. |
+| `top_p` | float | The top-p parameter for the model. |
+| `max_tokens` | int | The maximum number of tokens for the model's output. |
+| `top_k` | int | The top-k parameter for the model. |
+
+
+## Usage
+Here is how you set up an interface to interact with your Anthropic models.
+
+
+
+
+
+    Create a `.env` file with your `ANTHROPIC_API_KEY`.
+
+    Make sure you name your environment variable `ANTHROPIC_API_KEY`.
+ ```bash
+ ANTHROPIC_API_KEY="YOUR-KEY"
+ ```
+
+
+ In your python code, import LLM from llmstudio.
+ ```python
+ from llmstudio import LLM
+ ```
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('anthropic/{model}')
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('anthropic/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+            top_k= ...)
+ ```
+ You are done setting up your **Anthropic LLM**!
+
+
+
+
+
+
+ In your python code, import LLM from llmstudio.
+ ```python
+ from llmstudio import LLM
+ ```
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('anthropic/{model}',api_key="YOUR_API_KEY")
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('anthropic/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+            top_k= ...)
+ ```
+ You are done setting up your **Anthropic LLM**!
+
+
+
+
+
+
+## What's next?
+
+
+    Learn how to send messages and receive responses next!
+
+
+ Learn how to build a tool calling agent using llmstudio.
+
+
diff --git a/docs/sdk/llm/providers/azure.mdx b/docs/sdk/llm/providers/azure.mdx
new file mode 100644
index 00000000..161a2491
--- /dev/null
+++ b/docs/sdk/llm/providers/azure.mdx
@@ -0,0 +1,126 @@
+Interact with your Azure models using LLM.
+
+## Parameters
+An Azure LLM interface can have the following parameters:
+| Parameter | Type | Description |
+|---------------------|--------|-----------------------------------------------------------------------------|
+| `temperature` | float | The temperature parameter for the model. |
+| `max_tokens` | int | The maximum number of tokens to generate. |
+| `top_p` | float | The top-p parameter for the model. |
+| `frequency_penalty` | float | The frequency penalty parameter for the model. |
+| `presence_penalty` | float | The presence penalty parameter for the model. |
+
+
+## Usage
+Here is how you set up an interface to interact with your Azure models.
+
+
+
+
+ Create a `config.yaml` file in the same directory as your code.
+    1. src
+        1. PythonCode.py
+        2. PyNotebook.ipynb
+        3. **config.yaml**
+
+
+ Define your Azure OpenAI provider and models inside the `config.yaml` file.
+ ```yaml
+ providers:
+ azure:
+ id: azure
+ name: Azure
+ chat: true
+ embed: true
+ models:
+ YOUR_MODEL: <- Replace with your model name
+ mode: chat
+ max_tokens: ...
+ input_token_cost: ...
+ output_token_cost: ...
+ ```
+    If you are not sure, you can leave `max_tokens`, `input_token_cost`, and the other parameters as **0**
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('azure/YOUR_MODEL',
+ api_key = YOUR_API_KEY,
+ api_endpoint = YOUR_ENDPOINT,
+ api_version = YOUR_API_VERSION)
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('azure/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+ frequency_penalty= ...,
+ presence_penalty= ...)
+ ```
+ You are done setting up your **Azure LLM**!
+
+
+
+
+
+
+ Create a `config.yaml` file in the same directory as your code.
+    1. src
+        1. PythonCode.py
+        2. PyNotebook.ipynb
+        3. **config.yaml**
+
+
+ Define your Azure provider and models inside the `config.yaml` file.
+ ```yaml
+ providers:
+ azure:
+ id: azure
+ name: Azure
+ chat: true
+ embed: true
+ models:
+ YOUR_MODEL: <- Replace with your model name
+ mode: chat
+ max_tokens: ...
+ input_token_cost: ...
+ output_token_cost: ...
+ ```
+    If you are not sure, you can leave `max_tokens`, `input_token_cost`, and the other parameters as **0**
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('azure/YOUR_MODEL',
+ api_key = YOUR_API_KEY,
+ base_url = YOUR_ENDPOINT)
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('azure/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+ frequency_penalty= ...,
+ presence_penalty= ...)
+ ```
+ You are done setting up your **Azure LLM**!
+
+
+
+
+
+## What's next?
+
+
+    Learn how to send messages and receive responses next!
+
+
+ Learn how to build a tool calling agent using llmstudio.
+
+
diff --git a/docs/sdk/llm/providers/ollama.mdx b/docs/sdk/llm/providers/ollama.mdx
new file mode 100644
index 00000000..eae25f25
--- /dev/null
+++ b/docs/sdk/llm/providers/ollama.mdx
@@ -0,0 +1,70 @@
+Interact with your Ollama models using LLM.
+
+## Parameters
+An Ollama LLM interface can have the following parameters:
+| Parameter | Type | Description |
+|-------------------|--------|-----------------------------------------------------------------------------|
+| `temperature` | float | The temperature parameter for the model. |
+| `top_p` | float | The top-p parameter for the model. |
+| `num_predict` | int | The number of tokens to predict. |
+| `top_k` | int | The top-k parameter for the model. |
+
+
+## Usage
+Here is how you set up an interface to interact with your Ollama models.
+
+
+
+ Create a `config.yaml` in the same directory your code is in.
+ 1. src
+ 1. yourPythonCode.py
+        2. yourPyNotebook.ipynb
+ 3. **config.yaml**
+
+
+ Define your Ollama provider and models inside the `config.yaml` file.
+ ```yaml
+ providers:
+ ollama:
+ id: ollama
+ name: Ollama
+ chat: true
+ embed: true
+ keys:
+ models:
+ YOUR_MODEL: <- Replace with your model name
+ mode: chat
+ max_tokens: ...
+ input_token_cost: ...
+ output_token_cost: ...
+ ```
+ If you are not sure about any of these parameters, you can just leave them as **0**
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('ollama/{YOUR_MODEL}')
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('ollama/model',
+ temperature= ...,
+ num_predict= ...,
+ top_p= ...,
+            top_k= ...)
+ ```
+ You are done setting up your **Ollama LLM**!
+
+
+
+## What's next?
+
+
+    Learn how to send messages and receive responses next!
+
+
+ Learn how to build a tool calling agent using llmstudio.
+
+
diff --git a/docs/sdk/llm/providers/openai.mdx b/docs/sdk/llm/providers/openai.mdx
new file mode 100644
index 00000000..4c6d3528
--- /dev/null
+++ b/docs/sdk/llm/providers/openai.mdx
@@ -0,0 +1,100 @@
+Interact with your OpenAI models using LLM.
+
+## Supported models
+1. `gpt-4o`
+2. `gpt-4-turbo`
+3. `gpt-4`
+4. `gpt-3.5-turbo`
+5. `gpt-3.5-turbo-instruct`
+
+## Parameters
+An OpenAI LLM interface can have the following parameters:
+| Parameter | Type | Description |
+|-------------------|--------|-----------------------------------------------------------------------------|
+| `api_key` | str | The API key for authentication. |
+| `temperature` | float | The temperature parameter for the model. |
+| `top_p` | float | The top-p parameter for the model. |
+| `max_tokens` | int | The maximum number of tokens for the model's output. |
+| `frequency_penalty` | float | The frequency penalty parameter for the model. |
+| `presence_penalty` | float | The presence penalty parameter for the model. |
+
+
+## Usage
+Here is how you set up an interface to interact with your OpenAI models.
+
+
+
+
+
+    Create a `.env` file with your `OPENAI_API_KEY`.
+
+    Make sure you name your environment variable `OPENAI_API_KEY`.
+ ```bash
+ OPENAI_API_KEY="YOUR-KEY"
+ ```
+
+
+ In your python code, import LLM from llmstudio.
+ ```python
+ from llmstudio import LLM
+ ```
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('openai/{model}')
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('openai/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+ frequency_penalty= ...,
+ presence_penalty= ...)
+ ```
+ You are done setting up your **OpenAI LLM**!
+
+
+
+
+
+
+ In your python code, import LLM from llmstudio.
+ ```python
+ from llmstudio import LLM
+ ```
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('openai/{model}',api_key="YOUR_API_KEY")
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('openai/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+ frequency_penalty= ...,
+ presence_penalty= ...)
+ ```
+ You are done setting up your **OpenAI LLM**!
+
+
+
+
+
+## What's next?
+
+
+    Learn how to send messages and receive responses next!
+
+
+ Learn how to build a tool calling agent using llmstudio.
+
+
diff --git a/docs/sdk/llm/providers/vertexai.mdx b/docs/sdk/llm/providers/vertexai.mdx
new file mode 100644
index 00000000..1f5a1c81
--- /dev/null
+++ b/docs/sdk/llm/providers/vertexai.mdx
@@ -0,0 +1,98 @@
+Interact with your VertexAI models using LLM.
+
+## Supported models
+1. `gemini-1.5-flash`
+2. `gemini-1.5-pro`
+3. `gemini-1.0-pro`
+
+## Parameters
+A VertexAI LLM interface can have the following parameters:
+| Parameter | Type | Description |
+|-------------------|--------|-----------------------------------------------------------------------------|
+| `api_key` | str | The API key for authentication. |
+| `temperature` | float | The temperature parameter for the model. |
+| `top_p` | float | The top-p parameter for the model. |
+| `max_tokens` | int | The maximum number of tokens for the model's output. |
+| `frequency_penalty` | float | The frequency penalty parameter for the model. |
+| `presence_penalty` | float | The presence penalty parameter for the model. |
+
+
+## Usage
+Here is how you set up an interface to interact with your VertexAI models.
+
+
+
+
+
+    Create a `.env` file with your `GOOGLE_API_KEY`.
+
+    Make sure you name your environment variable `GOOGLE_API_KEY`.
+ ```bash
+ GOOGLE_API_KEY="YOUR-KEY"
+ ```
+
+
+ In your python code, import LLM from llmstudio.
+ ```python
+ from llmstudio import LLM
+ ```
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('vertexai/{model}')
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('vertexai/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+ frequency_penalty= ...,
+ presence_penalty= ...)
+ ```
+ You are done setting up your **VertexAI LLM**!
+
+
+
+
+
+
+ In your python code, import LLM from llmstudio.
+ ```python
+ from llmstudio import LLM
+ ```
+
+
+ Create your **llm** instance.
+ ```python
+ llm = LLM('vertexai/{model}',api_key="YOUR_API_KEY")
+ ```
+
+
+ **Optional:** You can add your parameters as follows:
+ ```python
+ llm = LLM('vertexai/model',
+ temperature= ...,
+ max_tokens= ...,
+ top_p= ...,
+ frequency_penalty= ...,
+ presence_penalty= ...)
+ ```
+ You are done setting up your **VertexAI LLM**!
+
+
+
+
+
+## What's next?
+
+
+    Learn how to send messages and receive responses next!
+
+
+ Learn how to build a tool calling agent using llmstudio.
+
+
\ No newline at end of file
diff --git a/examples/06_gcloud_guide.ipynb b/examples/06_gcloud_guide.ipynb
new file mode 100644
index 00000000..fa29d234
--- /dev/null
+++ b/examples/06_gcloud_guide.ipynb
@@ -0,0 +1,108 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Engine server already running on 0.0.0.0:8001\n",
+ "Tracking server already running on 0.0.0.0:8002\n"
+ ]
+ }
+ ],
+ "source": [
+ "from llmstudio import LLM"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "llm = LLM('openai/gpt-3.5-turbo')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "ConnectionError",
+ "evalue": "('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mRemoteDisconnected\u001b[0m Traceback (most recent call last)",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:793\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 792\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 793\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 799\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 800\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 801\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 802\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 803\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 805\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 806\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 808\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:537\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 537\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connection.py:466\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 465\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 466\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 468\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:1395\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1394\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1395\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1396\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:325\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 325\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:294\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m line:\n\u001b[1;32m 292\u001b[0m \u001b[38;5;66;03m# Presumably, the server closed the connection before\u001b[39;00m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;66;03m# sending a valid response.\u001b[39;00m\n\u001b[0;32m--> 294\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RemoteDisconnected(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRemote end closed connection without\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 295\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m response\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 296\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
+ "\u001b[0;31mRemoteDisconnected\u001b[0m: Remote end closed connection without response",
+ "\nDuring handling of the above exception, another exception occurred:\n",
+ "\u001b[0;31mProtocolError\u001b[0m Traceback (most recent call last)",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/requests/adapters.py:589\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 594\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 596\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 597\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 598\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 599\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 600\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 601\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 603\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:847\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 845\u001b[0m new_e \u001b[38;5;241m=\u001b[39m ProtocolError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mConnection aborted.\u001b[39m\u001b[38;5;124m\"\u001b[39m, new_e)\n\u001b[0;32m--> 847\u001b[0m retries \u001b[38;5;241m=\u001b[39m \u001b[43mretries\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mincrement\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 848\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merror\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnew_e\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_pool\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_stacktrace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msys\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexc_info\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[1;32m 849\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 850\u001b[0m retries\u001b[38;5;241m.\u001b[39msleep()\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/util/retry.py:470\u001b[0m, in \u001b[0;36mRetry.increment\u001b[0;34m(self, method, url, response, error, _pool, _stacktrace)\u001b[0m\n\u001b[1;32m 469\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m read \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m method \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_is_method_retryable(method):\n\u001b[0;32m--> 470\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[43mreraise\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mtype\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43merror\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merror\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_stacktrace\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 471\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m read \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/util/util.py:38\u001b[0m, in \u001b[0;36mreraise\u001b[0;34m(tp, value, tb)\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m value\u001b[38;5;241m.\u001b[39m__traceback__ \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m tb:\n\u001b[0;32m---> 38\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m value\u001b[38;5;241m.\u001b[39mwith_traceback(tb)\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m value\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:793\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 792\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 793\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 799\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 800\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 801\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 802\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 803\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 805\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 806\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 808\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connectionpool.py:537\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 537\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/site-packages/urllib3/connection.py:466\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 465\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 466\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 468\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:1395\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1394\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1395\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1396\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:325\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 325\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n",
+ "File \u001b[0;32m/opt/anaconda3/envs/llmstudiodev/lib/python3.11/http/client.py:294\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m line:\n\u001b[1;32m 292\u001b[0m \u001b[38;5;66;03m# Presumably, the server closed the connection before\u001b[39;00m\n\u001b[1;32m 293\u001b[0m \u001b[38;5;66;03m# sending a valid response.\u001b[39;00m\n\u001b[0;32m--> 294\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RemoteDisconnected(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRemote end closed connection without\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 295\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m response\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 296\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
+ "\u001b[0;31mProtocolError\u001b[0m: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))",
+ "\nDuring handling of the above exception, another exception occurred:\n",
+ "\u001b[0;31mConnectionError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mllm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mHi\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[0;32m~/Documents/GitHub/LLMstudio/llmstudio/llm/__init__.py:31\u001b[0m, in \u001b[0;36mLLM.chat\u001b[0;34m(self, input, is_stream, retries, **kwargs)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mchat\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: \u001b[38;5;28mstr\u001b[39m, is_stream: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, retries: \u001b[38;5;28mint\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m---> 31\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mrequests\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpost\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 32\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhttp://\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mENGINE_HOST\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m:\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mENGINE_PORT\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/api/engine/chat/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprovider\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 33\u001b[0m \u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m 34\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 35\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msession_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msession_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 36\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_key\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapi_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 37\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_endpoint\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapi_endpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 38\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_version\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapi_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 39\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mbase_url\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_url\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[43m 
+ "... [traceback frames omitted: ANSI-coloured requests call stack through api.py, sessions.py, and adapters.py] ...",
+ "ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))"
+ ]
+ }
+ ],
+ "source": [
+ "llm.chat('Hi')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "llmstudiodev",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
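The notebook cell committed above fails because `llm.chat('Hi')` could not reach the local LLMstudio engine server, so `requests` raised the ConnectionError shown in the output. A minimal sketch of guarding such a call with a retry loop follows; the import path and constructor arguments are taken from this diff, while the model id, retry count, and sleep interval are illustrative assumptions, not the project's documented usage.

    import time

    import requests

    from llmstudio.llm import LLM  # class defined in llmstudio/llm/__init__.py in this diff

    llm = LLM("openai/gpt-4o", api_key="...")  # hypothetical "provider/model" id

    # Retry a few times in case the local engine server is still starting up.
    for attempt in range(5):
        try:
            print(llm.chat("Hi"))
            break
        except requests.exceptions.ConnectionError:
            print(f"Engine not reachable yet (attempt {attempt + 1}); retrying in 3s...")
            time.sleep(3)
    else:
        raise RuntimeError("Could not reach the LLMstudio engine server.")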
diff --git a/llmstudio/cli.py b/llmstudio/cli.py
index 2c6197ec..57944402 100644
--- a/llmstudio/cli.py
+++ b/llmstudio/cli.py
@@ -1,5 +1,6 @@
import os
import signal
+import threading
import click
@@ -25,8 +26,9 @@ def server(ui):
print("Servers are running. Press CTRL+C to stop.")
+ stop_event = threading.Event()
try:
- signal.pause()
+ stop_event.wait() # Wait indefinitely until the event is set
except KeyboardInterrupt:
print("Shutting down servers...")
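The `signal.pause()` call replaced in this hunk is only available on Unix, which is presumably why the CLI now blocks on a `threading.Event` instead. A standalone sketch of the same pattern, with a placeholder server thread standing in for the real servers started by the CLI, might look like this:

    import threading
    import time

    def run_server():
        # Placeholder for the real server loop.
        while True:
            time.sleep(1)

    threading.Thread(target=run_server, daemon=True).start()
    print("Servers are running. Press CTRL+C to stop.")

    stop_event = threading.Event()
    try:
        # Blocks the main thread indefinitely without relying on the Unix-only signal.pause().
        stop_event.wait()
    except KeyboardInterrupt:
        print("Shutting down servers...")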
diff --git a/llmstudio/engine/providers/azure.py b/llmstudio/engine/providers/azure.py
index 79afa2fe..35fa8ecc 100644
--- a/llmstudio/engine/providers/azure.py
+++ b/llmstudio/engine/providers/azure.py
@@ -153,6 +153,7 @@ def prepare_messages(self, request: AzureRequest):
async def parse_response(
self, response: AsyncGenerator, **kwargs
) -> AsyncGenerator[str, None]:
+
if self.is_llama and (self.has_tools or self.has_functions):
async for chunk in self.handle_tool_response(response, **kwargs):
yield chunk
diff --git a/llmstudio/llm/__init__.py b/llmstudio/llm/__init__.py
index 8f42086b..ac086dc2 100644
--- a/llmstudio/llm/__init__.py
+++ b/llmstudio/llm/__init__.py
@@ -14,6 +14,7 @@
class LLM:
def __init__(self, model_id: str, **kwargs):
+
self.provider, self.model = model_id.split("/")
self.session_id = kwargs.get("session_id")
self.api_key = kwargs.get("api_key")
diff --git a/pyproject.toml b/pyproject.toml
index 943d61e6..09c90d39 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "llmstudio"
-version = "0.3.11"
+version = "0.3.12a10"
description = "Prompt Perfection at Your Fingertips"
 authors = ["Cláudio Lemos "]
license = "MIT"