diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 31ae8953c..efcdb636f 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -29,6 +29,10 @@ jobs:
       uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
+    - name: Install Ollama
+      run: |
+        curl -fsSL https://ollama.com/install.sh | sh
+        ollama --version
     - name: Set up test environment
       run: |
         python -m pip install --upgrade pip
diff --git a/docs/reference/models/ollama.md b/docs/reference/models/ollama.md
new file mode 100644
index 000000000..0d2961832
--- /dev/null
+++ b/docs/reference/models/ollama.md
@@ -0,0 +1,72 @@
+# Ollama
+
+!!! Installation
+
+    To use Ollama in Outlines, you must install both Ollama and the `ollama` Python SDK.
+
+    - To download Ollama: https://ollama.com/download
+    - To install the `ollama` Python SDK: `pip install ollama`
+
+## Ollama models
+
+You must provide a model name when instantiating the `outlines.models.Ollama` class. This model must already be available on your system.
+```python
+from outlines.models import Ollama
+
+model = Ollama("tinyllama")
+```
+
+To download a new model from the Ollama model hub, use the `from_pretrained` class method (it returns an `Ollama` instance):
+```python
+from outlines.models import Ollama
+
+model = Ollama.from_pretrained("llama3.1:8b")
+```
+
+You can find the list of available models on the [Ollama library](https://ollama.com/library).
+
+## Generate text
+
+As with other models, you can either first create a `Generator` object and then call it:
+```python
+from outlines.models import Ollama
+from outlines.generate import Generator
+
+model = Ollama("tinyllama")
+generator = Generator(model)
+answer = generator("Write a sentence about a cat.")
+```
+or call the model directly:
+```python
+from outlines.models import Ollama
+
+model = Ollama("tinyllama")
+answer = model("Write a sentence about a cat.")
+```
+
+The generation input must be a string; batch generation is not supported.
+The only supported output type is `Json`:
+```python
+from outlines.models import Ollama
+from outlines.types import Json
+from pydantic import BaseModel
+
+class Character(BaseModel):
+    name: str
+
+model = Ollama("tinyllama")
+answer = model("Create a character.", output_type=Json(Character))
+```
+
+You can also stream the tokens:
+```python
+from outlines.models import Ollama
+
+model = Ollama("tinyllama")
+tokens = model.stream("Write a sentence about a cat.")
+```
+
+## Optional parameters
+
+You can provide the same optional parameters you would pass to the `ollama` SDK's client, both when initializing the `Ollama` class and when generating text.
+Consult the [ollama Python SDK GitHub repository](https://github.com/ollama/ollama-python) for the full list of parameters.
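+
+For example, you can point the wrapper at a specific Ollama server through the client's `host` argument and pass sampling settings through the `options` mapping at generation time (both are simply forwarded to the `ollama` client; the exact set of supported options depends on your `ollama` version):
+```python
+from outlines.models import Ollama
+
+model = Ollama("tinyllama", host="http://localhost:11434")
+answer = model("Write a sentence about a cat.", options={"temperature": 0.8})
+```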
diff --git a/outlines/models/__init__.py b/outlines/models/__init__.py
index 320c20564..5816bbcb1 100644
--- a/outlines/models/__init__.py
+++ b/outlines/models/__init__.py
@@ -15,12 +15,15 @@ from .gemini import Gemini
 from .llamacpp import LlamaCpp
 from .mlxlm import MLXLM, mlxlm
+from .ollama import Ollama
 from .openai import AzureOpenAI, OpenAI
 from .transformers import Transformers, TransformerTokenizer, mamba, transformers
 from .transformers_vision import TransformersVision, transformers_vision
 from .vllm import VLLM, vllm
 
-LogitsGenerator = Union[Transformers, LlamaCpp, OpenAI, ExLlamaV2Model, MLXLM, VLLM]
+LogitsGenerator = Union[
+    Transformers, LlamaCpp, OpenAI, ExLlamaV2Model, MLXLM, VLLM, Ollama
+]
 
 LocalModel = LlamaCpp
 
-APIModel = Union[AzureOpenAI, OpenAI, Anthropic, Gemini, Dottxt]
+APIModel = Union[AzureOpenAI, OpenAI, Anthropic, Gemini, Ollama, Dottxt]
diff --git a/outlines/models/ollama.py b/outlines/models/ollama.py
new file mode 100644
index 000000000..c97460200
--- /dev/null
+++ b/outlines/models/ollama.py
@@ -0,0 +1,95 @@
+from functools import singledispatchmethod
+from types import NoneType
+from typing import Iterator
+
+from outlines.models.base import Model, ModelTypeAdapter
+from outlines.types import Json
+
+
+class OllamaTypeAdapter(ModelTypeAdapter):
+    """Type adapter for the Ollama model."""
+
+    @singledispatchmethod
+    def format_input(self, model_input):
+        """Generate the prompt argument to pass to the model.
+
+        Argument
+        --------
+        model_input
+            The input passed by the user.
+
+        """
+        raise NotImplementedError(
+            f"The input type {type(model_input)} is not available. "
+            "Ollama does not support batch inference."
+        )
+
+    @format_input.register(str)
+    def format_str_input(self, model_input: str):
+        return model_input
+
+    @singledispatchmethod
+    def format_output_type(self, output_type):
+        """Generate the `format` argument to pass to the model.
+
+        Argument
+        --------
+        output_type
+            The output type passed by the user.
+        """
+        raise NotImplementedError(
+            f"The output type {output_type} is not available. "
+            "Ollama only supports structured output with `Json`."
+        )
+
+    @format_output_type.register(NoneType)
+    def format_none_output_type(self, output_type: None):
+        return ""
+
+    @format_output_type.register(Json)
+    def format_json_output_type(self, output_type: Json):
+        return output_type.to_json_schema()
+
+
+class Ollama(Model):
+    """Thin wrapper around the `ollama` client.
+
+    This wrapper is used to convert the input and output types specified by the
+    users at a higher level to arguments to the `ollama` client.
+ + """ + + def __init__(self, model_name: str, *args, **kwargs): + from ollama import Client + + self.client = Client(*args, **kwargs) + self.model_name = model_name + self.type_adapter = OllamaTypeAdapter() + + @classmethod + def from_pretrained(cls, model_name: str, *args, **kwargs): + """Download the model weights from Ollama and create a `Ollama` instance.""" + from ollama import pull + + pull(model_name) + return cls(model_name, *args, **kwargs) + + def generate(self, model_input, output_type=None, **kwargs) -> str: + response = self.client.generate( + model=self.model_name, + prompt=self.type_adapter.format_input(model_input), + format=self.type_adapter.format_output_type(output_type), + **kwargs, + ) + return response.response + + def stream(self, model_input, output_type=None, **kwargs) -> Iterator[str]: + response = self.client.generate( + model=self.model_name, + prompt=self.type_adapter.format_input(model_input), + format=self.type_adapter.format_output_type(output_type), + stream=True, + **kwargs, + ) + for chunk in response: + yield chunk.response diff --git a/pyproject.toml b/pyproject.toml index d32fbb31f..d64f2867e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ gemini = ["google-generativeai"] llamacpp = ["llama-cpp-python", "transformers", "datasets", "numpy<2"] exllamav2 = ["exllamav2"] dottxt = ["dottxt"] +ollama = ["ollama"] test = [ "pre-commit", "pytest", @@ -78,7 +79,8 @@ test = [ "pillow", "exllamav2", "jax", - "dottxt" + "dottxt", + "ollama", ] serve = [ "vllm>=0.3.0", @@ -157,6 +159,7 @@ module = [ "airportsdata.*", "outlines_core.*", "dottxt.*", + "ollama.*", ] ignore_missing_imports = true diff --git a/tests/models/test_ollama.py b/tests/models/test_ollama.py new file mode 100644 index 000000000..ed05f3e21 --- /dev/null +++ b/tests/models/test_ollama.py @@ -0,0 +1,76 @@ +import json +from enum import Enum + +import pytest +from pydantic import BaseModel + +from outlines.models import Ollama +from outlines.types import Choice, Json + +MODEL_NAME = "tinyllama" + + +def test_pull_model(): + model = Ollama.from_pretrained(MODEL_NAME) + assert isinstance(model, Ollama) + + +def test_ollama_wrong_init_parameters(): + with pytest.raises(TypeError, match="got an unexpected"): + Ollama(MODEL_NAME, foo=10) + + +def test_wrong_inference_parameters(): + with pytest.raises(TypeError, match="got an unexpected"): + Ollama(MODEL_NAME).generate("Respond with one word. Not more.", None, foo=10) + + +def test_ollama_simple(): + result = Ollama(MODEL_NAME).generate("Respond with one word. Not more.", None) + assert isinstance(result, str) + + +def test_ollama_direct(): + result = Ollama(MODEL_NAME)("Respond with one word. Not more.", None) + assert isinstance(result, str) + + +def test_ollama_json(): + class Foo(BaseModel): + foo: str + + result = Ollama(MODEL_NAME)("Respond with one word. 
Not more.", Json(Foo)) + assert isinstance(result, str) + assert "foo" in json.loads(result) + + +def test_ollama_wrong_output_type(): + class Foo(Enum): + bar = "Bar" + foor = "Foo" + + with pytest.raises(NotImplementedError, match="is not available"): + Ollama(MODEL_NAME).generate("foo?", Choice(Foo)) + + +def test_ollama_wrong_input_type(): + with pytest.raises(NotImplementedError, match="is not available"): + Ollama(MODEL_NAME).generate(["foo?", "bar?"], None) + + +def test_ollama_stream(): + model = Ollama(MODEL_NAME) + generator = model.stream("Write a sentence about a cat.") + assert isinstance(next(generator), str) + + +def test_ollama_stream_json(): + class Foo(BaseModel): + foo: str + + model = Ollama(MODEL_NAME) + generator = model.stream("Create a character.", Json(Foo)) + generated_text = [] + for text in generator: + generated_text.append(text) + assert "foo" in json.loads("".join(generated_text))