diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 0565d0f3c..754a47873 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -33,6 +33,8 @@ jobs:
         run: |
           curl -fsSL https://ollama.com/install.sh | sh
           ollama --version
+          ollama serve &
+          ollama pull tinyllama
       - name: Set up test environment
         run: |
           python -m pip install --upgrade pip
diff --git a/outlines/__init__.py b/outlines/__init__.py
index 77a4becd2..6229236f8 100644
--- a/outlines/__init__.py
+++ b/outlines/__init__.py
@@ -24,7 +24,7 @@
 )


-models = [
+model_list = [
     "from_anthropic",
     "from_gemini",
     "from_llamacpp",
@@ -47,4 +47,4 @@
     "Prompt",
     "vectorize",
     "grammars",
-] + models
+] + model_list
diff --git a/outlines/function.py b/outlines/function.py
index 48577be8f..1302fdb7d 100644
--- a/outlines/function.py
+++ b/outlines/function.py
@@ -4,7 +4,8 @@

 import requests

-from outlines import generate, models
+import outlines
+from outlines import Generator, JsonType

 if TYPE_CHECKING:
     from outlines.generate.api import SequenceGenerator
@@ -37,8 +38,14 @@ def from_github(cls, program_path: str, function_name: str = "fn"):

     def init_generator(self):
         """Load the model and initialize the generator."""
-        model = models.transformers(self.model_name)
-        self.generator = generate.json(model, self.schema)
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+
+        model = outlines.from_transformers(
+            AutoModelForCausalLM.from_pretrained(self.model_name),
+            AutoTokenizer.from_pretrained(self.model_name),
+        )
+
+        self.generator = Generator(model, JsonType(self.schema))

     def __call__(self, *args, **kwargs):
         """Call the function.
diff --git a/tests/generate/test_generate.py b/tests/generate/test_generate.py
index 64dee6e4b..1bf3f2a41 100644
--- a/tests/generate/test_generate.py
+++ b/tests/generate/test_generate.py
@@ -4,8 +4,8 @@

 import pytest

+import outlines
 import outlines.generate as generate
-import outlines.models as models
 import outlines.samplers as samplers

 ##########################################
@@ -22,11 +22,12 @@ def model_llamacpp(tmp_path_factory):
         filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
         verbose=False,
     )
-    return models.LlamaCpp(llm)
+    return outlines.from_llamacpp(llm)


 @pytest.fixture(scope="session")
 def model_exllamav2(tmp_path_factory):
+    from outlines.models.exllamav2 import exl2
     from huggingface_hub import snapshot_download

     tmp_dir = tmp_path_factory.mktemp("model_download")
@@ -35,7 +36,7 @@ def model_exllamav2(tmp_path_factory):
         cache_dir=tmp_dir,
     )

-    return models.exl2(
+    return exl2(
         model_path=model_path,
         cache_q4=True,
         paged=False,
@@ -44,56 +45,79 @@

 @pytest.fixture(scope="session")
 def model_mlxlm(tmp_path_factory):
-    return models.mlxlm("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit")
+    from mlx_lm import load
+
+    return outlines.from_mlxlm(*load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit"))


 @pytest.fixture(scope="session")
 def model_mlxlm_phi3(tmp_path_factory):
-    return models.mlxlm("mlx-community/Phi-3-mini-4k-instruct-4bit")
+    from mlx_lm import load
+
+    return outlines.from_mlxlm(*load("mlx-community/Phi-3-mini-4k-instruct-4bit"))


 @pytest.fixture(scope="session")
 def model_transformers_random(tmp_path_factory):
-    return models.Transformers("hf-internal-testing/tiny-random-gpt2")
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+
+    return outlines.from_transformers(
+        AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2"),
+        AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2"),
+    )


 @pytest.fixture(scope="session")
 def model_transformers_opt125m(tmp_path_factory):
-    return models.Transformers("facebook/opt-125m")
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+
+    return outlines.from_transformers(
+        AutoModelForCausalLM.from_pretrained("facebook/opt-125m"),
+        AutoTokenizer.from_pretrained("facebook/opt-125m"),
+    )


 @pytest.fixture(scope="session")
 def model_mamba(tmp_path_factory):
-    return models.Mamba(model_name="state-spaces/mamba-130m-hf")
+    from transformers import MambaForCausalLM, AutoTokenizer
+
+    return outlines.from_transformers(
+        MambaForCausalLM.from_pretrained("state-spaces/mamba-130m-hf"),
+        AutoTokenizer.from_pretrained("state-spaces/mamba-130m-hf"),
+    )


 @pytest.fixture(scope="session")
 def model_bart(tmp_path_factory):
-    from transformers import AutoModelForSeq2SeqLM
+    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

-    return models.Transformers("facebook/bart-base", model_class=AutoModelForSeq2SeqLM)
+    return outlines.from_transformers(
+        AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-base"),
+        AutoTokenizer.from_pretrained("facebook/bart-base"),
+    )


 @pytest.fixture(scope="session")
 def model_transformers_vision(tmp_path_factory):
     import torch
-    from transformers import LlavaNextForConditionalGeneration
+    from transformers import LlavaNextForConditionalGeneration, AutoTokenizer

-    return models.transformers_vision(
-        "llava-hf/llava-v1.6-mistral-7b-hf",
-        model_class=LlavaNextForConditionalGeneration,
-        device="cuda",
-        model_kwargs=dict(
+    return outlines.from_transformers(
+        LlavaNextForConditionalGeneration.from_pretrained(
+            "llava-hf/llava-v1.6-mistral-7b-hf",
             torch_dtype=torch.bfloat16,
             load_in_4bit=True,
             low_cpu_mem_usage=True,
         ),
+        AutoTokenizer.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf"),
     )


 @pytest.fixture(scope="session")
 def model_vllm(tmp_path_factory):
-    return models.vllm("facebook/opt-125m", gpu_memory_utilization=0.1)
+    from vllm import LLM
+
+    return outlines.from_vllm(LLM("facebook/opt-125m", gpu_memory_utilization=0.1))


 # TODO: exllamav2 failing in main, address in https://github.com/dottxt-ai/outlines/issues/808
diff --git a/tests/generate/test_integration_vllm.py b/tests/generate/test_integration_vllm.py
index faa8a404a..7b9acc240 100644
--- a/tests/generate/test_integration_vllm.py
+++ b/tests/generate/test_integration_vllm.py
@@ -4,12 +4,6 @@
 import pytest
 import torch
 from pydantic import BaseModel, constr
-from vllm import LLM
-
-try:
-    from vllm.sampling_params import SamplingParams
-except ImportError:
-    pass

 import outlines
 import outlines.generate as generate
@@ -17,6 +11,12 @@ import outlines.models as models
 import outlines.samplers as samplers

+try:
+    from vllm import LLM
+    from vllm.sampling_params import SamplingParams
+except ImportError:
+    pass
+
 pytestmark = pytest.mark.skipif(
     not torch.cuda.is_available(), reason="vLLM models can only be run on GPU."
 )

diff --git a/tests/test_function.py b/tests/test_function.py
index 62f7ea29f..3b9407ac5 100644
--- a/tests/test_function.py
+++ b/tests/test_function.py
@@ -1,6 +1,9 @@
+from typing import Annotated
+
+import json
 import pytest
 import responses
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from requests.exceptions import HTTPError

 import outlines
@@ -13,14 +16,15 @@ def test_template(text: str):
         """{{ text }}"""

     class Foo(BaseModel):
-        id: int
+        id: Annotated[str, Field(min_length=10, max_length=10)]

     fn = Function(test_template, Foo, "hf-internal-testing/tiny-random-GPTJForCausalLM")

     assert fn.generator is None

     result = fn("test")
-    assert isinstance(result, BaseModel)
+    assert isinstance(json.loads(result), dict)
+    assert "id" in json.loads(result)


 def test_download_from_github_invalid():
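
For reference, the call pattern this patch migrates to is sketched below. It is a minimal, illustrative sketch only: it reuses the entry points exercised above (outlines.from_transformers, Generator, JsonType), the facebook/opt-125m checkpoint from the fixtures, and the Foo schema from tests/test_function.py; the prompt string and variable names are placeholders, not part of the patch.

import json
from typing import Annotated

from pydantic import BaseModel, Field
from transformers import AutoModelForCausalLM, AutoTokenizer

import outlines
from outlines import Generator, JsonType


class Foo(BaseModel):
    # Same constrained field as the updated test in tests/test_function.py
    id: Annotated[str, Field(min_length=10, max_length=10)]


# A model is now built from an explicit transformers model/tokenizer pair
# instead of models.transformers("...").
model = outlines.from_transformers(
    AutoModelForCausalLM.from_pretrained("facebook/opt-125m"),
    AutoTokenizer.from_pretrained("facebook/opt-125m"),
)

# Generator(model, JsonType(schema)) replaces generate.json(model, schema).
generator = Generator(model, JsonType(Foo))

# The generator returns a JSON string rather than a Pydantic instance,
# which is why the updated test parses the output with json.loads().
result = generator("Return a JSON object with a 10-character id.")  # placeholder prompt
data = json.loads(result)
assert "id" in data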