From ee4bb9c6cd6a229537ac21cd7e3159bbe51e7486 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Louf?=
Date: Sat, 22 Feb 2025 16:33:48 +0100
Subject: [PATCH] Fix tests

---
 outlines/__init__.py                    |  4 +--
 tests/generate/test_generate.py         | 48 +++++++++++++------------
 tests/generate/test_integration_vllm.py | 12 +++----
 3 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/outlines/__init__.py b/outlines/__init__.py
index 77a4becd2..6229236f8 100644
--- a/outlines/__init__.py
+++ b/outlines/__init__.py
@@ -24,7 +24,7 @@
 )
 
 
-models = [
+model_list = [
     "from_anthropic",
     "from_gemini",
     "from_llamacpp",
@@ -47,4 +47,4 @@
     "Prompt",
     "vectorize",
     "grammars",
-] + models
+] + model_list
diff --git a/tests/generate/test_generate.py b/tests/generate/test_generate.py
index 64dee6e4b..aae31b959 100644
--- a/tests/generate/test_generate.py
+++ b/tests/generate/test_generate.py
@@ -4,8 +4,8 @@
 
 import pytest
 
+import outlines
 import outlines.generate as generate
-import outlines.models as models
 import outlines.samplers as samplers
 
 ##########################################
@@ -22,7 +22,7 @@ def model_llamacpp(tmp_path_factory):
         filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
         verbose=False,
     )
-    return models.LlamaCpp(llm)
+    return outlines.from_llamacpp(llm)
 
 
 @pytest.fixture(scope="session")
@@ -44,56 +44,60 @@ def model_exllamav2(tmp_path_factory):
 
 @pytest.fixture(scope="session")
 def model_mlxlm(tmp_path_factory):
-    return models.mlxlm("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit")
+    from mlx_lm import load
+    return outlines.from_mlxlm(*load("mlx-community/TinyLlama-1.1B-Chat-v1.0-4bit"))
 
 
 @pytest.fixture(scope="session")
 def model_mlxlm_phi3(tmp_path_factory):
-    return models.mlxlm("mlx-community/Phi-3-mini-4k-instruct-4bit")
+    from mlx_lm import load
+    return outlines.from_mlxlm(*load("mlx-community/Phi-3-mini-4k-instruct-4bit"))
 
 
 @pytest.fixture(scope="session")
 def model_transformers_random(tmp_path_factory):
-    return models.Transformers("hf-internal-testing/tiny-random-gpt2")
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+    return outlines.from_transformers(
+        AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2"),
+        AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2"),
+    )
 
 
 @pytest.fixture(scope="session")
 def model_transformers_opt125m(tmp_path_factory):
-    return models.Transformers("facebook/opt-125m")
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+    return outlines.from_transformers(
+        AutoModelForCausalLM.from_pretrained("facebook/opt-125m"),
+        AutoTokenizer.from_pretrained("facebook/opt-125m"),
+    )
 
 
 @pytest.fixture(scope="session")
 def model_mamba(tmp_path_factory):
-    return models.Mamba(model_name="state-spaces/mamba-130m-hf")
+    from transformers import MambaForCausalLM, AutoTokenizer
+    return outlines.from_transformers(MambaForCausalLM.from_pretrained("state-spaces/mamba-130m-hf"), AutoTokenizer.from_pretrained("state-spaces/mamba-130m-hf"))
 
 
 @pytest.fixture(scope="session")
 def model_bart(tmp_path_factory):
-    from transformers import AutoModelForSeq2SeqLM
-
-    return models.Transformers("facebook/bart-base", model_class=AutoModelForSeq2SeqLM)
+    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+    return outlines.from_transformers(AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-base"), AutoTokenizer.from_pretrained("facebook/bart-base"))
 
 
 @pytest.fixture(scope="session")
 def model_transformers_vision(tmp_path_factory):
     import torch
-    from transformers import LlavaNextForConditionalGeneration
-
-    return models.transformers_vision(
-        "llava-hf/llava-v1.6-mistral-7b-hf",
-        model_class=LlavaNextForConditionalGeneration,
-        device="cuda",
-        model_kwargs=dict(
-            torch_dtype=torch.bfloat16,
-            load_in_4bit=True,
-            low_cpu_mem_usage=True,
-        ),
+    from transformers import LlavaNextForConditionalGeneration, AutoTokenizer
+    return outlines.from_transformers(
+        LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf", torch_dtype=torch.bfloat16, load_in_4bit=True, low_cpu_mem_usage=True),
+        AutoTokenizer.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf"),
     )
 
 
 @pytest.fixture(scope="session")
 def model_vllm(tmp_path_factory):
-    return models.vllm("facebook/opt-125m", gpu_memory_utilization=0.1)
+    from vllm import LLM
+    return outlines.from_vllm(LLM("facebook/opt-125m", gpu_memory_utilization=0.1))
 
 
 # TODO: exllamav2 failing in main, address in https://github.com/dottxt-ai/outlines/issues/808
diff --git a/tests/generate/test_integration_vllm.py b/tests/generate/test_integration_vllm.py
index faa8a404a..7b9acc240 100644
--- a/tests/generate/test_integration_vllm.py
+++ b/tests/generate/test_integration_vllm.py
@@ -4,12 +4,6 @@
 import pytest
 import torch
 from pydantic import BaseModel, constr
-from vllm import LLM
-
-try:
-    from vllm.sampling_params import SamplingParams
-except ImportError:
-    pass
 
 import outlines
 import outlines.generate as generate
@@ -17,6 +11,12 @@
 import outlines.models as models
 import outlines.samplers as samplers
 
+try:
+    from vllm import LLM
+    from vllm.sampling_params import SamplingParams
+except ImportError:
+    pass
+
 pytestmark = pytest.mark.skipif(
     not torch.cuda.is_available(), reason="vLLM models can only be run on GPU."
 )
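
Below is a minimal usage sketch of the constructor-based API the fixtures now use. It assumes `outlines` and `transformers` are installed and that `generate.text` accepts models built with `outlines.from_transformers`, as the updated tests imply; it is an illustration, not part of the patch.

import outlines
import outlines.generate as generate
from transformers import AutoModelForCausalLM, AutoTokenizer

# Wrap a Hugging Face model/tokenizer pair, mirroring the updated fixtures.
model = outlines.from_transformers(
    AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gpt2"),
    AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-gpt2"),
)

# Pass the wrapped model to the generate API, as the tests do.
generator = generate.text(model)
print(generator("Hello", max_tokens=5))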