From 6f7ea406bf7de9186979a0a2a1c13578f6d158aa Mon Sep 17 00:00:00 2001 From: web-sst <36205453+web-sst@users.noreply.github.com> Date: Wed, 22 Jan 2025 22:14:03 -0600 Subject: [PATCH] Register full embedding model names (#654) Provide backward compatible aliases. This makes available the same model names that ttok uses. --- llm/default_plugins/openai_models.py | 12 ++++++------ tests/test_aliases.py | 4 ++-- tests/test_embed_cli.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llm/default_plugins/openai_models.py b/llm/default_plugins/openai_models.py index 0ec9dae2..a59690f1 100644 --- a/llm/default_plugins/openai_models.py +++ b/llm/default_plugins/openai_models.py @@ -134,14 +134,14 @@ def register_models(register): @hookimpl def register_embedding_models(register): register( - OpenAIEmbeddingModel("ada-002", "text-embedding-ada-002"), aliases=("ada",) + OpenAIEmbeddingModel("text-embedding-ada-002", "text-embedding-ada-002"), aliases=("ada","ada-002",) ) - register(OpenAIEmbeddingModel("3-small", "text-embedding-3-small")) - register(OpenAIEmbeddingModel("3-large", "text-embedding-3-large")) + register(OpenAIEmbeddingModel("text-embedding-3-small", "text-embedding-3-small"), aliases=("3-small",)) + register(OpenAIEmbeddingModel("text-embedding-3-large", "text-embedding-3-large"), aliases=("3-large",)) # With varying dimensions - register(OpenAIEmbeddingModel("3-small-512", "text-embedding-3-small", 512)) - register(OpenAIEmbeddingModel("3-large-256", "text-embedding-3-large", 256)) - register(OpenAIEmbeddingModel("3-large-1024", "text-embedding-3-large", 1024)) + register(OpenAIEmbeddingModel("text-embedding-3-small-512", "text-embedding-3-small", 512), aliases=("3-small-512",)) + register(OpenAIEmbeddingModel("text-embedding-3-large-256", "text-embedding-3-large", 256), aliases=("3-large-256",)) + register(OpenAIEmbeddingModel("text-embedding-3-large-1024", "text-embedding-3-large", 1024), aliases=("3-large-1024",)) class OpenAIEmbeddingModel(EmbeddingModel): diff --git a/tests/test_aliases.py b/tests/test_aliases.py index dc1483b8..b08871a9 100644 --- a/tests/test_aliases.py +++ b/tests/test_aliases.py @@ -39,7 +39,7 @@ def test_cli_aliases_list(args): "gpt4 : gpt-4\n" "4-32k : gpt-4-32k\n" "e-demo : embed-demo (embedding)\n" - "ada : ada-002 (embedding)\n" + "ada : text-embedding-ada-002 (embedding)\n" ).split("\n"): line = line.strip() if not line: @@ -65,7 +65,7 @@ def test_cli_aliases_list_json(args): "4": "gpt-4", "gpt4": "gpt-4", "4-32k": "gpt-4-32k", - "ada": "ada-002", + "ada": "text-embedding-ada-002", "e-demo": "embed-demo", }.items() ) diff --git a/tests/test_embed_cli.py b/tests/test_embed_cli.py index 007aac5d..57462316 100644 --- a/tests/test_embed_cli.py +++ b/tests/test_embed_cli.py @@ -554,7 +554,7 @@ def test_default_embedding_model(): assert result2.exit_code == 0 result3 = runner.invoke(cli, ["embed-models", "default"]) assert result3.exit_code == 0 - assert result3.output == "ada-002\n" + assert result3.output == "text-embedding-ada-002\n" result4 = runner.invoke(cli, ["embed-models", "default", "--remove-default"]) assert result4.exit_code == 0 result5 = runner.invoke(cli, ["embed-models", "default"])