diff --git a/cache_config_template.yml b/cache_config_template.yml
index 4d07c867..8721c955 100644
--- a/cache_config_template.yml
+++ b/cache_config_template.yml
@@ -1,9 +1,9 @@
 # For `model_src`, `evaluation`, `post_function`, `pre_function`,
 # `storage_config` options, Check README for more.
-model_src:
+embedding:
     onnx
-model_config:
+embedding_config:
     # Set model kws here including `model`, `api_key` if needed
 storage_config:
     data_dir:
@@ -14,7 +14,7 @@ storage_config:
         # Set vector storage related params here
 evaluation:
     distance
-evaluation_kws:
+evaluation_config:
     # Set evaluation metric kws here
 pre_function:
     get_prompt
diff --git a/docs/release_note.md b/docs/release_note.md
index 85aa123c..911dc170 100644
--- a/docs/release_note.md
+++ b/docs/release_note.md
@@ -5,6 +5,97 @@
 To read the following content, you need to understand the basic use of GPTCache,
 
 - [Readme doc](https://github.com/zilliztech/GPTCache)
 - [Usage doc](https://github.com/zilliztech/GPTCache/blob/main/docs/usage.md)
 
+## v0.1.28 (2023.5.29)
+To handle a large prompt, there are currently three options available:
+
+1. Increase the column size of CacheStorage.
+
+```python
+from gptcache.manager import manager_factory
+
+data_manager = manager_factory(
+    "sqlite,faiss", scalar_params={"table_len_config": {"question_question": 5000}}
+)
+
+```
+More details:
+- 'question_question': the question column size in the question table, defaults to 3000.
+- 'answer_answer': the answer column size in the answer table, defaults to 3000.
+- 'session_id': the session id column size in the session table, defaults to 1000.
+- 'dep_name': the name column size in the dep table, defaults to 1000.
+- 'dep_data': the data column size in the dep table, defaults to 3000.
+
+2. When using a template, use the dynamic value in the template as the cache key instead of using the entire template as the key.
+
+- **str template**
+```python
+from gptcache import Config
+from gptcache.processor.pre import last_content_without_template
+
+template_obj = "tell me a joke about {subject}"
+prompt = template_obj.format(subject="animal")
+value = last_content_without_template(
+    data={"messages": [{"content": prompt}]}, cache_config=Config(template=template_obj)
+)
+print(value)
+# ['animal']
+```
+
+- **langchain prompt template**
+
+```python
+from langchain import PromptTemplate
+
+from gptcache import Config
+from gptcache.processor.pre import last_content_without_template
+
+template_obj = PromptTemplate.from_template("tell me a joke about {subject}")
+prompt = template_obj.format(subject="animal")
+
+value = last_content_without_template(
+    data={"messages": [{"content": prompt}]},
+    cache_config=Config(template=template_obj.template),
+)
+print(value)
+# ['animal']
+```
+
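+In practice this means the pre-processing function and the `Config.template` value are set when the cache is initialized. A minimal sketch, assuming the usual `cache.init` keywords and default storage (adjust to your own setup):
+
+```python
+from gptcache import Config, cache
+from gptcache.processor.pre import last_content_without_template
+
+template_obj = "tell me a joke about {subject}"
+# Only the dynamic part of the prompt (e.g. "animal") is used for the cache lookup,
+# so prompts sharing the same template no longer collide with each other.
+cache.init(
+    pre_embedding_func=last_content_without_template,
+    config=Config(template=template_obj),
+)
+```
+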
+3. Wrap the openai object, reference: [BaseCacheLLM](https://gptcache.readthedocs.io/en/dev/references/adapter.html#module-gptcache.adapter.base)
+
+```python
+import random
+
+from gptcache import Cache
+from gptcache.adapter import openai
+from gptcache.adapter.api import init_similar_cache
+from gptcache.processor.pre import last_content
+
+cache_obj = Cache()
+init_similar_cache(
+    data_dir=str(random.random()), pre_func=last_content, cache_obj=cache_obj
+)
+
+
+def proxy_openai_chat_complete(*args, **kwargs):
+    # Custom logic (routing, logging, retries, ...) can run here before the
+    # request is forwarded to the real OpenAI client.
+    import openai as real_openai
+
+    return real_openai.ChatCompletion.create(*args, **kwargs)
+
+
+openai.ChatCompletion.llm = proxy_openai_chat_complete
+openai.ChatCompletion.cache_args = {"cache_obj": cache_obj}
+
+openai.ChatCompletion.create(
+    model="gpt-3.5-turbo",
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "What's GitHub"},
+    ],
+)
+```
+
 ## v0.1.27 (2023.5.25)
 
 1. Support the uform embedding, which can be used the **bilingual** (english + chinese) language
diff --git a/examples/README.md b/examples/README.md
index db3a7517..4c719485 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,9 +1,9 @@
 # Example
 
 - [How to run Visual Question Answering with MiniGPT-4](#How-to-run-Visual-Question-Answering-with-MiniGPT-4)
-- [How to set the `embedding` function](#How-to-set-the-embedding-function)
-- [How to set the `data manager` class](#How-to-set-the-data-manager-class)
-- [How to set the `similarity evaluation` interface](#How-to-set-the-similarity-evaluation-interface)
+- [How to set the **embedding** function](#How-to-set-the-embedding-function)
+- [How to set the **data manager** class](#How-to-set-the-data-manager-class)
+- [How to set the **similarity evaluation** interface](#How-to-set-the-similarity-evaluation-interface)
 - [Other cache init params](#Other-cache-init-params)
 - [How to run with session](#How-to-run-with-session)
 - [How to use GPTCache server](#How-to-use-GPTCache-server)
@@ -572,10 +572,10 @@ The args are optional:
 You can config the server via a YAML file, here is an example config yaml:
 
 ```yaml
-model_src:
+embedding:
     onnx
-model_config:
-    # Set model kws here including `model`, `api_key` if needed
+embedding_config:
+    # Set embedding model params here
 storage_config:
     data_dir:
         gptcache_data
@@ -585,7 +585,7 @@ storage_config:
         # Set vector storage related params here
 evaluation:
     distance
-evaluation_kws:
+evaluation_config:
     # Set evaluation metric kws here
 pre_function:
     get_prompt
@@ -595,15 +595,15 @@ config:
     similarity_threshold: 0.8
     # Set other config here
 ```
-- model_source: The model source.
-- model_config: The model name, model config, api key.
+- embedding: The embedding model source, options: [How to set the **embedding** function](#How-to-set-the-embedding-function)
+- embedding_config: The embedding model config, details: [Embedding Reference](https://gptcache.readthedocs.io/en/latest/references/embedding.html)
 - data_dir: The cache directory.
 - manager: The cache storage and vector storage.
-- evaluation: The evaluation storage.
+- evaluation: The evaluation component, options: [How to set the **similarity evaluation** interface](#How-to-set-the-similarity-evaluation-interface)
+- evaluation_config: The evaluation config, options: [Similarity Evaluation Reference](https://gptcache.readthedocs.io/en/latest/references/similarity_evaluation.html)
 - pre_function: The pre-processing function.
 - post_function: The post-processing function.
-
-For `model_src`, `evaluation`, `storage_config` options, check [README.md](https://github.com/zilliztech/gpt-cache/tree/main/examples) for more.
+- config: The cache config, like `similarity_threshold`
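+
+The same YAML file can also be loaded directly in a Python application instead of going through the server. A minimal sketch, assuming the file above is saved as `gptcache_config.yaml` (the file name is illustrative):
+
+```python
+from gptcache.adapter.api import get, init_similar_cache_from_config, put
+
+# Build the cache (embedding model, storage, evaluation, ...) from the YAML file.
+init_similar_cache_from_config(config_dir="gptcache_config.yaml")
+
+put("hello", "foo")
+print(get("hello"))  # -> "foo"
+```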
 
 **Use the docker to start the GPTCache server**
diff --git a/examples/context_process/selective_context.py b/examples/context_process/selective_context.py
index 2a5b8704..2b6df3aa 100644
--- a/examples/context_process/selective_context.py
+++ b/examples/context_process/selective_context.py
@@ -5,7 +5,7 @@
 from gptcache.adapter import openai
 from gptcache.embedding import Onnx
 from gptcache.manager import manager_factory
-from gptcache.processor.context.selective_context import SelectiveContextProcess
+from gptcache.processor.context import SelectiveContextProcess
 from gptcache.similarity_evaluation import SearchDistanceEvaluation
 from gptcache.utils import import_selective_context
diff --git a/examples/context_process/summarization_context.py b/examples/context_process/summarization_context.py
index eeae76c9..3ec16750 100644
--- a/examples/context_process/summarization_context.py
+++ b/examples/context_process/summarization_context.py
@@ -5,7 +5,7 @@
 from gptcache.adapter import openai
 from gptcache.embedding import Onnx
 from gptcache.manager import manager_factory
-from gptcache.processor.context.summarization_context import SummarizationContextProcess
+from gptcache.processor.context import SummarizationContextProcess
 from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation
diff --git a/gptcache/adapter/api.py b/gptcache/adapter/api.py
index 690b483f..3cf1671f 100644
--- a/gptcache/adapter/api.py
+++ b/gptcache/adapter/api.py
@@ -21,6 +21,11 @@
 from gptcache.embedding.base import BaseEmbedding
 from gptcache.manager import manager_factory
 from gptcache.manager.data_manager import DataManager
+from gptcache.processor.context import (
+    SummarizationContextProcess,
+    SelectiveContextProcess,
+    ConcatContextProcess,
+)
 from gptcache.processor.post import temperature_softmax
 from gptcache.processor.pre import get_prompt
 from gptcache.similarity_evaluation import (
@@ -192,9 +197,15 @@ def init_similar_cache_from_config(config_dir: str, cache_obj: Optional[Cache] =
     else:
         init_conf = {}
 
-    model_src = init_conf.get("model_source", "onnx")
-    model_config = init_conf.get("model_config", {})
-    embedding_model = _get_model(model_src, model_config)
+    # The old `model_source` key is still read first to stay compatible with early configs
+    embedding = init_conf.get("model_source", "")
+    if not embedding:
+        embedding = init_conf.get("embedding", "onnx")
+    # Likewise for the old `model_config` key
+    embedding_config = init_conf.get("model_config", {})
+    if not embedding_config:
+        embedding_config = init_conf.get("embedding_config", {})
+    embedding_model = _get_model(embedding, embedding_config)
 
     storage_config = init_conf.get("storage_config", {})
     storage_config.setdefault("manager", "sqlite,faiss")
@@ -205,13 +216,23 @@
     data_manager = manager_factory(**storage_config)
 
     eval_strategy = init_conf.get("evaluation", "distance")
-    eval_kws = init_conf.get("evaluation_kws")
-    evaluation = _get_eval(eval_strategy, eval_kws)
+    # The old `evaluation_kws` key is still read first to stay compatible with early configs
+    eval_config = init_conf.get("evaluation_kws", {})
+    if not eval_config:
+        eval_config = init_conf.get("evaluation_config", {})
+    evaluation = _get_eval(eval_strategy, eval_config)
 
     cache_obj = cache_obj if cache_obj else cache
 
-    pre_prcocess = init_conf.get("pre_function", "get_prompt")
-    pre_func = _get_pre_func(pre_prcocess)
+    pre_process = init_conf.get("pre_context_function")
+    if pre_process:
+        pre_func = _get_pre_context_function(
+            pre_process, init_conf.get("pre_context_config")
+        )
+        pre_func = pre_func.pre_process
+    else:
+        pre_process = init_conf.get("pre_function", "get_prompt")
+        pre_func = _get_pre_func(pre_process)
 
     post_process = init_conf.get("post_function", "first")
     post_func = _get_post_func(post_process)
@@ -273,8 +294,19 @@
         return KReciprocalEvaluation(**kws)
 
 
-def _get_pre_func(pre_prcocess):
-    return getattr(gptcache.processor.pre, pre_prcocess)
+def _get_pre_func(pre_process):
+    return getattr(gptcache.processor.pre, pre_process)
+
+
+def _get_pre_context_function(pre_context_process, kws=None):
+    pre_context_process = pre_context_process.lower()
+    kws = kws or {}
+    if pre_context_process in "summarization":
+        return SummarizationContextProcess(**kws)
+    if pre_context_process in "selective":
+        return SelectiveContextProcess(**kws)
+    if pre_context_process in "concat":
+        return ConcatContextProcess()
 
 
 def _get_post_func(post_process):
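With the compatibility handling in `init_similar_cache_from_config` above, config files written for the old key names (`model_source`, `model_config`, `evaluation_kws`) still load, while new files can use the renamed keys. A minimal sketch of a new-style file, with illustrative values borrowed from the tests further down:

```yaml
embedding: onnx
embedding_config:
  model: GPTCache/paraphrase-albert-onnx
storage_config:
  manager: sqlite,faiss
  data_dir: gptcache_data
evaluation: distance
evaluation_config:
  max_distance: 4.0
pre_context_function: concat
post_function: first
```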
init_conf.get("pre_function", "get_prompt") - pre_func = _get_pre_func(pre_prcocess) + pre_process = init_conf.get("pre_context_function") + if pre_process: + pre_func = _get_pre_context_function( + pre_process, init_conf.get("pre_context_config") + ) + pre_func = pre_func.pre_process + else: + pre_process = init_conf.get("pre_function", "get_prompt") + pre_func = _get_pre_func(pre_process) post_process = init_conf.get("post_function", "first") post_func = _get_post_func(post_process) @@ -273,8 +294,19 @@ def _get_eval(strategy, kws=None): return KReciprocalEvaluation(**kws) -def _get_pre_func(pre_prcocess): - return getattr(gptcache.processor.pre, pre_prcocess) +def _get_pre_func(pre_process): + return getattr(gptcache.processor.pre, pre_process) + + +def _get_pre_context_function(pre_context_process, kws=None): + pre_context_process = pre_context_process.lower() + kws = kws or {} + if pre_context_process in "summarization": + return SummarizationContextProcess(**kws) + if pre_context_process in "selective": + return SelectiveContextProcess(**kws) + if pre_context_process in "concat": + return ConcatContextProcess() def _get_post_func(post_process): diff --git a/gptcache/processor/context/__init__.py b/gptcache/processor/context/__init__.py index e69de29b..3439e55e 100644 --- a/gptcache/processor/context/__init__.py +++ b/gptcache/processor/context/__init__.py @@ -0,0 +1,44 @@ +from gptcache.utils.lazy_import import LazyImport + +summarization = LazyImport( + "summarization_context", + globals(), + "gptcache.processor.context.summarization_context", +) +selective = LazyImport( + "selective_context", globals(), "gptcache.processor.context.selective_context" +) +concat = LazyImport( + "concat_context", globals(), "gptcache.processor.context.concat_context" +) + + +__all__ = [ + "SummarizationContextProcess", + "SelectiveContextProcess", + "ConcatContextProcess", +] + + +def SummarizationContextProcess(summarizer=None, tokenizer=None, target_length=512): + return summarization.SummarizationContextProcess( + summarizer, tokenizer, target_length + ) + + +def SelectiveContextProcess( + model_type: str = "gpt2", + lang: str = "en", + reduce_ratio: float = 0.35, + reduce_level: str = "phrase", +): + return selective.SelectiveContextProcess( + model_type=model_type, + lang=lang, + reduce_ratio=reduce_ratio, + reduce_level=reduce_level, + ) + + +def ConcatContextProcess(): + return concat.ConcatContextProcess() diff --git a/gptcache/processor/context/summarization_context.py b/gptcache/processor/context/summarization_context.py index e82fae8a..00cb620b 100644 --- a/gptcache/processor/context/summarization_context.py +++ b/gptcache/processor/context/summarization_context.py @@ -31,6 +31,8 @@ class SummarizationContextProcess(ContextProcess): """ def __init__(self, summarizer=transformers.pipeline("summarization", model="facebook/bart-large-cnn"), tokenizer=None, target_length=512): + if not summarizer: + summarizer = transformers.pipeline("summarization", model="facebook/bart-large-cnn") self.summarizer = summarizer self.target_length = target_length if tokenizer is None: diff --git a/tests/unit_tests/adapter/test_api.py b/tests/unit_tests/adapter/test_api.py index a308e059..6667d7cf 100644 --- a/tests/unit_tests/adapter/test_api.py +++ b/tests/unit_tests/adapter/test_api.py @@ -1,15 +1,18 @@ # pylint: disable=wrong-import-position import os from pathlib import Path +from unittest.mock import patch -from gptcache.utils import import_ruamel +from gptcache import cache, Config, Cache +from 
diff --git a/tests/unit_tests/adapter/test_api.py b/tests/unit_tests/adapter/test_api.py
index a308e059..6667d7cf 100644
--- a/tests/unit_tests/adapter/test_api.py
+++ b/tests/unit_tests/adapter/test_api.py
@@ -1,15 +1,18 @@
 # pylint: disable=wrong-import-position
 import os
 from pathlib import Path
+from unittest.mock import patch
 
-from gptcache.utils import import_ruamel
+from gptcache import cache, Config, Cache
+from gptcache.adapter import openai
 from gptcache.adapter.api import put, get, init_similar_cache, init_similar_cache_from_config
+from gptcache.embedding import Onnx as EmbeddingOnnx
 from gptcache.manager import CacheBase, VectorBase, get_data_manager
-from gptcache.processor.pre import get_prompt
 from gptcache.processor.post import nop
-from gptcache import cache, Config, Cache
-from gptcache.embedding import Onnx as EmbeddingOnnx
+from gptcache.processor.pre import get_prompt
 from gptcache.similarity_evaluation import SearchDistanceEvaluation
+from gptcache.utils import import_ruamel
+from gptcache.utils.response import get_message_from_openai_answer
 
 import_ruamel()
@@ -88,13 +91,13 @@ def test_init_with_config():
     config = {
         "storage_config": {
             "manager": "sqlite,faiss",
-            "data_dir": "./",
+            "data_dir": "test-config/",
        },
         "model_source": "onnx",
         "evaluation": "distance",
         "pre_function": "get_prompt",
         "post_function": "first",
-        "config_kws": {"threshold": 0}
+        "config": {"similarity_threshold": 0}
     }
 
     with open(yaml_path, "w+", encoding="utf-8") as f:
@@ -102,10 +105,83 @@
         yaml.dump(config, f)
 
     init_similar_cache_from_config(
-        config_dir=yaml_path
+        config_dir=str(yaml_path.resolve()),
     )
 
     put("api-hello", "foo")
     assert get("api-hello") == "foo"
 
     yaml_path.unlink()
+
+
+def test_init_with_new_config():
+    yaml_path = Path("test_new.yaml")
+
+    if yaml_path.exists():
+        yaml_path.unlink()
+
+    config = {
+        "storage_config": {
+            "manager": "sqlite,faiss",
+            "data_dir": "test-new-config/",
+        },
+        "embedding": "onnx",
+        "embedding_config": {
+            "model": "GPTCache/paraphrase-albert-onnx"
+        },
+        "evaluation": "distance",
+        "evaluation_config": {
+            "max_distance": 4.0,
+            "positive": False,
+        },
+        "pre_context_function": "concat",
+        "post_function": "first",
+    }
+
+    with open(yaml_path, "w+", encoding="utf-8") as f:
+        yaml = YAML(typ="unsafe", pure=True)
+        yaml.dump(config, f)
+
+    init_similar_cache_from_config(
+        config_dir=str(yaml_path.resolve()),
+    )
+
+    question = "calculate 1+3"
+    expect_answer = "the result is 4"
+    with patch("openai.ChatCompletion.create") as mock_create:
+        datas = {
+            "choices": [
+                {
+                    "message": {"content": expect_answer, "role": "assistant"},
+                    "finish_reason": "stop",
+                    "index": 0,
+                }
+            ],
+            "created": 1677825464,
+            "id": "chatcmpl-6ptKyqKOGXZT6iQnqiXAH8adNLUzD",
+            "model": "gpt-3.5-turbo-0301",
+            "object": "chat.completion.chunk",
+        }
+        mock_create.return_value = datas
+
+        response = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": question},
+            ],
+        )
+
+        assert get_message_from_openai_answer(response) == expect_answer, response
+
+    response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": question},
+        ],
+    )
+    answer_text = get_message_from_openai_answer(response)
+    assert answer_text == expect_answer, answer_text
+
+    yaml_path.unlink()
diff --git a/tests/unit_tests/processor/test_concat_context.py b/tests/unit_tests/processor/test_concat_context.py
index 4ae65039..206aa2cb 100644
--- a/tests/unit_tests/processor/test_concat_context.py
+++ b/tests/unit_tests/processor/test_concat_context.py
@@ -1,8 +1,8 @@
-from gptcache.processor.context.concat_context import ConcatContextProcess
+from gptcache.adapter.api import _get_pre_context_function
 
 
 def test_concat_context_process():
-    context_process = ConcatContextProcess()
+    context_process = _get_pre_context_function("concat")
     chat = []
     chat.append(
         {
diff --git a/tests/unit_tests/processor/test_selective_context.py b/tests/unit_tests/processor/test_selective_context.py
index b51b6ec5..c37fbaa8 100644
--- a/tests/unit_tests/processor/test_selective_context.py
+++ b/tests/unit_tests/processor/test_selective_context.py
@@ -1,13 +1,13 @@
 import pytest
 
-from gptcache.processor.context.selective_context import SelectiveContextProcess
+from gptcache.adapter.api import _get_pre_context_function
 from gptcache.utils import import_selective_context
 
 
 @pytest.mark.tags("L2")
 def test_selective_context_process():
     import_selective_context()
-    context_process = SelectiveContextProcess()
+    context_process = _get_pre_context_function("selective")
     chat = []
     chat.append(
         {
diff --git a/tests/unit_tests/processor/test_summarize_context.py b/tests/unit_tests/processor/test_summarize_context.py
index 91f666f0..d830e560 100644
--- a/tests/unit_tests/processor/test_summarize_context.py
+++ b/tests/unit_tests/processor/test_summarize_context.py
@@ -1,13 +1,13 @@
 import pytest
 from transformers import pipeline, RobertaTokenizer
 
-from gptcache.processor.context.summarization_context import SummarizationContextProcess
+from gptcache.adapter.api import _get_pre_context_function
 
 
 @pytest.mark.tags("L2")
 def test_summarization_context_process():
     summarizer = pipeline("summarization", model="ainize/bart-base-cnn")
-    context_process = SummarizationContextProcess(summarizer, None, 512)
+    context_process = _get_pre_context_function("summarization", kws={"summarizer": summarizer, "target_length": 512})
     chat = []
     chat.append(
         {