From 268e32c086cbf3d95a9a2277c56d6f5da7478580 Mon Sep 17 00:00:00 2001 From: SimFG Date: Wed, 26 Apr 2023 23:05:42 +0800 Subject: [PATCH] Update the version to `0.1.20` (#297) Signed-off-by: SimFG --- docs/release_note.md | 17 +++++++++++++++++ gptcache/adapter/adapter.py | 2 +- gptcache/adapter/llama_cpp.py | 2 +- setup.py | 2 +- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/docs/release_note.md b/docs/release_note.md index a435bbfd..1802e40f 100644 --- a/docs/release_note.md +++ b/docs/release_note.md @@ -5,6 +5,23 @@ To read the following content, you need to understand the basic use of GPTCache, - [Readme doc](https://github.com/zilliztech/GPTCache) - [Usage doc](https://github.com/zilliztech/GPTCache/blob/main/docs/usage.md) +## v0.1.20 (2023.4.26) + +1. Support the `temperature` param, as in the OpenAI API + +A non-negative sampling temperature; defaults to 0. +A higher temperature makes the output more random. +A lower temperature means a more deterministic and confident output. + +2. Add llama adapter + +```python +from gptcache.adapter.llama_cpp import Llama + +llm = Llama('./models/7B/ggml-model.bin') +answer = llm(prompt=question) +``` + ## v0.1.19 (2023.4.24) 1. 
Add stability sdk adapter (text -> image) diff --git a/gptcache/adapter/adapter.py b/gptcache/adapter/adapter.py index b48cee26..84653141 100644 --- a/gptcache/adapter/adapter.py +++ b/gptcache/adapter/adapter.py @@ -98,7 +98,7 @@ def adapt(llm_handler, cache_data_convert, update_cache_callback, *args, **kwarg if chat_cache.post_process_messages_func is temperature_softmax: return_message = chat_cache.post_process_messages_func( messages=[t[1] for t in cache_answers], - scores = [t[0] for t in cache_answers], + scores=[t[0] for t in cache_answers], temperature=temperature ) else: diff --git a/gptcache/adapter/llama_cpp.py b/gptcache/adapter/llama_cpp.py index 5c9c1f44..1e7d3bc4 100644 --- a/gptcache/adapter/llama_cpp.py +++ b/gptcache/adapter/llama_cpp.py @@ -29,7 +29,7 @@ class Llama(llama_cpp.Llama): data_manager=m, embedding_func=onnx.to_embeddings ) - llm = LlamaCpp('./models/7B/ggml-model.bin') + llm = Llama('./models/7B/ggml-model.bin') answer = llm(prompt=question, cache_obj=llm_cache) """ def __call__( diff --git a/setup.py b/setup.py index 84d36c7d..481a899e 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ def parse_requirements(file_name: str) -> List[str]: setuptools.setup( name="gptcache", packages=find_packages(), - version="0.1.19", + version="0.1.20", author="SimFG", author_email="bang.fu@zilliz.com", description="GPTCache, a powerful caching library that can be used to speed up and lower the cost of chat "