
Commit

Remove the dependency on towhee (zilliztech#80)
Signed-off-by: wxywb <[email protected]>
wxywb authored Apr 6, 2023
1 parent 7a1259a commit d9c64a2
Showing 13 changed files with 58 additions and 159 deletions.
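The user-facing shape of the change is a one-for-one swap of the Towhee wrappers for their ONNX counterparts, as the file-by-file diffs below show. A hedged before/after summary (identifiers taken from the diffs; this snippet is not itself part of the commit):

```python
# Before this commit (Towhee-based), the examples did roughly:
#   from gptcache.embedding import Towhee
#   towhee = Towhee()
#   data_manager = get_ss_data_manager("sqlite", "faiss", dimension=towhee.dimension)

# After this commit (ONNX-based, no towhee dependency):
from gptcache.embedding import Onnx
from gptcache.cache.factory import get_ss_data_manager

onnx = Onnx()
data_manager = get_ss_data_manager("sqlite", "faiss", dimension=onnx.dimension)
```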
@@ -5,33 +5,33 @@
 from gptcache.adapter import openai
 from gptcache.core import cache, Config
 from gptcache.cache.factory import get_ss_data_manager
-from gptcache.similarity_evaluation import Towhee as EvaluationTowhee
-from gptcache.embedding import Towhee as EmbeddingTowhee
+from gptcache.similarity_evaluation import Onnx as EvaluationOnnx
+from gptcache.embedding import Onnx as EmbeddingOnnx
 from gptcache.similarity_evaluation.simple import SearchDistanceEvaluation


 def run():
     with open('mock_data.json', 'r') as mock_file:
         mock_data = json.load(mock_file)

-    embedding_towhee = EmbeddingTowhee()
+    embedding_onnx = EmbeddingOnnx()

     # if you want more accurate results,
-    # you can use towhee's results to evaluate the model,
+    # you can use onnx's results to evaluate the model,
     # it will make the results more accurate, but the cache hit rate will decrease

-    # evaluation_towhee = EvaluationTowhee()
+    # evaluation_onnx = EvaluationOnnx()
     # class WrapEvaluation(SearchDistanceEvaluation):
     #
     #     def __init__(self):
-    #         self.evaluation_towhee = EvaluationTowhee()
+    #         self.evaluation_onnx = EvaluationOnnx()
     #
     #     def evaluation(self, src_dict, cache_dict, **kwargs):
     #         rank1 = super().evaluation(src_dict, cache_dict, **kwargs)
     #         if rank1 <= 0.5:
-    #             rank2 = evaluation_towhee.evaluation(src_dict, cache_dict, **kwargs)
+    #             rank2 = evaluation_onnx.evaluation(src_dict, cache_dict, **kwargs)
     #             return rank2 if rank2 != 0 else 1
     #
     #     def range(self):
     #         return 0.0, 1.0

@@ -46,8 +46,8 @@ def range(self):
     faiss_file = "faiss.index"
     has_data = os.path.isfile(sqlite_file) and os.path.isfile(faiss_file)

-    data_manager = get_ss_data_manager("sqlite", "faiss", dimension=embedding_towhee.dimension, max_size=100000)
-    cache.init(embedding_func=embedding_towhee.to_embeddings,
+    data_manager = get_ss_data_manager("sqlite", "faiss", dimension=embedding_onnx.dimension, max_size=100000)
+    cache.init(embedding_func=embedding_onnx.to_embeddings,
                data_manager=data_manager,
                similarity_evaluation=WrapEvaluation(),
                )
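The commented-out WrapEvaluation block above carries the idea behind this example: keep the cheap vector-distance check and only fall back to the ONNX similarity model for borderline scores. A hedged, uncommented sketch of that class follows; it assumes the bare `evaluation_onnx` reference in the comment was meant to be the `self.evaluation_onnx` attribute set in `__init__`, and that confident hits simply keep the distance-based score (the comment does not show that branch).

```python
from gptcache.similarity_evaluation import Onnx as EvaluationOnnx
from gptcache.similarity_evaluation.simple import SearchDistanceEvaluation


class WrapEvaluation(SearchDistanceEvaluation):
    """Sketch: re-rank borderline distance scores with the ONNX similarity model."""

    def __init__(self):
        super().__init__()
        self.evaluation_onnx = EvaluationOnnx()

    def evaluation(self, src_dict, cache_dict, **kwargs):
        # Cheap vector-distance ranking first.
        rank1 = super().evaluation(src_dict, cache_dict, **kwargs)
        if rank1 <= 0.5:
            # Borderline: ask the heavier ONNX model for a second opinion.
            rank2 = self.evaluation_onnx.evaluation(src_dict, cache_dict, **kwargs)
            return rank2 if rank2 != 0 else 1
        return rank1  # assumption: confident hits keep the distance-based score

    def range(self):
        return 0.0, 1.0
```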
8 changes: 4 additions & 4 deletions examples/openai_examples/basic_usage.py
@@ -3,7 +3,7 @@

 from gptcache.cache.factory import get_data_manager, get_ss_data_manager
 from gptcache.core import cache, Cache, Config
-from gptcache.embedding import Towhee
+from gptcache.embedding import Onnx
 from gptcache.similarity_evaluation.simple import SearchDistanceEvaluation
 from gptcache.adapter import openai

@@ -65,10 +65,10 @@ def stream_request():


 def similar_request():
-    towhee = Towhee()
-    data_manager = get_ss_data_manager("sqlite", "faiss", dimension=towhee.dimension)
+    onnx = Onnx()
+    data_manager = get_ss_data_manager("sqlite", "faiss", dimension=onnx.dimension)
     one_cache = Cache()
-    one_cache.init(embedding_func=towhee.to_embeddings,
+    one_cache.init(embedding_func=onnx.to_embeddings,
                    data_manager=data_manager,
                    similarity_evaluation=SearchDistanceEvaluation(),
                    )
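In similar_request() the example now builds its own Cache instance around the ONNX embedding. A hedged usage sketch continuing that function; the cache_obj keyword is an assumption about how the gptcache openai adapter is pointed at a non-global Cache:

```python
# Hedged sketch continuing similar_request(): route a request through the
# per-request cache built above. The cache_obj keyword is an assumption
# about the adapter's API and is not shown in this diff.
question = "what is github"
answer = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": question}],
    cache_obj=one_cache,
)
```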
8 changes: 4 additions & 4 deletions examples/openai_examples/readme.py
@@ -63,14 +63,14 @@ def response_text(openai_resp):

 from gptcache.core import cache
 from gptcache.adapter import openai
-from gptcache.embedding import Towhee
+from gptcache.embedding import Onnx
 from gptcache.cache.factory import get_ss_data_manager
 from gptcache.similarity_evaluation.simple import SearchDistanceEvaluation

-towhee = Towhee()
-data_manager = get_ss_data_manager("sqlite", "faiss", dimension=towhee.dimension)
+onnx = Onnx()
+data_manager = get_ss_data_manager("sqlite", "faiss", dimension=onnx.dimension)
 cache.init(
-    embedding_func=towhee.to_embeddings,
+    embedding_func=onnx.to_embeddings,
     data_manager=data_manager,
     similarity_evaluation=SearchDistanceEvaluation(),
 )
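readme.py now initializes the global cache with the ONNX embedding before any requests are made. A hedged sketch of the request loop that typically follows; the model name, question string, and timing are illustrative, and response_text is the helper named in the hunk header:

```python
import time

question = "what's github"
for _ in range(2):
    start = time.time()
    # Both calls go through the gptcache-wrapped adapter; the second,
    # semantically similar request should be served from the cache
    # initialized above instead of hitting the OpenAI API.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": question}],
    )
    print(response_text(response), "({:.2f}s)".format(time.time() - start))
```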
@@ -5,24 +5,22 @@
 from gptcache.core import cache, Config
 from gptcache.cache.factory import get_ss_data_manager
 from gptcache.similarity_evaluation.simple import SearchDistanceEvaluation
-from gptcache.embedding import Towhee
+from gptcache.embedding import Onnx


 def run():
-    towhee = Towhee()
-    # chinese model
-    # towhee = Towhee(model="uer/albert-base-chinese-cluecorpussmall-onnx")
+    onnx = Onnx()

     sqlite_file = "gptcache.db"
     faiss_file = "faiss.index"
     has_data = os.path.isfile(sqlite_file) and os.path.isfile(faiss_file)
     data_manager = get_ss_data_manager("sqlite", "faiss",
-                                       dimension=towhee.dimension, max_size=2000)
+                                       dimension=onnx.dimension, max_size=2000)

     def log_time_func(func_name, delta_time):
         print("func `{}` consume time: {:.2f}s".format(func_name, delta_time))

-    cache.init(embedding_func=towhee.to_embeddings,
+    cache.init(embedding_func=onnx.to_embeddings,
                data_manager=data_manager,
                similarity_evaluation=SearchDistanceEvaluation(),
                config=Config(
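The hunk is truncated right after config=Config(, so the configuration body is not shown. A hedged sketch of how that call plausibly continues; the Config keyword names (log_time_func, similarity_threshold) are assumptions, with log_time_func being the local helper defined just above:

```python
    # Hedged continuation of the truncated cache.init(...) call above.
    cache.init(embedding_func=onnx.to_embeddings,
               data_manager=data_manager,
               similarity_evaluation=SearchDistanceEvaluation(),
               config=Config(
                   log_time_func=log_time_func,   # local timing helper defined above
                   similarity_threshold=0.9,      # illustrative value, keyword assumed
               ))
```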
7 changes: 1 addition & 6 deletions gptcache/embedding/__init__.py
@@ -1,21 +1,16 @@

-__all__ = ['Towhee', 'OpenAI', 'Huggingface', 'SBERT', 'Cohere', 'Onnx']
+__all__ = ['OpenAI', 'Huggingface', 'SBERT', 'Cohere', 'Onnx']


 from gptcache.utils.lazy_import import LazyImport

-towhee = LazyImport('towhee', globals(), 'gptcache.embedding.towhee')
 openai = LazyImport('openai', globals(), 'gptcache.embedding.openai')
 huggingface = LazyImport('huggingface', globals(), 'gptcache.embedding.huggingface')
 sbert = LazyImport('sbert', globals(), 'gptcache.embedding.sbert')
 onnx = LazyImport('onnx', globals(), 'gptcache.embedding.onnx')
 cohere = LazyImport('cohere', globals(), 'gptcache.embedding.cohere')



-def Towhee(model="paraphrase-albert-small-v2-onnx"):
-    return towhee.Towhee(model)
-
 def Cohere(model="large", api_key=None):
     return cohere.Cohere(model, api_key)

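Every embedding backend here, including the removed Towhee one, sits behind LazyImport, so dropping towhee does not change how callers obtain an encoder. A hedged illustration of that indirection using the remaining Onnx factory:

```python
# Importing the package stays cheap: gptcache.embedding.onnx (and with it
# onnxruntime) is only loaded once the factory below is actually called.
from gptcache.embedding import Onnx

encoder = Onnx()                         # first use triggers the lazy import
vector = encoder.to_embeddings("hello world")
print(len(vector), encoder.dimension)    # embedding length should match the dimension
```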
4 changes: 3 additions & 1 deletion gptcache/embedding/onnx.py
@@ -1,4 +1,5 @@
-from gptcache.utils import import_onnxruntime, import_huggingface_hub
+from gptcache.utils import import_onnxruntime, import_huggingface_hub, import_huggingface
+import_huggingface()
 import_onnxruntime()
 import_huggingface_hub()
 import numpy as np
@@ -47,6 +48,7 @@ def post_proc(self, token_embeddings, attention_mask):
         sentence_embs = np.sum(token_embeddings * input_mask_expanded, 1) / np.maximum(input_mask_expanded.sum(1), 1e-9)
         return sentence_embs

+    @property
     def dimension(self):
         return self.__dimension

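The @property added here matters because every example in this commit passes dimension=onnx.dimension as a plain value; without the decorator that expression would be a bound method rather than an integer. A hedged check of the new behavior (the concrete dimension value is an assumption about the default model):

```python
from gptcache.embedding import Onnx

onnx = Onnx()
dim = onnx.dimension    # plain attribute access thanks to @property
print(type(dim), dim)   # expected: an int, e.g. 768 (assumed default ALBERT model size)
emb = onnx.to_embeddings("hello world")
print(len(emb))         # should match dim (assumption about the return shape)
```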
55 changes: 0 additions & 55 deletions gptcache/embedding/towhee.py

This file was deleted.

7 changes: 1 addition & 6 deletions gptcache/similarity_evaluation/__init__.py
@@ -1,13 +1,8 @@
-__all__ = ['Towhee', 'Onnx']
+__all__ = ['Onnx']

 from gptcache.utils.lazy_import import LazyImport

-towhee = LazyImport('towhee', globals(), 'gptcache.similarity_evaluation.towhee')
 onnx = LazyImport('onnx', globals(), 'gptcache.similarity_evaluation.onnx')


-def Towhee():
-    return towhee.Towhee()
-
 def Onnx(model = 'GPTCache/albert-duplicate-onnx'):
     return onnx.Onnx(model)
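A hedged sketch of using the factory kept above. The evaluation and range method names appear earlier in this commit (the WrapEvaluation comment); the dictionary keys passed to evaluation are an assumption and are not shown in this diff:

```python
from gptcache.similarity_evaluation import Onnx

evaluation = Onnx()   # defaults to 'GPTCache/albert-duplicate-onnx' per the factory above
score = evaluation.evaluation(
    {"question": "what is github"},                    # src_dict; key name assumed
    {"question": "can you explain what GitHub is"},    # cache_dict; key name assumed
)
print(score, evaluation.range())
```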
28 changes: 0 additions & 28 deletions gptcache/similarity_evaluation/towhee.py

This file was deleted.

13 changes: 2 additions & 11 deletions gptcache/utils/__init__.py
@@ -1,10 +1,8 @@
-__all__ = ['import_pymilvus', 'import_towhee',
+__all__ = ['import_pymilvus', 'import_huggingface_hub',
            'import_faiss', 'import_chromadb',
            'import_sqlalchemy', 'import_sql_client',
            'import_huggingface', 'import_torch',
-           'import_sbert', 'import_onnxruntime',
-           'import_huggingface_hub', 'import_cohere']
-
+           'import_sbert', 'import_onnxruntime', 'import_cohere' ]
 from .dependency_control import prompt_install


@@ -32,13 +30,6 @@ def import_cohere():
         prompt_install('cohere')
         import cohere

-def import_towhee():
-    try:
-        import towhee
-    except ModuleNotFoundError as e:
-        prompt_install('towhee==0.9.0')
-        import towhee
-
 def import_huggingface():
     try:
         import transformers
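import_towhee is deleted outright, while the remaining helpers keep the try/prompt_install/retry shape visible in import_cohere and import_huggingface above. A hedged sketch of that same pattern for onnxruntime; the real body of import_onnxruntime is not shown in this hunk:

```python
from gptcache.utils.dependency_control import prompt_install

def import_onnxruntime():
    # Same shape as import_cohere / import_huggingface above: try the import,
    # offer to install the missing package, then import again.
    try:
        import onnxruntime
    except ModuleNotFoundError:
        prompt_install('onnxruntime')
        import onnxruntime
```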
@@ -8,7 +8,7 @@

 from gptcache.adapter import openai
 from gptcache.cache.factory import get_ss_data_manager
 from gptcache.core import cache, Config
-from gptcache.embedding import Towhee
+from gptcache.embedding import Onnx
 from gptcache.similarity_evaluation.simple import SearchDistanceEvaluation


@@ -28,12 +28,12 @@ def test_invalid_similarity_threshold(self, threshold):
         method: input non-num and num which is out of range [0, 1]
         expected: raise exception and report the error
         """
-        towhee = Towhee()
+        onnx = Onnx()
         data_manager = get_ss_data_manager("sqlite", "faiss",
-                                           dimension=towhee.dimension, max_size=2000)
+                                           dimension=onnx.dimension, max_size=2000)
         is_exception = False
         try:
-            cache.init(embedding_func=towhee.to_embeddings,
+            cache.init(embedding_func=onnx.to_embeddings,
                        data_manager=data_manager,
                        similarity_evaluation=SearchDistanceEvaluation,
                        config=Config(
@@ -54,10 +54,10 @@ def test_no_openai_key(self):
         method: set similarity_threshold as 1 and no openai key
         expected: raise exception and report the error
         """
-        towhee = Towhee()
+        onnx = Onnx()
         data_manager = get_ss_data_manager("sqlite", "faiss",
-                                           dimension=towhee.dimension, max_size=2000)
-        cache.init(embedding_func=towhee.to_embeddings,
+                                           dimension=onnx.dimension, max_size=2000)
+        cache.init(embedding_func=onnx.to_embeddings,
                    data_manager=data_manager,
                    similarity_evaluation=SearchDistanceEvaluation,
                    config=Config(
@@ -98,10 +98,10 @@ def test_hit_default(self):
         expected: hit successfully
         """

-        towhee = Towhee()
+        onnx = Onnx()
         data_manager = get_ss_data_manager("sqlite", "faiss",
-                                           dimension=towhee.dimension, max_size=2000)
-        cache.init(embedding_func=towhee.to_embeddings,
+                                           dimension=onnx.dimension, max_size=2000)
+        cache.init(embedding_func=onnx.to_embeddings,
                    data_manager=data_manager,
                    similarity_evaluation=SearchDistanceEvaluation(),
                    config=Config(
@@ -129,10 +129,10 @@ def test_hit(self):
         expected: hit successfully
         """

-        towhee = Towhee()
+        onnx = Onnx()
         data_manager = get_ss_data_manager("sqlite", "faiss",
-                                           dimension=towhee.dimension, max_size=2000)
-        cache.init(embedding_func=towhee.to_embeddings,
+                                           dimension=onnx.dimension, max_size=2000)
+        cache.init(embedding_func=onnx.to_embeddings,
                    data_manager=data_manager,
                    similarity_evaluation=SearchDistanceEvaluation(),
                    config=Config(
@@ -160,10 +160,10 @@ def test_miss(self):
         method: set similarity_threshold as 0
         expected: raise exception and report the error
         """
-        towhee = Towhee()
+        onnx = Onnx()
         data_manager = get_ss_data_manager("sqlite", "faiss",
-                                           dimension=towhee.dimension, max_size=2000)
-        cache.init(embedding_func=towhee.to_embeddings,
+                                           dimension=onnx.dimension, max_size=2000)
+        cache.init(embedding_func=onnx.to_embeddings,
                    data_manager=data_manager,
                    similarity_evaluation=SearchDistanceEvaluation,
                    config=Config(
@@ -199,11 +199,11 @@ def test_disable_cache(self):
         expected: hit successfully
         """

-        towhee = Towhee()
+        onnx = Onnx()
         data_manager = get_ss_data_manager("sqlite", "faiss",
-                                           dimension=towhee.dimension, max_size=2000)
+                                           dimension=onnx.dimension, max_size=2000)
         cache.init(cache_enable_func=cf.disable_cache,
-                   embedding_func=towhee.to_embeddings,
+                   embedding_func=onnx.to_embeddings,
                    data_manager=data_manager,
                    similarity_evaluation=SearchDistanceEvaluation(),
                    config=Config(
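Each test above repeats the same ONNX plus Faiss setup and varies only the threshold and evaluator. Purely as a hedged sketch (not part of the commit), that shared shape could be expressed as a helper; the similarity_threshold keyword mirrors the truncated config=Config( calls and is an assumption about Config's parameters:

```python
def init_onnx_cache(threshold, evaluation=None):
    # Hypothetical helper, not part of the commit: the setup each test repeats.
    onnx = Onnx()
    data_manager = get_ss_data_manager("sqlite", "faiss",
                                       dimension=onnx.dimension, max_size=2000)
    cache.init(embedding_func=onnx.to_embeddings,
               data_manager=data_manager,
               similarity_evaluation=evaluation or SearchDistanceEvaluation(),
               config=Config(similarity_threshold=threshold))
    return onnx
```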

