Add the unit test for common module (zilliztech#63)

Signed-off-by: SimFG <[email protected]>
SimFG · Apr 5, 2023 · ce0a5ac · ce0a5ac
1 parent 42f1365
commit ce0a5ac
Show file tree

Hide file tree

Showing 12 changed files with 299 additions and 4 deletions.
diff --git a/gptcache/similarity_evaluation/np.py b/gptcache/similarity_evaluation/np.py
@@ -5,10 +5,21 @@
 
 class NumpyNormEvaluation(SimilarityEvaluation):
 
+    def __init__(self, enable_normal: bool = False):
+        self.enable_normal = enable_normal
+
+    @staticmethod
+    def normalize(vec):
+        magnitude = np.linalg.norm(vec)
+        # A zero vector has no direction: return it as-is rather than dividing by zero (NaN).
+        return vec / magnitude if magnitude > 0 else vec
+
     def evaluation(self, src_dict, cache_dict, **kwargs):
-        src_embedding = src_dict["embedding"]
+        src_embedding = self.normalize(src_dict["embedding"]) if self.enable_normal else src_dict["embedding"]
         _, cache_embedding = cache_dict["search_result"]
-        return 1.0 - np.linalg.norm(src_embedding - cache_embedding)
+        cache_embedding = self.normalize(cache_embedding) if self.enable_normal \
+            else cache_embedding
+        return np.linalg.norm(src_embedding - cache_embedding)
 
     def range(self):
-        return 0.0, 1.0
+        return 0.0, 2.0
diff --git a/gptcache/similarity_evaluation/towhee.py b/gptcache/similarity_evaluation/towhee.py
@@ -1,5 +1,5 @@
 from .similarity_evaluation import SimilarityEvaluation
-from ..util import import_towhee
+from ..utils import import_towhee
 import_towhee()
 
 from towhee.dc2 import ops, pipe
@@ -18,6 +18,8 @@ def evaluation(self, src_dict, cache_dict, **kwargs):
         try:
             src_question = src_dict["question"]
             cache_question = cache_dict["question"]
+            if src_question == cache_question:
+                return 1
             return self._pipe(src_question, [cache_question]).get_dict()['similarity'][0]
         except Exception:
             return 0

diff --git a/tests/unit_tests/adapter/test_adapter.py b/tests/unit_tests/adapter/test_adapter.py
@@ -0,0 +1,49 @@
+import os
+import time
+
+from gptcache.adapter.adapter import adapt
+from gptcache.core import cache, time_cal
+
+data_map_path = "data_map.txt"
+
+
+def test_adapt():
+    def llm_handler(*llm_args, **llm_kwargs):
+        a = llm_kwargs.get("a", 0)
+        b = llm_kwargs.get("b", 0)
+        time.sleep(1)
+        return a + b
+
+    def pre_embedding(data, **kwargs):
+        a = data.get("a", 0)
+        b = data.get("b", 0)
+        return f"{a}+{b}"
+
+    def cache_data_convert(cache_data):
+        return int(cache_data)
+
+    def update_cache_callback(llm_data, update_cache_func):
+        update_cache_func(str(llm_data))
+        return llm_data
+
+    def add_llm(*args, **kwargs):
+        return adapt(llm_handler, cache_data_convert, update_cache_callback, *args, **kwargs)
+
+    if os.path.isfile(data_map_path):
+        os.remove(data_map_path)
+
+    cache.init(pre_embedding_func=pre_embedding)
+
+    def report_func(delta_time):
+        assert delta_time > 0.9
+
+    def add1():
+        res = add_llm(a=1, b=2)
+        assert res == 3, res
+
+    time_cal(add1, report_func=report_func)()
+
+    def report_func(delta_time):
+        assert delta_time < 0.2
+
+    time_cal(add1, report_func=report_func)()
diff --git a/tests/unit_tests/embedding/test_embedding_string.py b/tests/unit_tests/embedding/test_embedding_string.py
@@ -0,0 +1,6 @@
+from gptcache.embedding.string import to_embeddings
+
+
+def test_embedding():
+    message = to_embeddings("foo")
+    assert message == "foo"
diff --git a/tests/unit_tests/embedding/test_towhee.py b/tests/unit_tests/embedding/test_towhee.py
@@ -0,0 +1,7 @@
+from gptcache.embedding import Towhee
+
+
+def test_towhee():
+    t = Towhee()
+    data = t.to_embeddings("foo")
+    assert len(data) == t.dimension(), f"{len(data)}, {t.dimension()}"
diff --git a/tests/unit_tests/processor/test_post.py b/tests/unit_tests/processor/test_post.py
@@ -0,0 +1,16 @@
+from gptcache.processor.post import random_one, first, nop
+
+
+def test_random_one():
+    message = random_one(["foo", "foo2"])
+    assert message
+
+
+def test_first():
+    message = first(["foo", "foo2"])
+    assert message == "foo"
+
+
+def test_nop():
+    message = nop("foo")
+    assert message == "foo"
diff --git a/tests/unit_tests/processor/test_pre.py b/tests/unit_tests/processor/test_pre.py
@@ -0,0 +1,36 @@
+from gptcache.processor.pre import last_content, all_content, nop
+
+
+def test_last_content():
+    content = last_content({
+        "messages": [
+            {
+                "content": "foo1"
+            },
+            {
+                "content": "foo2"
+            }
+        ]
+    })
+
+    assert content == "foo2"
+
+
+def test_all_content():
+    content = all_content({
+        "messages": [
+            {
+                "content": "foo1"
+            },
+            {
+                "content": "foo2"
+            }
+        ]
+    })
+
+    assert content == "foo1\nfoo2"
+
+
+def test_nop():
+    content = nop("hello")
+    assert content == "hello"
diff --git a/tests/unit_tests/similarity_evaluation/test_evaluation_string.py b/tests/unit_tests/similarity_evaluation/test_evaluation_string.py
@@ -0,0 +1,31 @@
+import math
+
+from gptcache.similarity_evaluation.string import AbsoluteEvaluation
+
+
+def test_absolute_evaluation():
+    evaluation = AbsoluteEvaluation()
+
+    range_min, range_max = evaluation.range()
+    assert math.isclose(range_min, 0.0)
+    assert math.isclose(range_max, 1.0)
+
+    score = evaluation.evaluation(
+        {
+            "question": "hello"
+        },
+        {
+            "question": "hello"
+        }
+    )
+    assert math.isclose(score, 1.0)
+
+    score = evaluation.evaluation(
+        {
+            "question": "tello"
+        },
+        {
+            "question": "hello"
+        }
+    )
+    assert math.isclose(score, 0.0)
diff --git a/tests/unit_tests/similarity_evaluation/test_evaluation_towhee.py b/tests/unit_tests/similarity_evaluation/test_evaluation_towhee.py
@@ -0,0 +1,45 @@
+import math
+
+from gptcache.similarity_evaluation import Towhee
+
+
+def test_towhee():
+    evaluation = Towhee()
+
+    range_min, range_max = evaluation.range()
+    assert math.isclose(range_min, 0.0)
+    assert math.isclose(range_max, 1.0)
+
+    score = evaluation.evaluation(
+        {
+            "question": "hello"
+        },
+        {
+            "question": "hello"
+        }
+    )
+    assert math.isclose(score, 1.0)
+
+    query = 'Can you pass a urine test for meth in 4 days?'
+    candidate_1 = 'Can meth be detected in a urine test if last used was Thursday night and the test was tuesday morning?'
+    candidate_2 = 'how old are you?'
+
+    score = evaluation.evaluation(
+        {
+            "question": query
+        },
+        {
+            "question": candidate_1
+        }
+    )
+    assert score > 0.8
+
+    score = evaluation.evaluation(
+        {
+            "question": query
+        },
+        {
+            "question": candidate_2
+        }
+    )
+    assert score < 0.1
diff --git a/tests/unit_tests/similarity_evaluation/test_np.py b/tests/unit_tests/similarity_evaluation/test_np.py
@@ -0,0 +1,33 @@
+import math
+
+import numpy as np
+
+from gptcache.similarity_evaluation.np import NumpyNormEvaluation
+
+
+def test_norm():
+    evaluation = NumpyNormEvaluation(enable_normal=True)
+
+    range_min, range_max = evaluation.range()
+    assert math.isclose(range_min, 0.0)
+    assert math.isclose(range_max, 2.0)
+
+    score = evaluation.evaluation(
+        {
+            "embedding": np.array([-0.5, -0.5])
+        },
+        {
+            "search_result": (0, np.array([1, 1]))
+        }
+    )
+    assert math.isclose(score, 2.0), score
+
+    score = evaluation.evaluation(
+        {
+            "embedding": np.array([1, 2, 3, 4])
+        },
+        {
+            "search_result": (0, np.array([0.1, 0.2, 0.3, 0.4]))
+        }
+    )
+    assert math.isclose(score, 0.0, abs_tol=1e-9), score
diff --git a/tests/unit_tests/similarity_evaluation/test_simple.py b/tests/unit_tests/similarity_evaluation/test_simple.py
@@ -0,0 +1,47 @@
+import math
+
+from gptcache.similarity_evaluation.simple import SearchDistanceEvaluation
+
+
+def test_search_distance_evaluation():
+    evaluation = SearchDistanceEvaluation()
+
+    range_min, range_max = evaluation.range()
+    assert math.isclose(range_min, 0.0)
+    assert math.isclose(range_max, 4.0)
+
+    score = evaluation.evaluation(
+        {},
+        {
+            "search_result": (1, None)
+        }
+    )
+    assert math.isclose(score, 3.0)
+
+    score = evaluation.evaluation(
+        {},
+        {
+            "search_result": (-1, None)
+        }
+    )
+    assert math.isclose(score, 4.0)
+
+    evaluation = SearchDistanceEvaluation(max_distance=10, positive=True)
+    range_min, range_max = evaluation.range()
+    assert math.isclose(range_min, 0.0)
+    assert math.isclose(range_max, 10.0)
+
+    score = evaluation.evaluation(
+        {},
+        {
+            "search_result": (5, None)
+        }
+    )
+    assert math.isclose(score, 5.0)
+    score = evaluation.evaluation(
+        {},
+        {
+            "search_result": (20, None)
+        }
+    )
+    assert math.isclose(score, 10.0)
diff --git a/tests/unit_tests/utils/test_error.py b/tests/unit_tests/utils/test_error.py
@@ -0,0 +1,12 @@
+from gptcache.utils.error import CacheError, NotInitError, NotFoundStoreError, ParamError
+
+
+def test_error_type():
+    not_init_error = NotInitError()
+    assert issubclass(type(not_init_error), CacheError)
+
+    not_found_store_error = NotFoundStoreError("unittest", "test_error_type")
+    assert issubclass(type(not_found_store_error), CacheError)
+
+    param_error = ParamError("unittest")
+    assert issubclass(type(param_error), CacheError)