JohnSnowLabs · chakravarthik27 · Oct 18, 2023 · Oct 18, 2023 · Oct 18, 2023 · Oct 18, 2023
diff --git a/langtest/embeddings/__init__.py b/langtest/embeddings/__init__.py
@@ -1,10 +0,0 @@
-from .huggingface import HuggingfaceEmbeddings
-from .openai import OpenAIEmbeddings
-
-embedding_info = {
-    "openai": {"class": OpenAIEmbeddings, "default_model": "text-embedding-ada-002"},
-    "huggingface": {
-        "class": HuggingfaceEmbeddings,
-        "default_model": "sentence-transformers/all-mpnet-base-v2",
-    },
-}

diff --git a/langtest/embeddings/huggingface.py b/langtest/embeddings/huggingface.py
@@ -15,7 +15,7 @@ class HuggingfaceEmbeddings:
 
     def __init__(
         self,
-        model: str,
+        model: str = "sentence-transformers/all-mpnet-base-v2",
     ):
         """Constructor method
 

diff --git a/langtest/embeddings/openai.py b/langtest/embeddings/openai.py
@@ -5,12 +5,13 @@
 from tenacity import retry, wait_random_exponential, stop_after_attempt
 
 
-class OpenAIEmbeddings:
+class OpenaiEmbeddings:
     LIB_NAME = "openai"
 
     def __init__(self, model="text-embedding-ada-002"):
         self.model = model
         self.api_key = os.environ.get("OPENAI_API_KEY")
+        self.openai = None
         self._check_openai_package()
         if not self.api_key:
             raise ValueError(

diff --git a/langtest/modelhandler/llm_modelhandler.py b/langtest/modelhandler/llm_modelhandler.py
@@ -7,7 +7,6 @@
 
 from ..metrics import EmbeddingDistance
 from langchain import OpenAI
-from ..embeddings import OpenAIEmbeddings
 import os
 from langtest.transform.utils import compare_generations_overlap
 
@@ -243,7 +242,7 @@ def __init__(self, model: str):
         self.model, self.embeddings_model = model
 
     @classmethod
-    def load_model(cls, path: str) -> tuple:
+    def load_model(cls, path: str, *args, **kwargs) -> tuple:
         """
         Load the pretrained language model and embeddings model from a given path.
 
@@ -257,12 +256,16 @@ def load_model(cls, path: str) -> tuple:
             ValueError: If the 'OPENAI_API_KEY' environment variable is not set.
         """
         try:
+            from ..embeddings.openai import OpenaiEmbeddings
+
             llm = OpenAI(
                 model_name=path,
                 temperature=0,
                 openai_api_key=os.environ["OPENAI_API_KEY"],
+                *args,
+                **kwargs,
             )
-            embeddings_model = OpenAIEmbeddings(model="text-embedding-ada-002")
+            embeddings_model = OpenaiEmbeddings(model="text-embedding-ada-002")
             return llm, embeddings_model
         except KeyError:
             raise ValueError("The 'OPENAI_API_KEY' environment variable is not set.")

diff --git a/langtest/utils/custom_types/sample.py b/langtest/utils/custom_types/sample.py
@@ -1,5 +1,6 @@
 import re
 import string
+import importlib
 from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union, Callable
 from copy import deepcopy
 from pydantic import BaseModel, PrivateAttr, validator, Field
@@ -502,7 +503,11 @@ def is_pass_embedding_distance(self):
         """Check if the sample passes based on embedding distance."""
 
         from ...metrics import EmbeddingDistance
-        from ...embeddings import embedding_info
+
+        embedding_info = {
+            "openai": {"default_model": "text-embedding-ada-002"},
+            "huggingface": {"default_model": "sentence-transformers/all-mpnet-base-v2"},
+        }
 
         default_threshold = {
             "cosine": {"threshold": 0.80, "comparison": lambda a, b: a >= b},
@@ -516,14 +521,20 @@ def is_pass_embedding_distance(self):
         hub_name = embeddings.get("hub", "openai")
         evaluations = self.config["evaluation"]
         selected_metric = evaluations.get("distance", "cosine")
+        module_name = f"langtest.embeddings.{hub_name}"
+        class_name = f"{hub_name.capitalize()}Embeddings"
+
+        try:
+            module = importlib.import_module(module_name)
+            embeddings_class = getattr(module, class_name)
 
-        if hub_name not in embedding_info:
-            raise ValueError(f"Unsupported hub: {hub_name}")
+        except (ModuleNotFoundError, AttributeError):
+            raise ValueError(f"No {hub_name} embeddings class found")
 
         if selected_metric not in EmbeddingDistance.available_embedding_distance:
             raise ValueError(f"Unsupported distance metric: {selected_metric}")
 
-        model = embedding_info[hub_name]["class"](
+        model = embeddings_class(
             model=embeddings.get("model", embedding_info[hub_name]["default_model"])
         )
 
@@ -1051,7 +1062,7 @@ def _is_eval(self) -> Tuple[bool, float]:
         if self.test_case == self.actual_results.translation_text:
             return False, 1
         else:
-            from ...embeddings import HuggingfaceEmbeddings
+            from ...embeddings.huggingface import HuggingfaceEmbeddings
 
             model = HuggingfaceEmbeddings(
                 model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
@@ -1237,7 +1248,7 @@ def is_pass(self):
     def _is_eval(self) -> bool:
         """"""
 
-        from ...embeddings import HuggingfaceEmbeddings
+        from ...embeddings.huggingface import HuggingfaceEmbeddings
 
         model = HuggingfaceEmbeddings(
             model="pritamdeka/BioBERT-mnli-snli-scinli-scitail-mednli-stsb"
@@ -1394,7 +1405,7 @@ def _is_eval(self) -> bool:
         evaluation = harness_config.get("evaluation", {"threshold": 0.85})
         threshold = evaluation["threshold"]
 
-        from ...embeddings import HuggingfaceEmbeddings
+        from ...embeddings.huggingface import HuggingfaceEmbeddings
 
         model = HuggingfaceEmbeddings(
             model="sentence-transformers/distiluse-base-multilingual-cased-v2"
@@ -1959,7 +1970,7 @@ def _is_eval(self) -> bool:
 
                 evaluation = harness_config.get("evaluation", {"threshold": 0.85})
 
-                from ...embeddings import HuggingfaceEmbeddings
+                from ...embeddings.huggingface import HuggingfaceEmbeddings
 
                 model = HuggingfaceEmbeddings(
                     model="sentence-transformers/distiluse-base-multilingual-cased-v2"

diff --git a/tests/test_HuggingfaceEmbeddings.py b/tests/test_HuggingfaceEmbeddings.py
@@ -1,7 +1,7 @@
 import unittest
 import torch
 import numpy as np
-from langtest.embeddings import HuggingfaceEmbeddings
+from langtest.embeddings.huggingface import HuggingfaceEmbeddings
 
 
 class TestHuggingfaceEmbeddings(unittest.TestCase):