Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hot-fixes/QA evaluation and llm sensitivity test #831

Merged
merged 6 commits into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions langtest/embeddings/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +0,0 @@
from .huggingface import HuggingfaceEmbeddings
from .openai import OpenAIEmbeddings

embedding_info = {
"openai": {"class": OpenAIEmbeddings, "default_model": "text-embedding-ada-002"},
"huggingface": {
"class": HuggingfaceEmbeddings,
"default_model": "sentence-transformers/all-mpnet-base-v2",
},
}
2 changes: 1 addition & 1 deletion langtest/embeddings/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class HuggingfaceEmbeddings:

def __init__(
self,
model: str,
model: str = "sentence-transformers/all-mpnet-base-v2",
):
"""Constructor method

Expand Down
3 changes: 2 additions & 1 deletion langtest/embeddings/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
from tenacity import retry, wait_random_exponential, stop_after_attempt


class OpenAIEmbeddings:
class OpenaiEmbeddings:
LIB_NAME = "openai"

def __init__(self, model="text-embedding-ada-002"):
self.model = model
self.api_key = os.environ.get("OPENAI_API_KEY")
self.openai = None
self._check_openai_package()
if not self.api_key:
raise ValueError(
Expand Down
9 changes: 6 additions & 3 deletions langtest/modelhandler/llm_modelhandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from ..metrics import EmbeddingDistance
from langchain import OpenAI
from ..embeddings import OpenAIEmbeddings
import os
from langtest.transform.utils import compare_generations_overlap

Expand Down Expand Up @@ -243,7 +242,7 @@ def __init__(self, model: str):
self.model, self.embeddings_model = model

@classmethod
def load_model(cls, path: str) -> tuple:
def load_model(cls, path: str, *args, **kwargs) -> tuple:
"""
Load the pretrained language model and embeddings model from a given path.

Expand All @@ -257,12 +256,16 @@ def load_model(cls, path: str) -> tuple:
ValueError: If the 'OPENAI_API_KEY' environment variable is not set.
"""
try:
from ..embeddings.openai import OpenaiEmbeddings

llm = OpenAI(
model_name=path,
temperature=0,
openai_api_key=os.environ["OPENAI_API_KEY"],
*args,
**kwargs,
)
embeddings_model = OpenAIEmbeddings(model="text-embedding-ada-002")
embeddings_model = OpenaiEmbeddings(model="text-embedding-ada-002")
return llm, embeddings_model
except KeyError:
raise ValueError("The 'OPENAI_API_KEY' environment variable is not set.")
Expand Down
27 changes: 19 additions & 8 deletions langtest/utils/custom_types/sample.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import string
import importlib
from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union, Callable
from copy import deepcopy
from pydantic import BaseModel, PrivateAttr, validator, Field
Expand Down Expand Up @@ -502,7 +503,11 @@ def is_pass_embedding_distance(self):
"""Check if the sample passes based on embedding distance."""

from ...metrics import EmbeddingDistance
from ...embeddings import embedding_info

embedding_info = {
"openai": {"default_model": "text-embedding-ada-002"},
"huggingface": {"default_model": "sentence-transformers/all-mpnet-base-v2"},
}

default_threshold = {
"cosine": {"threshold": 0.80, "comparison": lambda a, b: a >= b},
Expand All @@ -516,14 +521,20 @@ def is_pass_embedding_distance(self):
hub_name = embeddings.get("hub", "openai")
evaluations = self.config["evaluation"]
selected_metric = evaluations.get("distance", "cosine")
module_name = f"langtest.embeddings.{hub_name}"
class_name = f"{hub_name.capitalize()}Embeddings"

try:
module = importlib.import_module(module_name)
embeddings_class = getattr(module, class_name)

if hub_name not in embedding_info:
raise ValueError(f"Unsupported hub: {hub_name}")
except (ModuleNotFoundError, AttributeError):
raise ValueError(f"No {hub_name} embeddings class found")

if selected_metric not in EmbeddingDistance.available_embedding_distance:
raise ValueError(f"Unsupported distance metric: {selected_metric}")

model = embedding_info[hub_name]["class"](
model = embeddings_class(
model=embeddings.get("model", embedding_info[hub_name]["default_model"])
)

Expand Down Expand Up @@ -1051,7 +1062,7 @@ def _is_eval(self) -> Tuple[bool, float]:
if self.test_case == self.actual_results.translation_text:
return False, 1
else:
from ...embeddings import HuggingfaceEmbeddings
from ...embeddings.huggingface import HuggingfaceEmbeddings

model = HuggingfaceEmbeddings(
model="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
Expand Down Expand Up @@ -1237,7 +1248,7 @@ def is_pass(self):
def _is_eval(self) -> bool:
""""""

from ...embeddings import HuggingfaceEmbeddings
from ...embeddings.huggingface import HuggingfaceEmbeddings

model = HuggingfaceEmbeddings(
model="pritamdeka/BioBERT-mnli-snli-scinli-scitail-mednli-stsb"
Expand Down Expand Up @@ -1394,7 +1405,7 @@ def _is_eval(self) -> bool:
evaluation = harness_config.get("evaluation", {"threshold": 0.85})
threshold = evaluation["threshold"]

from ...embeddings import HuggingfaceEmbeddings
from ...embeddings.huggingface import HuggingfaceEmbeddings

model = HuggingfaceEmbeddings(
model="sentence-transformers/distiluse-base-multilingual-cased-v2"
Expand Down Expand Up @@ -1959,7 +1970,7 @@ def _is_eval(self) -> bool:

evaluation = harness_config.get("evaluation", {"threshold": 0.85})

from ...embeddings import HuggingfaceEmbeddings
from ...embeddings.huggingface import HuggingfaceEmbeddings

model = HuggingfaceEmbeddings(
model="sentence-transformers/distiluse-base-multilingual-cased-v2"
Expand Down
2 changes: 1 addition & 1 deletion tests/test_HuggingfaceEmbeddings.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import unittest
import torch
import numpy as np
from langtest.embeddings import HuggingfaceEmbeddings
from langtest.embeddings.huggingface import HuggingfaceEmbeddings


class TestHuggingfaceEmbeddings(unittest.TestCase):
Expand Down