
Commit

Merge pull request #869 from JohnSnowLabs/fix/model_import_error
Resolved errors in sycophancy-test, factuality-test, and augmentation.
ArshaanNazir authored Nov 3, 2023
2 parents 7624772 + 6d458ba commit 81d6d37
Showing 8 changed files with 53 additions and 37 deletions.
demo/tutorials/misc/Loading_Data_with_Custom_Columns.ipynb (4 changes: 2 additions & 2 deletions)
@@ -11,7 +11,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/misc/custom_column_csv.ipynb)"
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/misc/Loading_Data_with_Custom_Columns.ipynb)"
]
},
{
@@ -2216,7 +2216,7 @@
"harness = Harness(task=\"question-answering\",\n",
" model={\"model\":\"text-davinci-003\",\"hub\":\"openai\"},\n",
" data={\"data_source\":\"SQuAD_csv.csv\",\n",
" \"feature_column\":{\"passage\": \"context\", \"question\": \"question\"},\n",
" \"feature_column\":{\"context\": \"context\", \"question\": \"question\"},\n",
" \"target_column\":'answer_start',\n",
" })"
]
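For reference, the corrected notebook cell boils down to the configuration below. This is a minimal sketch assuming an OpenAI API key is configured and SQuAD_csv.csv is available locally; the point of the fix is that `feature_column` must use the question-answering task's expected keys (`context`, `question`) rather than an arbitrary label such as `passage`.

```python
# Minimal sketch of the corrected Harness call (assumes an OpenAI API key is
# set and SQuAD_csv.csv exists locally).
from langtest import Harness

harness = Harness(
    task="question-answering",
    model={"model": "text-davinci-003", "hub": "openai"},
    data={
        "data_source": "SQuAD_csv.csv",
        # keys must match the task's expected columns; values name the CSV columns
        "feature_column": {"context": "context", "question": "question"},
        "target_column": "answer_start",
    },
)
```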
docs/pages/tutorials/tutorials.md (4 changes: 2 additions & 2 deletions)
@@ -56,7 +56,7 @@ The following table gives an overview of the different tutorial notebooks. We ha
| LogiQA | OpenAI | Question-Answering | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/llm_notebooks/dataset-notebooks/LogiQA_dataset.ipynb) |
| ASDiv | OpenAI | Question-Answering | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/llm_notebooks/dataset-notebooks/ASDiv_dataset.ipynb) |
| BigBench | OpenAI | Question-Answering | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/llm_notebooks/dataset-notebooks/Bigbench_dataset.ipynb) |
| HuggingFaceDataset-Support | Hugging Face/OpenAI | Text-Classification/Summarization | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/misc/HuggingFace_Dataset_Notebook.ipynb) |
| HuggingFaceDataset-Support | Hugging Face/Spacy/OpenAI | NER/Text-Classification/Question-Answering/Summarization | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/misc/HuggingFace_Dataset_Notebook.ipynb) |
| Augmentation-Control | John Snow Labs | NER | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/misc/Augmentation_Control_Notebook.ipynb) |
| Comparing Models | Hugging Face/John Snow Labs/Spacy | NER/Text-Classification | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/misc/Comparing_Models_Notebook.ipynb) |
| Runtime Test | Hugging Face/John Snow Labs/Spacy | NER | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/misc/RuntimeTest_Notebook.ipynb) |
@@ -84,7 +84,7 @@ The following table gives an overview of the different tutorial notebooks. We ha
| Evaluation Metrics | OpenAI | Question-Answering | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/misc/Evaluation_Metrics.ipynb) |
| Fiqa | OpenAI | Question-Answering | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/llm_notebooks/dataset-notebooks/Fiqa_dataset.ipynb) |
| Customized Model | Custom | Text-Classification | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/misc/Custom_Hub_Notebook.ipynb) |

| Loading Data with Custom Columns | Hugging Face/OpenAI | NER/Text-Classification/Question-Answering/Summarization | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/langtest/blob/main/demo/tutorials/misc/Loading_Data_with_Custom_Columns.ipynb) |

<style>
.heading {
langtest/datahandler/datasource.py (15 changes: 9 additions & 6 deletions)
@@ -145,6 +145,7 @@ class DataFactory:
"""

data_sources: Dict[str, BaseDataset] = BaseDataset.data_sources
CURATED_DATASETS = ["BoolQ-bias", "XSum-bias"]

def __init__(self, file_path: dict, task: TaskManager, **kwargs) -> None:
"""Initializes DataFactory object.
@@ -172,6 +173,9 @@ def __init__(self, file_path: dict, task: TaskManager, **kwargs) -> None:
elif self._file_path in ("synthetic-math-data", "synthetic-nlp-data"):
self.file_ext = "syntetic"
self._file_path = file_path
elif self._file_path in self.CURATED_DATASETS:
self.file_ext = "curated"
self._file_path = file_path.get("data_source")
else:
self._file_path = self._load_dataset(self._file_path)
_, self.file_ext = os.path.splitext(self._file_path)
@@ -193,17 +197,16 @@ def load(self) -> List[Sample]:
Returns:
list[Sample]: Loaded text data.
"""
# if "data_source" in self._file_path:
# if isinstance(self._file_path["data_source"], list):
# return self._file_path
# elif isinstance(self._file_path, list):
# return self._file_path

if len(self._custom_label) > 1 and self.file_ext == "csv":
self.init_cls = self.data_sources[self.file_ext.replace(".", "")](
self._custom_label, task=self.task, **self.kwargs
)
# if
elif self._file_path in self.CURATED_DATASETS and self.task in (
"question-answering",
"summarization",
):
return DataFactory.load_curated_bias(self._file_path)
else:
self.init_cls = self.data_sources[self.file_ext.replace(".", "")](
self._file_path, task=self.task, **self.kwargs
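The datasource.py change short-circuits curated bias datasets: when `data_source` is one of `CURATED_DATASETS` ("BoolQ-bias", "XSum-bias"), the factory tags it as "curated" and `load()` returns `DataFactory.load_curated_bias()` directly, instead of attempting a file-extension lookup. A hypothetical usage sketch (OpenAI key assumed configured):

```python
# Hypothetical usage sketch: with this patch, a curated bias dataset name is
# recognized before any file-extension lookup, so no local file is needed.
from langtest import Harness

harness = Harness(
    task="question-answering",
    model={"model": "text-davinci-003", "hub": "openai"},
    data={"data_source": "BoolQ-bias"},  # routed through DataFactory.load_curated_bias
)
```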
langtest/errors.py (1 change: 1 addition & 0 deletions)
@@ -221,6 +221,7 @@ class Errors(metaclass=ErrorsWithCodes):
"Pipeline should be '{Pipeline}', passed model is: '{type_model}'")
E080 = ("Invalid SpaCy Pipeline. Expected return type is {expected_type} "
"but pipeline returns: {returned_type}")
E081 = ("Provided task is not supported in the {hub} hub.")


class ColumnNameError(Exception):
langtest/modelhandler/__init__.py (2 changes: 1 addition & 1 deletion)
@@ -7,7 +7,7 @@
"huggingfacehub": "huggingface-inference-api",
}

INSTALLED_HUBS = []
INSTALLED_HUBS = ["custom"]

libraries = [
("johnsnowlabs", "langtest.modelhandler.jsl_modelhandler"),
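The `__init__.py` change pre-seeds `INSTALLED_HUBS` with "custom", so the custom hub is registered even when none of the optional hub libraries are importable. A simplified sketch of that registration pattern, with `importlib` standing in for the module's own import helper; the handler-module names beyond `jsl_modelhandler` are illustrative:

```python
# Simplified sketch of conditional hub registration; "custom" is always present.
import importlib
import importlib.util

INSTALLED_HUBS = ["custom"]

libraries = [
    ("johnsnowlabs", "langtest.modelhandler.jsl_modelhandler"),
    # ... one (package, handler-module) pair per optional hub
]

for package, handler_module in libraries:
    if importlib.util.find_spec(package) is not None:
        importlib.import_module(handler_module)  # importing registers the hub's handlers
        INSTALLED_HUBS.append(package)
```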
langtest/modelhandler/jsl_modelhandler.py (22 changes: 12 additions & 10 deletions)
@@ -174,18 +174,20 @@ def load_model(cls, path) -> "NLUPipeline":
Args:
path (str): Path to pretrained local or NLP Models Hub SparkNLP model
"""
if os.path.exists(path):
if try_import_lib("johnsnowlabs"):
loaded_model = nlp.load(path=path)
if isinstance(path, str):
if os.path.exists(path):
if try_import_lib("johnsnowlabs"):
loaded_model = nlp.load(path=path)
else:
loaded_model = PipelineModel.load(path)
else:
loaded_model = PipelineModel.load(path)
else:
if try_import_lib("johnsnowlabs"):
loaded_model = nlp.load(path)
else:
raise ValueError(Errors.E039)
if try_import_lib("johnsnowlabs"):
loaded_model = nlp.load(path)
else:
raise ValueError(Errors.E039)

return cls(loaded_model)
return cls(loaded_model)
return cls(path)

@abstractmethod
def predict(self, text: str, *args, **kwargs) -> Any:
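The jsl_modelhandler change lets `load_model` accept either a path string or an already-instantiated pipeline: strings are resolved from disk or the NLP Models Hub as before, while any other object is wrapped directly. A self-contained sketch of that dispatch; the class and loader names below are illustrative stand-ins, not langtest's or Spark NLP's actual API:

```python
import os
from typing import Any


def _load_from_disk(path: str) -> Any:
    """Stand-in for nlp.load(path=...) / PipelineModel.load(...)."""
    return f"<pipeline loaded from {path}>"


def _load_from_models_hub(reference: str) -> Any:
    """Stand-in for nlp.load(...) resolving an NLP Models Hub reference."""
    return f"<pipeline downloaded for {reference}>"


class PipelineWrapper:
    def __init__(self, pipeline: Any) -> None:
        self.pipeline = pipeline

    @classmethod
    def load_model(cls, path: Any) -> "PipelineWrapper":
        if isinstance(path, str):
            if os.path.exists(path):
                return cls(_load_from_disk(path))
            return cls(_load_from_models_hub(path))
        # not a string: treat it as an already-loaded pipeline and wrap it as-is
        return cls(path)
```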
langtest/tasks/task.py (30 changes: 18 additions & 12 deletions)
@@ -47,17 +47,23 @@ def load_model(cls, model_path: str, model_hub: str, *args, **kwargs):
if model_hub not in supported_hubs:
raise AssertionError(Errors.E042.format(supported_hubs=supported_hubs))

if model_hub in LANGCHAIN_HUBS:
# LLM models
cls.model = models["llm"][cls._name].load_model(
hub=model_hub, path=model_path, *args, **kwargs
)
else:
# JSL, Huggingface, and Spacy models
cls.model = models[model_hub][cls._name].load_model(
path=model_path, *args, **kwargs
)
return cls.model
if "user_prompt" in kwargs:
cls.user_prompt = kwargs.get("user_prompt")
kwargs.pop("user_prompt")
try:
if model_hub in LANGCHAIN_HUBS:
# LLM models
cls.model = models["llm"][cls._name].load_model(
hub=model_hub, path=model_path, *args, **kwargs
)
else:
# JSL, Huggingface, and Spacy models
cls.model = models[model_hub][cls._name].load_model(
path=model_path, *args, **kwargs
)
return cls.model
except TypeError:
raise ValueError(Errors.E081.format(hub=model_hub))

@classmethod
def __init_subclass__(cls, **kwargs):
@@ -595,7 +601,7 @@ def create_sample(
class FactualityTest(BaseTask):
"""Factuality task."""

_name = "factuality"
_name = "factualitytest"
_default_col = {
"article_sent": ["article_sent"],
"correct_sent": ["correct_sent"],
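In tasks/task.py, `load_model` now pops `user_prompt` out of the keyword arguments before they reach the hub-specific loader, and converts a `TypeError` raised during model instantiation into the new, clearer E081 `ValueError`. A self-contained sketch of that flow; the function signature and message below are illustrative, not langtest's exact code:

```python
def load_task_model(models, task_name, model_path, model_hub, **kwargs):
    # The user prompt is stored on the task (cls.user_prompt in the patch) and
    # must not be forwarded to loaders that do not accept it.
    user_prompt = kwargs.pop("user_prompt", None)
    try:
        handler = models[model_hub][task_name]
        return handler.load_model(path=model_path, **kwargs), user_prompt
    except TypeError:
        # e.g. the hub has no handler that accepts this task's arguments
        raise ValueError(f"Provided task is not supported in the {model_hub} hub.")
```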
langtest/utils/custom_types/sample.py (12 changes: 8 additions & 4 deletions)
@@ -2379,9 +2379,7 @@ def prompt_eval(self):
answer_key="answer",
prediction_key="text",
)
if (graded_outputs1[0]["text"].strip() == "CORRECT") and (
graded_outputs2[0]["text"].strip() == "CORRECT"
):
if self.output(graded_outputs1) and self.output(graded_outputs2):
return True
else:
return False
@@ -2406,7 +2404,7 @@ def prompt_eval(self):
answer_key="answer",
prediction_key="text",
)
return graded_outputs[0]["text"].strip() == "CORRECT"
return self.output(graded_outputs)

def is_pass_with_ground_truth(self) -> bool:
"""
@@ -2501,6 +2499,12 @@ def run(self, model, **kwargs):

return True

def output(self, graded_outputs):
"""
Check if the output is correct.
"""
return list(graded_outputs[0].values())[0].replace("\n", "").strip() == "CORRECT"


Sample = TypeVar(
"Sample",
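The sample.py change replaces the hard-coded `graded_outputs[0]["text"]` lookup with an `output()` helper that reads the first value of the grader's result dict, so the verdict check keeps working when the evaluation chain keys its output differently (for example "text" versus "results" across LangChain versions). A small self-contained sketch of the same normalization; the example data below is illustrative:

```python
def is_correct(graded_outputs):
    """Return True when the grader's verdict normalizes to 'CORRECT'."""
    verdict = list(graded_outputs[0].values())[0]
    return verdict.replace("\n", "").strip() == "CORRECT"


print(is_correct([{"text": " CORRECT\n"}]))    # True
print(is_correct([{"results": "INCORRECT"}]))  # False
```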
