Merge pull request #1161 from JohnSnowLabs/release/2.6.0

vulnerabilities and security issues
JohnSnowLabs · Jan 6, 2025 · 2b4f018
2 parents c457432 + 8cc3b2f
commit 2b4f018
Show file tree

Hide file tree

Showing 24 changed files with 713 additions and 527 deletions.
diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock
@@ -7,30 +7,35 @@ GEM
       minitest (~> 5.1)
       tzinfo (~> 1.1)
       zeitwerk (~> 2.2, >= 2.2.2)
-    addressable (2.8.1)
-      public_suffix (>= 2.0.2, < 6.0)
+    addressable (2.8.7)
+      public_suffix (>= 2.0.2, < 7.0)
+    base64 (0.2.0)
     coffee-script (2.4.1)
       coffee-script-source
       execjs
     coffee-script-source (1.11.1)
     colorator (1.1.0)
-    commonmarker (0.23.8)
-    concurrent-ruby (1.2.0)
-    dnsruby (1.61.9)
-      simpleidn (~> 0.1)
+    commonmarker (0.23.11)
+    concurrent-ruby (1.3.4)
+    dnsruby (1.72.3)
+      base64 (~> 0.2.0)
+      simpleidn (~> 0.2.1)
     em-websocket (0.5.3)
       eventmachine (>= 0.12.9)
       http_parser.rb (~> 0)
     ethon (0.16.0)
       ffi (>= 1.15.0)
     eventmachine (1.2.7)
-    eventmachine (1.2.7-x64-mingw32)
-    execjs (2.8.1)
-    faraday (2.7.4)
-      faraday-net_http (>= 2.0, < 3.1)
-      ruby2_keywords (>= 0.0.4)
-    faraday-net_http (3.0.2)
-    ffi (1.15.5)
+    execjs (2.10.0)
+    faraday (2.12.2)
+      faraday-net_http (>= 2.0, < 3.5)
+      json
+      logger
+    faraday-net_http (3.4.0)
+      net-http (>= 0.5.0)
+    ffi (1.17.1)
+    ffi (1.17.1-arm64-darwin)
+    ffi (1.17.1-x86_64-darwin)
     forwardable-extended (2.6.0)
     gemoji (3.0.1)
     github-pages (226)
@@ -198,37 +203,42 @@ GEM
       gemoji (~> 3.0)
       html-pipeline (~> 2.2)
       jekyll (>= 3.0, < 5.0)
+    json (2.9.1)
     kramdown (2.3.2)
       rexml
     kramdown-parser-gfm (1.1.0)
       kramdown (~> 2.0)
     liquid (4.0.3)
-    listen (3.8.0)
+    listen (3.9.0)
       rb-fsevent (~> 0.10, >= 0.10.3)
       rb-inotify (~> 0.9, >= 0.9.10)
+    logger (1.6.4)
     mercenary (0.3.6)
-    mini_portile2 (2.8.1)
     minima (2.5.1)
       jekyll (>= 3.5, < 5.0)
       jekyll-feed (~> 0.9)
       jekyll-seo-tag (~> 2.1)
-    minitest (5.17.0)
-    nokogiri (1.14.1)
-      mini_portile2 (~> 2.8.0)
+    minitest (5.25.4)
+    net-http (0.6.0)
+      uri
+    nokogiri (1.17.2-arm64-darwin)
+      racc (~> 1.4)
+    nokogiri (1.17.2-x86_64-darwin)
+      racc (~> 1.4)
+    nokogiri (1.17.2-x86_64-linux)
       racc (~> 1.4)
     octokit (4.25.1)
       faraday (>= 1, < 3)
       sawyer (~> 0.9)
     pathutil (0.16.2)
       forwardable-extended (~> 2.6)
     public_suffix (4.0.7)
-    racc (1.6.2)
+    racc (1.8.1)
     rb-fsevent (0.11.2)
-    rb-inotify (0.10.1)
+    rb-inotify (0.11.1)
       ffi (~> 1.0)
-    rexml (3.2.5)
+    rexml (3.4.0)
     rouge (3.26.0)
-    ruby2_keywords (0.0.5)
     rubyzip (2.3.2)
     safe_yaml (1.0.5)
     sass (3.7.4)
@@ -239,33 +249,27 @@ GEM
     sawyer (0.9.2)
       addressable (>= 2.3.5)
       faraday (>= 0.17.3, < 3)
-    simpleidn (0.2.1)
-      unf (~> 0.1.4)
+    simpleidn (0.2.3)
     terminal-table (1.8.0)
       unicode-display_width (~> 1.1, >= 1.1.1)
     thread_safe (0.3.6)
-    typhoeus (1.4.0)
+    typhoeus (1.4.1)
       ethon (>= 0.9.0)
     tzinfo (1.2.11)
       thread_safe (~> 0.1)
-    unf (0.1.4)
-      unf_ext
-    unf_ext (0.0.8.2)
     unicode-display_width (1.8.0)
-    webrick (1.7.0)
-    zeitwerk (2.6.6)
+    uri (1.0.2)
+    webrick (1.9.1)
+    zeitwerk (2.6.18)
 
 PLATFORMS
-  x64-mingw-ucrt
-  x64-mingw32
-  x86_64-darwin-20
-  x86_64-darwin-21
-  x86_64-darwin-22
+  arm64-darwin
+  x86_64-darwin
   x86_64-linux
 
 DEPENDENCIES
   github-pages (= 226)
   webrick (~> 1.7)
 
 BUNDLED WITH
-   2.3.24
+   2.5.16
diff --git a/langtest/augmentation/base.py b/langtest/augmentation/base.py
@@ -402,9 +402,7 @@ def fix(
         new_data = (
             data.copy()
             if isinstance(data, (pd.DataFrame, pd.Series))
-            else copy.deepcopy(data)
-            if append_original
-            else []
+            else copy.deepcopy(data) if append_original else []
         )
         self.__search_results = self.search_sample_results(data)
 

diff --git a/langtest/evaluation/__init__.py b/langtest/evaluation/__init__.py
@@ -77,7 +77,6 @@ async def aevaluate(
             return EvaluationResult(0.0, 0.0, 0.0, 0.0, 0.0)
 
     class LangtestRetrieverEvaluator(RetrieverEvaluator):
-
         """
         A class for evaluating the performance of a retriever model against a set of test cases and configurations.
 

diff --git a/langtest/langtest.py b/langtest/langtest.py
@@ -437,9 +437,9 @@ def load_checkpoints(cls, task, model, save_checkpoints_dir: str) -> "Harness":
                     checkpoint_folder=dataset_checkpoint_dir
                 )
                 harness._checkpoints[dataset_name] = checkpoint_manager.load_checkpoint()
-                harness._testcases[
-                    dataset_name
-                ] = checkpoint_manager.load_remaining_batch()
+                harness._testcases[dataset_name] = (
+                    checkpoint_manager.load_remaining_batch()
+                )
                 harness.batches[dataset_name] = checkpoint_manager.load_batches()
 
         elif isinstance(model, dict):
@@ -458,12 +458,12 @@ def load_checkpoints(cls, task, model, save_checkpoints_dir: str) -> "Harness":
                 checkpoint_manager = CheckpointManager(
                     checkpoint_folder=model_checkpoint_dir
                 )
-                harness._checkpoints[
-                    model_name["model"]
-                ] = checkpoint_manager.load_checkpoint()
-                harness._testcases[
-                    model_name["model"]
-                ] = checkpoint_manager.load_remaining_batch()
+                harness._checkpoints[model_name["model"]] = (
+                    checkpoint_manager.load_checkpoint()
+                )
+                harness._testcases[model_name["model"]] = (
+                    checkpoint_manager.load_remaining_batch()
+                )
                 harness.batches[model_name["model"]] = checkpoint_manager.load_batches()
         return harness
 

diff --git a/langtest/metrics/string_distance.py b/langtest/metrics/string_distance.py
@@ -4,7 +4,6 @@
 
 
 class StringDistance:
-
     """
     A class for calculating various string distance metrics.
     """

diff --git a/langtest/modelhandler/__init__.py b/langtest/modelhandler/__init__.py
@@ -36,9 +36,11 @@
     import langchain
 
     LANGCHAIN_HUBS = {
-        RENAME_HUBS.get(hub.lower(), hub.lower())
-        if hub.lower() in RENAME_HUBS
-        else hub.lower(): hub
+        (
+            RENAME_HUBS.get(hub.lower(), hub.lower())
+            if hub.lower() in RENAME_HUBS
+            else hub.lower()
+        ): hub
         for hub in langchain.llms.__all__
     }
     INSTALLED_HUBS += list(LANGCHAIN_HUBS.keys())

diff --git a/langtest/modelhandler/modelhandler.py b/langtest/modelhandler/modelhandler.py
@@ -17,9 +17,11 @@
     import langchain.llms
 
     LANGCHAIN_HUBS = {
-        RENAME_HUBS.get(hub.lower(), hub.lower())
-        if hub.lower() in RENAME_HUBS
-        else hub.lower(): hub
+        (
+            RENAME_HUBS.get(hub.lower(), hub.lower())
+            if hub.lower() in RENAME_HUBS
+            else hub.lower()
+        ): hub
         for hub in langchain.llms.__all__
     }
 else:

diff --git a/langtest/tasks/task.py b/langtest/tasks/task.py
@@ -249,12 +249,17 @@ def create_sample(
                     word=token,
                     start=cursor,
                     end=cursor + len(token),
-                    pos_tag=row_data[column_mapper[pos_tag]][token_indx]
-                    if pos_tag in column_mapper and column_mapper[pos_tag] in row_data
-                    else None,
-                    chunk_tag=row_data[column_mapper[chunk_tag]][token_indx]
-                    if chunk_tag in column_mapper and column_mapper[chunk_tag] in row_data
-                    else None,
+                    pos_tag=(
+                        row_data[column_mapper[pos_tag]][token_indx]
+                        if pos_tag in column_mapper and column_mapper[pos_tag] in row_data
+                        else None
+                    ),
+                    chunk_tag=(
+                        row_data[column_mapper[chunk_tag]][token_indx]
+                        if chunk_tag in column_mapper
+                        and column_mapper[chunk_tag] in row_data
+                        else None
+                    ),
                 )
             )
             cursor += len(token) + 1  # +1 to account for the white space
@@ -302,9 +307,11 @@ def create_sample(
             if not isinstance(labels, list):
                 labels = [str(labels)]
             labels = [
-                samples.SequenceLabel(label=label, score=1.0)
-                if isinstance(label, str)
-                else label
+                (
+                    samples.SequenceLabel(label=label, score=1.0)
+                    if isinstance(label, str)
+                    else label
+                )
                 for label in labels
             ]
         else:

diff --git a/langtest/transform/accuracy.py b/langtest/transform/accuracy.py
@@ -235,9 +235,11 @@ def predict_summarization(sample):
 
         if kwargs["is_default"]:
             y_pred = y_pred.apply(
-                lambda x: "1"
-                if x in ["pos", "LABEL_1", "POS"]
-                else ("0" if x in ["neg", "LABEL_0", "NEG"] else x)
+                lambda x: (
+                    "1"
+                    if x in ["pos", "LABEL_1", "POS"]
+                    else ("0" if x in ["neg", "LABEL_0", "NEG"] else x)
+                )
             )
 
         supported_tests = cls.available_tests()

diff --git a/langtest/transform/fairness.py b/langtest/transform/fairness.py
@@ -228,9 +228,11 @@ def predict_summarization(sample: Sample):
 
                 if kwargs["is_default"]:
                     y_pred = y_pred.apply(
-                        lambda x: "1"
-                        if x in ["pos", "LABEL_1", "POS"]
-                        else ("0" if x in ["neg", "LABEL_0", "NEG"] else x)
+                        lambda x: (
+                            "1"
+                            if x in ["pos", "LABEL_1", "POS"]
+                            else ("0" if x in ["neg", "LABEL_0", "NEG"] else x)
+                        )
                     )
 
                 grouped_label[gender] = [y_true, y_pred]

diff --git a/langtest/transform/grammar.py b/langtest/transform/grammar.py
@@ -67,13 +67,15 @@ def transform(self) -> List[Sample]:
 
             if str(TestFactory.task) in ("question-answering"):
                 _ = [
-                    sample.transform(
-                        test_func,
-                        params.get("parameters", {}),
-                        prob=params.pop("prob", 1.0),
+                    (
+                        sample.transform(
+                            test_func,
+                            params.get("parameters", {}),
+                            prob=params.pop("prob", 1.0),
+                        )
+                        if hasattr(sample, "transform")
+                        else sample
                     )
-                    if hasattr(sample, "transform")
-                    else sample
                     for sample in data_handler_copy
                 ]
                 transformed_samples = data_handler_copy

diff --git a/langtest/transform/robustness.py b/langtest/transform/robustness.py
@@ -132,13 +132,15 @@ def transform(self) -> List[Sample]:
                 and test_name != "multiple_perturbations"
             ):
                 _ = [
-                    sample.transform(
-                        test_func,
-                        params.get("parameters", {}),
-                        prob=params.pop("prob", 1.0),
+                    (
+                        sample.transform(
+                            test_func,
+                            params.get("parameters", {}),
+                            prob=params.pop("prob", 1.0),
+                        )
+                        if hasattr(sample, "transform")
+                        else sample
                     )
-                    if hasattr(sample, "transform")
-                    else sample
                     for sample in data_handler_copy
                 ]
                 transformed_samples = data_handler_copy
@@ -163,24 +165,28 @@ def transform(self) -> List[Sample]:
                                 "parameters"
                             ] = {"accent_map": {v: k for k, v in A2B_DICT.items()}}
                         _ = [
-                            sample.transform(
-                                func=test_func,
-                                params=self.tests,
-                                prob=prob,
-                                perturbations=perturbations,
+                            (
+                                sample.transform(
+                                    func=test_func,
+                                    params=self.tests,
+                                    prob=prob,
+                                    perturbations=perturbations,
+                                )
+                                if hasattr(sample, "transform")
+                                else sample
                             )
-                            if hasattr(sample, "transform")
-                            else sample
                             for sample in data_handler_copy
                         ]
                         transformed_samples_perturbation = copy.deepcopy(
                             data_handler_copy
                         )  # Create a deep copy
                         if perturbation_number != "":
                             test_type = "-".join(
-                                str(perturbation)
-                                if not isinstance(perturbation, dict)
-                                else next(iter(perturbation))
+                                (
+                                    str(perturbation)
+                                    if not isinstance(perturbation, dict)
+                                    else next(iter(perturbation))
+                                )
                                 for perturbation in perturbations
                             )
                             for sample in transformed_samples_perturbation:
@@ -219,9 +225,11 @@ def transform(self) -> List[Sample]:
 
                         if perturbation_number != "":
                             test_type = "-".join(
-                                str(perturbation)
-                                if not isinstance(perturbation, dict)
-                                else next(iter(perturbation))
+                                (
+                                    str(perturbation)
+                                    if not isinstance(perturbation, dict)
+                                    else next(iter(perturbation))
+                                )
                                 for perturbation in perturbations
                             )
                             for sample in transformed_samples_perturbation:
-Original file line number
+Diff line change
@@ -77,7 +77,6 @@ async def aevaluate(
                 return EvaluationResult(0.0, 0.0, 0.0, 0.0, 0.0)
         class LangtestRetrieverEvaluator(RetrieverEvaluator):
             """
             A class for evaluating the performance of a retriever model against a set of test cases and configurations.
@@ Expand Down @@