Skip to content

Commit

Permalink
Merge pull request #1161 from JohnSnowLabs/release/2.6.0
Browse files Browse the repository at this point in the history
vulnerabilities and security issues
  • Loading branch information
chakravarthik27 authored Jan 6, 2025
2 parents c457432 + 8cc3b2f commit 2b4f018
Show file tree
Hide file tree
Showing 24 changed files with 713 additions and 527 deletions.
76 changes: 40 additions & 36 deletions docs/Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,35 @@ GEM
minitest (~> 5.1)
tzinfo (~> 1.1)
zeitwerk (~> 2.2, >= 2.2.2)
addressable (2.8.1)
public_suffix (>= 2.0.2, < 6.0)
addressable (2.8.7)
public_suffix (>= 2.0.2, < 7.0)
base64 (0.2.0)
coffee-script (2.4.1)
coffee-script-source
execjs
coffee-script-source (1.11.1)
colorator (1.1.0)
commonmarker (0.23.8)
concurrent-ruby (1.2.0)
dnsruby (1.61.9)
simpleidn (~> 0.1)
commonmarker (0.23.11)
concurrent-ruby (1.3.4)
dnsruby (1.72.3)
base64 (~> 0.2.0)
simpleidn (~> 0.2.1)
em-websocket (0.5.3)
eventmachine (>= 0.12.9)
http_parser.rb (~> 0)
ethon (0.16.0)
ffi (>= 1.15.0)
eventmachine (1.2.7)
eventmachine (1.2.7-x64-mingw32)
execjs (2.8.1)
faraday (2.7.4)
faraday-net_http (>= 2.0, < 3.1)
ruby2_keywords (>= 0.0.4)
faraday-net_http (3.0.2)
ffi (1.15.5)
execjs (2.10.0)
faraday (2.12.2)
faraday-net_http (>= 2.0, < 3.5)
json
logger
faraday-net_http (3.4.0)
net-http (>= 0.5.0)
ffi (1.17.1)
ffi (1.17.1-arm64-darwin)
ffi (1.17.1-x86_64-darwin)
forwardable-extended (2.6.0)
gemoji (3.0.1)
github-pages (226)
Expand Down Expand Up @@ -198,37 +203,42 @@ GEM
gemoji (~> 3.0)
html-pipeline (~> 2.2)
jekyll (>= 3.0, < 5.0)
json (2.9.1)
kramdown (2.3.2)
rexml
kramdown-parser-gfm (1.1.0)
kramdown (~> 2.0)
liquid (4.0.3)
listen (3.8.0)
listen (3.9.0)
rb-fsevent (~> 0.10, >= 0.10.3)
rb-inotify (~> 0.9, >= 0.9.10)
logger (1.6.4)
mercenary (0.3.6)
mini_portile2 (2.8.1)
minima (2.5.1)
jekyll (>= 3.5, < 5.0)
jekyll-feed (~> 0.9)
jekyll-seo-tag (~> 2.1)
minitest (5.17.0)
nokogiri (1.14.1)
mini_portile2 (~> 2.8.0)
minitest (5.25.4)
net-http (0.6.0)
uri
nokogiri (1.17.2-arm64-darwin)
racc (~> 1.4)
nokogiri (1.17.2-x86_64-darwin)
racc (~> 1.4)
nokogiri (1.17.2-x86_64-linux)
racc (~> 1.4)
octokit (4.25.1)
faraday (>= 1, < 3)
sawyer (~> 0.9)
pathutil (0.16.2)
forwardable-extended (~> 2.6)
public_suffix (4.0.7)
racc (1.6.2)
racc (1.8.1)
rb-fsevent (0.11.2)
rb-inotify (0.10.1)
rb-inotify (0.11.1)
ffi (~> 1.0)
rexml (3.2.5)
rexml (3.4.0)
rouge (3.26.0)
ruby2_keywords (0.0.5)
rubyzip (2.3.2)
safe_yaml (1.0.5)
sass (3.7.4)
Expand All @@ -239,33 +249,27 @@ GEM
sawyer (0.9.2)
addressable (>= 2.3.5)
faraday (>= 0.17.3, < 3)
simpleidn (0.2.1)
unf (~> 0.1.4)
simpleidn (0.2.3)
terminal-table (1.8.0)
unicode-display_width (~> 1.1, >= 1.1.1)
thread_safe (0.3.6)
typhoeus (1.4.0)
typhoeus (1.4.1)
ethon (>= 0.9.0)
tzinfo (1.2.11)
thread_safe (~> 0.1)
unf (0.1.4)
unf_ext
unf_ext (0.0.8.2)
unicode-display_width (1.8.0)
webrick (1.7.0)
zeitwerk (2.6.6)
uri (1.0.2)
webrick (1.9.1)
zeitwerk (2.6.18)

PLATFORMS
x64-mingw-ucrt
x64-mingw32
x86_64-darwin-20
x86_64-darwin-21
x86_64-darwin-22
arm64-darwin
x86_64-darwin
x86_64-linux

DEPENDENCIES
github-pages (= 226)
webrick (~> 1.7)

BUNDLED WITH
2.3.24
2.5.16
4 changes: 1 addition & 3 deletions langtest/augmentation/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,9 +402,7 @@ def fix(
new_data = (
data.copy()
if isinstance(data, (pd.DataFrame, pd.Series))
else copy.deepcopy(data)
if append_original
else []
else copy.deepcopy(data) if append_original else []
)
self.__search_results = self.search_sample_results(data)

Expand Down
1 change: 0 additions & 1 deletion langtest/evaluation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ async def aevaluate(
return EvaluationResult(0.0, 0.0, 0.0, 0.0, 0.0)

class LangtestRetrieverEvaluator(RetrieverEvaluator):

"""
A class for evaluating the performance of a retriever model against a set of test cases and configurations.
Expand Down
18 changes: 9 additions & 9 deletions langtest/langtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,9 +437,9 @@ def load_checkpoints(cls, task, model, save_checkpoints_dir: str) -> "Harness":
checkpoint_folder=dataset_checkpoint_dir
)
harness._checkpoints[dataset_name] = checkpoint_manager.load_checkpoint()
harness._testcases[
dataset_name
] = checkpoint_manager.load_remaining_batch()
harness._testcases[dataset_name] = (
checkpoint_manager.load_remaining_batch()
)
harness.batches[dataset_name] = checkpoint_manager.load_batches()

elif isinstance(model, dict):
Expand All @@ -458,12 +458,12 @@ def load_checkpoints(cls, task, model, save_checkpoints_dir: str) -> "Harness":
checkpoint_manager = CheckpointManager(
checkpoint_folder=model_checkpoint_dir
)
harness._checkpoints[
model_name["model"]
] = checkpoint_manager.load_checkpoint()
harness._testcases[
model_name["model"]
] = checkpoint_manager.load_remaining_batch()
harness._checkpoints[model_name["model"]] = (
checkpoint_manager.load_checkpoint()
)
harness._testcases[model_name["model"]] = (
checkpoint_manager.load_remaining_batch()
)
harness.batches[model_name["model"]] = checkpoint_manager.load_batches()
return harness

Expand Down
1 change: 0 additions & 1 deletion langtest/metrics/string_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@


class StringDistance:

"""
A class for calculating various string distance metrics.
"""
Expand Down
8 changes: 5 additions & 3 deletions langtest/modelhandler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@
import langchain

LANGCHAIN_HUBS = {
RENAME_HUBS.get(hub.lower(), hub.lower())
if hub.lower() in RENAME_HUBS
else hub.lower(): hub
(
RENAME_HUBS.get(hub.lower(), hub.lower())
if hub.lower() in RENAME_HUBS
else hub.lower()
): hub
for hub in langchain.llms.__all__
}
INSTALLED_HUBS += list(LANGCHAIN_HUBS.keys())
Expand Down
8 changes: 5 additions & 3 deletions langtest/modelhandler/modelhandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@
import langchain.llms

LANGCHAIN_HUBS = {
RENAME_HUBS.get(hub.lower(), hub.lower())
if hub.lower() in RENAME_HUBS
else hub.lower(): hub
(
RENAME_HUBS.get(hub.lower(), hub.lower())
if hub.lower() in RENAME_HUBS
else hub.lower()
): hub
for hub in langchain.llms.__all__
}
else:
Expand Down
25 changes: 16 additions & 9 deletions langtest/tasks/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,12 +249,17 @@ def create_sample(
word=token,
start=cursor,
end=cursor + len(token),
pos_tag=row_data[column_mapper[pos_tag]][token_indx]
if pos_tag in column_mapper and column_mapper[pos_tag] in row_data
else None,
chunk_tag=row_data[column_mapper[chunk_tag]][token_indx]
if chunk_tag in column_mapper and column_mapper[chunk_tag] in row_data
else None,
pos_tag=(
row_data[column_mapper[pos_tag]][token_indx]
if pos_tag in column_mapper and column_mapper[pos_tag] in row_data
else None
),
chunk_tag=(
row_data[column_mapper[chunk_tag]][token_indx]
if chunk_tag in column_mapper
and column_mapper[chunk_tag] in row_data
else None
),
)
)
cursor += len(token) + 1 # +1 to account for the white space
Expand Down Expand Up @@ -302,9 +307,11 @@ def create_sample(
if not isinstance(labels, list):
labels = [str(labels)]
labels = [
samples.SequenceLabel(label=label, score=1.0)
if isinstance(label, str)
else label
(
samples.SequenceLabel(label=label, score=1.0)
if isinstance(label, str)
else label
)
for label in labels
]
else:
Expand Down
8 changes: 5 additions & 3 deletions langtest/transform/accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,9 +235,11 @@ def predict_summarization(sample):

if kwargs["is_default"]:
y_pred = y_pred.apply(
lambda x: "1"
if x in ["pos", "LABEL_1", "POS"]
else ("0" if x in ["neg", "LABEL_0", "NEG"] else x)
lambda x: (
"1"
if x in ["pos", "LABEL_1", "POS"]
else ("0" if x in ["neg", "LABEL_0", "NEG"] else x)
)
)

supported_tests = cls.available_tests()
Expand Down
8 changes: 5 additions & 3 deletions langtest/transform/fairness.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,9 +228,11 @@ def predict_summarization(sample: Sample):

if kwargs["is_default"]:
y_pred = y_pred.apply(
lambda x: "1"
if x in ["pos", "LABEL_1", "POS"]
else ("0" if x in ["neg", "LABEL_0", "NEG"] else x)
lambda x: (
"1"
if x in ["pos", "LABEL_1", "POS"]
else ("0" if x in ["neg", "LABEL_0", "NEG"] else x)
)
)

grouped_label[gender] = [y_true, y_pred]
Expand Down
14 changes: 8 additions & 6 deletions langtest/transform/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,15 @@ def transform(self) -> List[Sample]:

if str(TestFactory.task) in ("question-answering"):
_ = [
sample.transform(
test_func,
params.get("parameters", {}),
prob=params.pop("prob", 1.0),
(
sample.transform(
test_func,
params.get("parameters", {}),
prob=params.pop("prob", 1.0),
)
if hasattr(sample, "transform")
else sample
)
if hasattr(sample, "transform")
else sample
for sample in data_handler_copy
]
transformed_samples = data_handler_copy
Expand Down
46 changes: 27 additions & 19 deletions langtest/transform/robustness.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,15 @@ def transform(self) -> List[Sample]:
and test_name != "multiple_perturbations"
):
_ = [
sample.transform(
test_func,
params.get("parameters", {}),
prob=params.pop("prob", 1.0),
(
sample.transform(
test_func,
params.get("parameters", {}),
prob=params.pop("prob", 1.0),
)
if hasattr(sample, "transform")
else sample
)
if hasattr(sample, "transform")
else sample
for sample in data_handler_copy
]
transformed_samples = data_handler_copy
Expand All @@ -163,24 +165,28 @@ def transform(self) -> List[Sample]:
"parameters"
] = {"accent_map": {v: k for k, v in A2B_DICT.items()}}
_ = [
sample.transform(
func=test_func,
params=self.tests,
prob=prob,
perturbations=perturbations,
(
sample.transform(
func=test_func,
params=self.tests,
prob=prob,
perturbations=perturbations,
)
if hasattr(sample, "transform")
else sample
)
if hasattr(sample, "transform")
else sample
for sample in data_handler_copy
]
transformed_samples_perturbation = copy.deepcopy(
data_handler_copy
) # Create a deep copy
if perturbation_number != "":
test_type = "-".join(
str(perturbation)
if not isinstance(perturbation, dict)
else next(iter(perturbation))
(
str(perturbation)
if not isinstance(perturbation, dict)
else next(iter(perturbation))
)
for perturbation in perturbations
)
for sample in transformed_samples_perturbation:
Expand Down Expand Up @@ -219,9 +225,11 @@ def transform(self) -> List[Sample]:

if perturbation_number != "":
test_type = "-".join(
str(perturbation)
if not isinstance(perturbation, dict)
else next(iter(perturbation))
(
str(perturbation)
if not isinstance(perturbation, dict)
else next(iter(perturbation))
)
for perturbation in perturbations
)
for sample in transformed_samples_perturbation:
Expand Down
Loading

0 comments on commit 2b4f018

Please sign in to comment.