Skip to content

Commit

Permalink
Merge branch 'main' into feature/gsk-4033-fix-correctness-aggregation-error
Browse files Browse the repository at this point in the history
  • Loading branch information
henchaves authored Dec 19, 2024
2 parents 2ce7fca + 91222db commit effa6a0
Show file tree
Hide file tree
Showing 18 changed files with 95 additions and 40 deletions.
2 changes: 1 addition & 1 deletion giskard/core/model_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def validate_model_loading_and_saving(model: BaseModel):
with tempfile.TemporaryDirectory(prefix="giskard-model-") as f:
model.save(f)

with open(f + "/giskard-model-meta.yaml") as yaml_f:
with open(f + "/giskard-model-meta.yaml", encoding="utf-8") as yaml_f:
saved_meta = yaml.load(yaml_f, Loader=yaml.Loader)

meta = ModelMeta(
Expand Down
4 changes: 2 additions & 2 deletions giskard/core/savable.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def _get_meta_endpoint(cls, uuid: str, project_key: str) -> str:
return posixpath.join("project", project_key, cls._get_name(), uuid)

def _save_meta_locally(self, local_dir):
    """Dump ``self.meta`` as YAML into ``meta.yaml`` inside ``local_dir``.

    The file is written as UTF-8 text so the output does not depend on the
    platform's default encoding.
    """
    meta_path = Path(local_dir) / "meta.yaml"
    with meta_path.open("w", encoding="utf-8") as meta_file:
        yaml.dump(self.meta, meta_file)


Expand All @@ -70,7 +70,7 @@ def _load_meta_locally(cls, local_dir, uuid: str) -> Optional[SMT]:
if meta is not None:
return meta

with open(local_dir / "meta.yaml", "r") as f:
with open(local_dir / "meta.yaml", "r", encoding="utf-8") as f:
return yaml.load(f, Loader=yaml.Loader)

@classmethod
Expand Down
10 changes: 5 additions & 5 deletions giskard/core/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ def to_json(self, filename=None):
"metric_value": suite_result.result.metric,
}
if filename is not None:
with open(filename, "w") as json_file:
json.dump(results, json_file, indent=4)
with open(filename, "w", encoding="utf-8") as json_file:
json.dump(results, json_file, indent=4, ensure_ascii=False)
else:
return json.dumps(results, indent=4)

Expand Down Expand Up @@ -628,8 +628,8 @@ def save(self, folder: str):

json_content = self._to_json(folder_path, saved_uuid_status)

with open(folder_path / "suite.json", "w") as f:
json.dump(json_content, f)
with open(folder_path / "suite.json", "w", encoding="utf-8") as f:
json.dump(json_content, f, ensure_ascii=False)

analytics.track("lib:test_suite:saved")

Expand Down Expand Up @@ -843,7 +843,7 @@ def _contains_test(self, test: TestFunctionMeta):
def load(cls, folder: str) -> "Suite":
folder_path = Path(folder)

with open(folder_path / "suite.json", "r") as f:
with open(folder_path / "suite.json", "r", encoding="utf-8") as f:
suite_json = json.load(f)

suite = Suite(name=suite_json.get("name", "Unnamed test suite"))
Expand Down
4 changes: 2 additions & 2 deletions giskard/datasets/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ def cast_column_to_dtypes(df, column_dtypes):
@classmethod
def load(cls, local_path: str):
# load metadata
with open(Path(local_path) / "giskard-dataset-meta.yaml", "r") as meta_f:
with open(Path(local_path) / "giskard-dataset-meta.yaml", "r", encoding="utf-8") as meta_f:
meta = yaml.safe_load(meta_f)

# load data
Expand Down Expand Up @@ -560,7 +560,7 @@ def save(self, local_path: str):
f.write(compressed_bytes)
original_size_bytes, compressed_size_bytes = len(uncompressed_bytes), len(compressed_bytes)

with open(Path(local_path) / "giskard-dataset-meta.yaml", "w") as meta_f:
with open(Path(local_path) / "giskard-dataset-meta.yaml", "w", encoding="utf-8") as meta_f:
yaml.dump(
{
"id": str(self.id),
Expand Down
4 changes: 2 additions & 2 deletions giskard/models/base/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def save_model_postprocessing_function(self, local_path: Union[str, Path], *_arg
cloudpickle.dump(self.model_postprocessing_function, f, protocol=pickle.DEFAULT_PROTOCOL)

def save_wrapper_meta(self, local_path, *_args, **_kwargs):
with open(Path(local_path) / "giskard-model-wrapper-meta.yaml", "w") as f:
with open(Path(local_path) / "giskard-model-wrapper-meta.yaml", "w", encoding="utf-8") as f:
yaml.dump(
{
"batch_size": self.batch_size,
Expand Down Expand Up @@ -313,7 +313,7 @@ def load_model_postprocessing_function(cls, local_path: Union[str, Path], *_args
def load_wrapper_meta(cls, local_dir, *args, **kwargs):
wrapper_meta_file = Path(local_dir) / "giskard-model-wrapper-meta.yaml"
if wrapper_meta_file.exists():
with open(wrapper_meta_file) as f:
with open(wrapper_meta_file, encoding="utf-8") as f:
wrapper_meta = yaml.load(f, Loader=yaml.Loader)
wrapper_meta["batch_size"] = int(wrapper_meta["batch_size"]) if wrapper_meta["batch_size"] else None
return wrapper_meta
Expand Down
5 changes: 3 additions & 2 deletions giskard/models/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class explicitly using :class:`giskard.models.huggingface.HuggingFaceModel`.
the `model_postprocessing_function` argument. This function should take the
raw output of your model and return a numpy array of probabilities.
"""

from typing import Any, Callable, Iterable, Optional, Tuple, Union

import logging
Expand Down Expand Up @@ -199,7 +200,7 @@ def __init__(
def load_model(cls, local_path, model_py_ver: Optional[Tuple[str, str, str]] = None, *args, **kwargs):
huggingface_meta_file = Path(local_path) / "giskard-model-huggingface-meta.yaml"
if huggingface_meta_file.exists():
with open(huggingface_meta_file) as f:
with open(huggingface_meta_file, encoding="utf-8") as f:
huggingface_meta = yaml.load(f, Loader=yaml.Loader)

if huggingface_meta["pipeline_task"]:
Expand All @@ -208,7 +209,7 @@ def load_model(cls, local_path, model_py_ver: Optional[Tuple[str, str, str]] = N
return huggingface_meta["huggingface_module"].from_pretrained(local_path)

def save_huggingface_meta(self, local_path, *args, **kwargs):
with open(Path(local_path) / "giskard-model-huggingface-meta.yaml", "w") as f:
with open(Path(local_path) / "giskard-model-huggingface-meta.yaml", "w", encoding="utf-8") as f:
yaml.dump(
{
"huggingface_module": self.huggingface_module,
Expand Down
4 changes: 2 additions & 2 deletions giskard/models/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def _convert_to_numpy(self, raw_predictions):
return super()._convert_to_numpy(raw_predictions)

def save_pytorch_meta(self, local_path, *_args, **_kwargs):
with open(Path(local_path) / "giskard-model-pytorch-meta.yaml", "w") as f:
with open(Path(local_path) / "giskard-model-pytorch-meta.yaml", "w", encoding="utf-8") as f:
yaml.dump(
{
"device": self.device,
Expand All @@ -224,7 +224,7 @@ def load(cls, local_dir, model_py_ver: Optional[Tuple[str, str, str]] = None, *a
def load_pytorch_meta(cls, local_dir):
pytorch_meta_file = Path(local_dir) / "giskard-model-pytorch-meta.yaml"
if pytorch_meta_file.exists():
with open(pytorch_meta_file) as f:
with open(pytorch_meta_file, encoding="utf-8") as f:
pytorch_meta = yaml.load(f, Loader=yaml.Loader)
pytorch_meta["device"] = pytorch_meta.get("device")
pytorch_meta["torch_dtype"] = pytorch_meta.get("torch_dtype")
Expand Down
8 changes: 4 additions & 4 deletions giskard/rag/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,11 +193,11 @@ def load(
The embedding model to use inside the knowledge base. If not provided, the default model will be used.
"""
path = Path(folder_path)
knowledge_base_meta = json.load(open(path / "knowledge_base_meta.json", "r"))
knowledge_base_meta = json.load(open(path / "knowledge_base_meta.json", "r", encoding="utf-8"))
knowledge_base_data = pd.read_json(path / "knowledge_base.jsonl", orient="records", lines=True)
testset = QATestset.load(path / "testset.jsonl")

answers = json.load(open(path / "agent_answer.json", "r"))
answers = json.load(open(path / "agent_answer.json", "r", encoding="utf-8"))
model_outputs = [AgentAnswer(**answer) for answer in answers]

topics = {int(k): topic for k, topic in knowledge_base_meta.pop("topics", None).items()}
Expand All @@ -219,9 +219,9 @@ def load(

metrics_results = {}
if (path / "metrics_results.json").exists():
metrics_results = json.load(open(path / "metrics_results.json", "r"))
metrics_results = json.load(open(path / "metrics_results.json", "r", encoding="utf-8"))

report_details = json.load(open(path / "report_details.json", "r"))
report_details = json.load(open(path / "report_details.json", "r", encoding="utf-8"))
testset._dataframe.index = testset._dataframe.index.astype(str)

report = cls(testset, model_outputs, metrics_results, knowledge_base)
Expand Down
2 changes: 1 addition & 1 deletion giskard/registry/giskard_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _load_meta_locally(cls, local_dir, uuid: str) -> Optional[TestFunctionMeta]:
if meta is not None:
return meta

with open(local_dir / "meta.yaml", "r") as f:
with open(local_dir / "meta.yaml", "r", encoding="utf-8") as f:
return yaml.load(f, Loader=yaml.Loader)

@classmethod
Expand Down
12 changes: 6 additions & 6 deletions giskard/scanner/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ def to_json(self, filename=None):
results[issue.detector_name][issue.level] = []
results[issue.detector_name][issue.level].append(issue.description)
if filename is not None:
with open(filename, "w") as json_file:
json.dump(results, json_file, indent=4)
with open(filename, "w", encoding="utf-8") as json_file:
json.dump(results, json_file, indent=4, ensure_ascii=False)
else:
return json.dumps(results, indent=4)

Expand All @@ -115,7 +115,7 @@ def to_html(self, filename=None, embed=False):
html = widget.render_html(embed=embed)

if filename is not None:
with open(filename, "w") as f:
with open(filename, "w", encoding="utf-8") as f:
f.write(html)
return

Expand All @@ -139,7 +139,7 @@ def to_markdown(self, filename=None, template="summary"):
markdown = widget.render_markdown(template=template)

if filename is not None:
with open(filename, "w") as f:
with open(filename, "w", encoding="utf-8") as f:
f.write(markdown)
return

Expand Down Expand Up @@ -349,7 +349,7 @@ def to_avid(self, filename=None):
]

if filename is not None:
with open(filename, "w") as f, warnings.catch_warnings():
with open(filename, "w", encoding="utf-8") as f, warnings.catch_warnings():
warnings.filterwarnings("ignore", category=DeprecationWarning) # we need to support both pydantic 1 & 2
f.writelines(r.json() + "\n" for r in reports)
return
Expand All @@ -373,7 +373,7 @@ def generate_rails(self, filename=None, colang_version="1.0"):
_rails = generate_rails_from_scan_report(self, colang_version=colang_version)

if filename:
with open(filename, "a") as f:
with open(filename, "a", encoding="utf-8") as f:
f.write(_rails)
return

Expand Down
2 changes: 1 addition & 1 deletion giskard/scanner/robustness/text_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ class TextNationalityTransformation(TextLanguageBasedTransformation):
name = "Switch countries from high- to low-income and vice versa"

def _load_dictionaries(self):
with Path(__file__).parent.joinpath("nationalities.json").open("r") as f:
with Path(__file__).parent.joinpath("nationalities.json").open("r", encoding="utf-8") as f:
nationalities_dict = json.load(f)
self._lang_dictionary = {"en": nationalities_dict["en"], "fr": nationalities_dict["fr"]}

Expand Down
8 changes: 5 additions & 3 deletions giskard/visualization/widget.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,11 @@ def render_html(self, template="full", embed=False) -> str:
escaped = escape(html)
uid = id(self)

with Path(__file__).parent.joinpath("templates", "scan_report", "html", "static", "external.js").open(
"r"
) as f:
with (
Path(__file__)
.parent.joinpath("templates", "scan_report", "html", "static", "external.js")
.open("r", encoding="utf-8") as f
):
js_lib = f.read()

html = f"""<iframe id="scan-{uid}" srcdoc="{escaped}" style="width: 100%; border: none;" class="gsk-scan"></iframe>
Expand Down
4 changes: 2 additions & 2 deletions tests/fixtures/enron_multilabel_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@

# get_labels returns a dictionary representation of these labels.
def get_labels(filename):
with open(filename + ".cats") as f:
with open(filename + ".cats", encoding="utf-8") as f:
labels = defaultdict(dict)
line = f.readline()
while line:
Expand Down Expand Up @@ -99,7 +99,7 @@ def enron_raw_data_full() -> pd.DataFrame:

# Features are metadata from the email object
filename = email_file + ".txt"
with open(filename) as f:
with open(filename, encoding="utf-8") as f:
message = email.message_from_string(f.read())

values_to_add["Subject"] = str(message["Subject"])
Expand Down
2 changes: 1 addition & 1 deletion tests/integrations/test_avid.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def test_scan_report_can_be_exported_to_avid():
dest_path = Path(tmpdir).joinpath("test_report.avid")
report.to_avid(dest_path)

with dest_path.open("r") as f:
with dest_path.open("r", encoding="utf-8") as f:
avid_reports_read = [json.loads(line) for line in f.readlines()]

assert len(avid_reports_read) == len(avid_reports)
Expand Down
26 changes: 22 additions & 4 deletions tests/integrations/test_nemoguardrails.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,39 @@
import json
import tempfile
from pathlib import Path
from unittest.mock import Mock, patch

import pandas as pd
import pytest
from nemoguardrails.colang import parse_colang_file

from giskard.llm.client.base import ChatMessage
from giskard.scanner.issues import Issue, Robustness
from giskard.scanner.report import ScanReport


def _generate_rails(report: ScanReport, filename=None, colang_version="1.0"):
    """Generate rails from ``report`` and return them as a string.

    When ``filename`` is truthy, the rails are exported through
    ``report.generate_rails(filename=...)`` to a file of that name inside a
    temporary directory, then read back as UTF-8 text, so the file-export
    code path is exercised. Otherwise the value returned by
    ``report.generate_rails`` is returned directly.

    Parameters
    ----------
    report : ScanReport
        Report whose ``generate_rails`` method is called.
    filename : optional
        Name of the file to export to; only its final path component is used.
    colang_version : str
        Forwarded unchanged to ``report.generate_rails``.
    """
    if not filename:
        return report.generate_rails(colang_version=colang_version)

    with tempfile.TemporaryDirectory() as tmpdir:
        # Use only the last path component so the export always lands inside
        # the temporary directory, whatever the caller passed in.
        dest = Path(tmpdir) / Path(filename).name
        report.generate_rails(filename=dest, colang_version=colang_version)
        assert dest.exists()
        assert dest.is_file()
        # Read before the context manager exits and removes the directory.
        return dest.read_text(encoding="utf-8")


@pytest.mark.parametrize("filename", [(None), ("rails.co")])
@patch("giskard.integrations.nemoguardrails.get_default_client")
def test_generate_colang_v1_rails_from_scan(get_default_client_mock):
def test_generate_colang_v1_rails_from_scan(get_default_client_mock, filename):
report = make_test_report()

llm_client = get_default_client_mock()
llm_client.complete.side_effect = make_llm_answers()

rails = report.generate_rails()
rails = _generate_rails(report, filename=filename, colang_version="1.0")

# Check that the file is correctly formatted
parsed = parse_colang_file("rails.co", rails, version="1.0")
Expand All @@ -27,14 +44,15 @@ def test_generate_colang_v1_rails_from_scan(get_default_client_mock):
assert parsed["flows"][1]["id"] == "ask help on illegal activities"


@pytest.mark.parametrize("filename", [(None), ("rails.co")])
@patch("giskard.integrations.nemoguardrails.get_default_client")
def test_generate_colang_v2_rails_from_scan(get_default_client_mock):
def test_generate_colang_v2_rails_from_scan(get_default_client_mock, filename):
report = make_test_report()

llm_client = get_default_client_mock()
llm_client.complete.side_effect = make_llm_answers()

rails = report.generate_rails(colang_version="2.x")
rails = _generate_rails(report, filename=filename, colang_version="2.x")

# Check that the file is correctly formatted
parsed = parse_colang_file("rails.co", rails, version="2.x")
Expand Down
2 changes: 1 addition & 1 deletion tests/registry/module_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class PythonModule:

def _write_file(dir: Path, file: Union[str, Path], content: str):
    """Write ``content`` as UTF-8 text to ``dir / file``.

    Missing parent directories of the target are created first.
    """
    target = dir / file
    # Path.parent is "." (never an empty string) for a bare file name, so this
    # does not fail the way os.makedirs("") does for a target without a
    # directory component.
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(content, encoding="utf-8")


Expand Down
2 changes: 1 addition & 1 deletion tests/scan/test_scan_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_scan_report_exports_to_markdown():

assert dest.exists()
assert dest.is_file()
assert dest.read_text() == markdown
assert dest.read_text(encoding="utf-8") == markdown


def test_scan_report_to_json():
Expand Down
34 changes: 34 additions & 0 deletions tests/scan/test_scanner.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import re
import sys
import tempfile
import warnings
from pathlib import Path
from unittest import mock

import numpy as np
Expand Down Expand Up @@ -279,3 +281,35 @@ def test_min_slice_size(titanic_model, titanic_dataset):
detector = SpuriousCorrelationDetector(min_slice_size=2000)
issues = detector.run(titanic_model, titanic_dataset, features=titanic_model.feature_names)
assert len(issues) == 0


@pytest.mark.parametrize("filename", [None, "scan_test_suite_results.json"])
@pytest.mark.slow
def test_export_scan_test_suite_results_to_json(filename, request):
    """Check that scan test-suite results export to JSON, in memory or to a file.

    With ``filename=None`` the JSON string returned by ``to_json()`` is checked.
    Otherwise the results are written to that file name inside a temporary
    directory and the file content is checked.
    """
    dataset = request.getfixturevalue("diabetes_dataset_with_target")
    model = request.getfixturevalue("linear_regression_diabetes")

    suite_results = Scanner().analyze(model, dataset).generate_test_suite().run()

    if not filename:
        exported = suite_results.to_json()
        assert exported is not None
    else:
        with tempfile.TemporaryDirectory() as workdir:
            target = Path(workdir) / filename
            suite_results.to_json(target)
            assert target.exists()
            assert target.is_file()
            # Read while the temporary directory still exists.
            exported = target.read_text(encoding="utf-8")

    # Both export paths must produce a JSON object literal.
    assert exported.startswith("{")
    assert exported.strip().endswith("}")

0 comments on commit effa6a0

Please sign in to comment.