
Moved model.meta.property to model.property
kevinmessiaen committed Jan 4, 2024
1 parent 1adbf5d commit 388b4ba
Showing 10 changed files with 34 additions and 36 deletions.
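The change is mechanical at every call site: each read of model.meta.<property> becomes model.<property>. Below is a minimal sketch of the delegation pattern that makes both spellings resolve to the same value, assuming the model keeps its metadata object as the backing store and exposes read-only properties. The class and field names are illustrative, not the actual giskard implementation.

from dataclasses import dataclass
from typing import List, Optional


@dataclass
class ModelMeta:
    # Hypothetical stand-in for the model metadata container.
    model_type: str
    feature_names: Optional[List[str]]
    classification_labels: Optional[List[str]]


class BaseModel:
    def __init__(self, meta: ModelMeta):
        self.meta = meta

    # Read-only properties delegate to the wrapped metadata, so
    # model.feature_names and model.meta.feature_names stay in sync.
    @property
    def model_type(self) -> str:
        return self.meta.model_type

    @property
    def feature_names(self) -> Optional[List[str]]:
        return self.meta.feature_names

    @property
    def classification_labels(self) -> Optional[List[str]]:
        return self.meta.classification_labels


model = BaseModel(ModelMeta("classification", ["age", "fare"], ["no", "yes"]))
assert model.classification_labels == model.meta.classification_labels

Under this shape, reads can use the shorter model.<property> form while writes still go through the metadata object, which is why the diff below leaves the assignment german_credit_model.meta.feature_names = None in tests/scan/test_scanner.py untouched.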
14 changes: 7 additions & 7 deletions giskard/ml_worker/testing/utils.py
@@ -1,9 +1,9 @@
-from typing import Optional
-
 import numbers
 from enum import Enum
 from functools import wraps
 
+from typing import Optional
+
 from giskard.core.core import SupportedModelTypes
 from giskard.datasets.base import Dataset

@@ -27,7 +27,7 @@ def wrapper(*args, **kwargs):
         if (
             classification_label is not None
             and model is not None
-            and isinstance(model.meta.classification_labels[0], numbers.Number)
+            and isinstance(model.classification_labels[0], numbers.Number)
         ):
             try:
                 classification_label = int(classification_label)
@@ -39,14 +39,14 @@ def wrapper(*args, **kwargs):
         if target and classification_label and model:
             assert classification_label != target, (
                 'By "classification_label", we refer to one of the values: '
-                f'{model.meta.classification_labels} and not the target: "{target}". '
+                f'{model.classification_labels} and not the target: "{target}". '
                 "Please re-assign this argument."
             )
 
         assert (
-            model.meta.model_type != SupportedModelTypes.CLASSIFICATION
-            or classification_label in model.meta.classification_labels
-        ), f'"{classification_label}" is not part of model labels: {model.meta.classification_labels}'
+            model.model_type != SupportedModelTypes.CLASSIFICATION
+            or classification_label in model.classification_labels
+        ), f'"{classification_label}" is not part of model labels: {model.classification_labels}'
         return func(*args, **kwargs)
 
     wrapper.test_fn = func
15 changes: 7 additions & 8 deletions giskard/models/model_explanation.py
@@ -1,10 +1,9 @@
-from typing import Any, Callable, Dict, List
-
 import logging
 import warnings
 
 import numpy as np
 import pandas as pd
+from typing import Any, Callable, Dict, List
 
 from giskard.core.errors import GiskardImportError
 from giskard.datasets.base import Dataset
@@ -69,7 +68,7 @@ def _get_columns_original_order(prepared_dataset: Dataset, model: BaseModel, dat
     list
         A list of column names in the order that the model was trained with.
     """
-    features_names = model.meta.feature_names
+    features_names = model.feature_names
     return features_names if features_names else [c for c in dataset.df.columns if c in prepared_dataset.df.columns]


@@ -199,11 +198,11 @@ def explain_with_shap(model: BaseModel, dataset: Dataset, only_highest_proba: bo
         shap_values = _get_highest_proba_shap(shap_values, model, dataset)
 
     # Put SHAP values to the Explanation object for a convenience.
-    feature_names = model.meta.feature_names or list(dataset.df.columns.drop(dataset.target, errors="ignore"))
+    feature_names = model.feature_names or list(dataset.df.columns.drop(dataset.target, errors="ignore"))
     shap_explanations = Explanation(shap_values, data=dataset.df[feature_names], feature_names=feature_names)
 
     feature_types = {key: dataset.column_types[key] for key in feature_names}
-    return ShapResult(shap_explanations, feature_types, model.meta.model_type, only_highest_proba)
+    return ShapResult(shap_explanations, feature_types, model.model_type, only_highest_proba)
 
 
 def _calculate_sample_shap_values(model: BaseModel, dataset: Dataset, input_data: Dict) -> np.ndarray:
@@ -228,18 +227,18 @@ def predict_array(array):
 @timer()
 def explain(model: BaseModel, dataset: Dataset, input_data: Dict):
     shap_values = _calculate_sample_shap_values(model, dataset, input_data)
-    feature_names = model.meta.feature_names or list(dataset.df.columns.drop(dataset.target, errors="ignore"))
+    feature_names = model.feature_names or list(dataset.df.columns.drop(dataset.target, errors="ignore"))
 
     if model.is_regression:
         explanation_chart_data = summary_shap_regression(shap_values=shap_values, feature_names=feature_names)
     elif model.is_classification:
         explanation_chart_data = summary_shap_classification(
             shap_values=shap_values,
             feature_names=feature_names,
-            class_names=model.meta.classification_labels,
+            class_names=model.classification_labels,
         )
     else:
-        raise ValueError(f"Prediction task is not supported: {model.meta.model_type}")
+        raise ValueError(f"Prediction task is not supported: {model.model_type}")
     return explanation_chart_data


9 changes: 4 additions & 5 deletions giskard/utils/analytics_collector.py
@@ -1,6 +1,3 @@
-from types import TracebackType
-from typing import Dict, Optional, Type
-
 import getpass
 import hashlib
 import os
@@ -11,9 +8,11 @@
 from functools import wraps
 from threading import ExceptHookArgs, Lock
 from traceback import TracebackException
+from types import TracebackType
 
 import requests
 from mixpanel import Mixpanel
+from typing import Dict, Optional, Type
 
 from giskard.client.dtos import ServerInfo
 from giskard.settings import settings
@@ -55,11 +54,11 @@ def get_model_properties(model):
         return {}
 
     inner_model_class = fullname(model.model) if isinstance(model, WrapperModel) else None
-    feature_names = [anonymize(n) for n in model.meta.feature_names] if model.meta.feature_names else None
+    feature_names = [anonymize(n) for n in model.feature_names] if model.feature_names else None
 
     return {
         "model_id": str(model.id),
-        "model_type": model.meta.model_type.value,
+        "model_type": model.model_type.value,
         "model_class": fullname(model),
         "model_inner_class": inner_model_class,
         "model_feature_names": feature_names,
2 changes: 1 addition & 1 deletion tests/communications/test_websocket_actor.py
@@ -406,7 +406,7 @@ def test_websocket_actor_explain_text_ws_classification(internal, request):
     reply = listener.explain_text_ws(client=None if internal else client, params=params)
     assert isinstance(reply, websocket.ExplainText)
     # Classification labels
-    for label in model.meta.classification_labels:
+    for label in model.classification_labels:
         assert label in reply.weights.keys()


6 changes: 3 additions & 3 deletions tests/scan/llm/test_prompt_injection_detector.py
@@ -1,12 +1,12 @@
+import ast
 from unittest.mock import Mock, patch
 
-import ast
 import pandas as pd
 
 from giskard.datasets.base import Dataset
+from giskard.llm.evaluators.string_matcher import StringMatcherConfig
 from giskard.scanner.llm.llm_prompt_injection_detector import LLMPromptInjectionDetector
 from giskard.testing.tests.llm.injections import _test_llm_output_against_strings
-from giskard.llm.evaluators.string_matcher import StringMatcherConfig
 
 
 def test_prompt_injection_data_loader_properties():
@@ -95,7 +95,7 @@ def test_detector(PromptInjectionDataLoader): # noqa
     detector = LLMPromptInjectionDetector()
 
     # First run
-    issues = detector.run(model, dataset, model.meta.feature_names)
+    issues = detector.run(model, dataset, model.feature_names)
     assert len(issues) == 1
     assert issues[0].is_major

10 changes: 5 additions & 5 deletions tests/scan/test_performance_bias_detector.py
@@ -15,7 +15,7 @@ def test_performance_bias_detector_skips_small_datasets(german_credit_model, ger
     small_dataset = german_credit_data.slice(lambda df: df.sample(50), row_level=False)
     detector = PerformanceBiasDetector()
     with caplog.at_level(logging.WARNING):
-        issues = detector.run(german_credit_model, small_dataset, features=german_credit_model.meta.feature_names)
+        issues = detector.run(german_credit_model, small_dataset, features=german_credit_model.feature_names)
     record = caplog.records[-1]
 
     assert len(issues) == 0
@@ -30,7 +30,7 @@ def test_performance_bias_detector_trims_large_dataset(german_credit_model, germ

     detector = PerformanceBiasDetector()
     try:
-        detector.run(german_credit_model, large_dataset, features=german_credit_model.meta.feature_names)
+        detector.run(german_credit_model, large_dataset, features=german_credit_model.feature_names)
     except (ValueError, TypeError):
         pass
     assert large_dataset.slice.called
@@ -41,7 +41,7 @@ def test_performance_bias_detector_trims_large_dataset(german_credit_model, germ

     detector = PerformanceBiasDetector()
     try:
-        detector.run(german_credit_model, normal_dataset, features=german_credit_model.meta.feature_names)
+        detector.run(german_credit_model, normal_dataset, features=german_credit_model.feature_names)
     except (ValueError, TypeError):
         pass

@@ -51,7 +51,7 @@ def test_performance_bias_detector_trims_large_dataset(german_credit_model, germ
 def test_performance_bias_detector_with_tabular(german_credit_model, german_credit_data):
     detector = PerformanceBiasDetector()
 
-    issues = detector.run(german_credit_model, german_credit_data, features=german_credit_model.meta.feature_names)
+    issues = detector.run(german_credit_model, german_credit_data, features=german_credit_model.feature_names)
     assert len(issues) > 0
     assert all([isinstance(issue, Issue) for issue in issues])

@@ -73,7 +73,7 @@ def test_performance_bias_detector_with_text_features(enron_model, enron_data):
     dataset = Dataset(df, target=enron_data.target, column_types=enron_data.column_types)
     detector = PerformanceBiasDetector()
 
-    issues = detector.run(enron_model, dataset, enron_model.meta.feature_names)
+    issues = detector.run(enron_model, dataset, enron_model.feature_names)
     assert len(issues) > 0
     assert all([isinstance(issue, Issue) for issue in issues])

6 changes: 3 additions & 3 deletions tests/scan/test_scanner.py
@@ -50,7 +50,7 @@ def _test_scanner_returns_non_empty_scan_result(dataset_name, model_name, reques
     dataset = request.getfixturevalue(dataset_name)
     model = request.getfixturevalue(model_name)
 
-    result = scanner.analyze(model, dataset, features=model.meta.feature_names, raise_exceptions=True)
+    result = scanner.analyze(model, dataset, features=model.feature_names, raise_exceptions=True)
 
     assert isinstance(result, ScanReport)
     assert result.to_html()
@@ -67,7 +67,7 @@ def test_scanner_should_work_with_empty_model_feature_names(german_credit_data,
     scanner = Scanner()
     german_credit_model.meta.feature_names = None
     result = scanner.analyze(
-        german_credit_model, german_credit_data, features=german_credit_model.meta.feature_names, raise_exceptions=True
+        german_credit_model, german_credit_data, features=german_credit_model.feature_names, raise_exceptions=True
     )
 
     assert isinstance(result, ScanReport)
@@ -86,7 +86,7 @@ def test_scanner_works_if_dataset_has_no_target(titanic_model, titanic_dataset):
     scanner = Scanner()
     no_target_dataset = Dataset(titanic_dataset.df, target=None)
     result = scanner.analyze(
-        titanic_model, no_target_dataset, features=titanic_model.meta.feature_names, raise_exceptions=True
+        titanic_model, no_target_dataset, features=titanic_model.feature_names, raise_exceptions=True
     )
 
     assert isinstance(result, ScanReport)
4 changes: 2 additions & 2 deletions tests/scan/test_spurious_correlation_detector.py
@@ -48,11 +48,11 @@ def random_classifier(df):
 @pytest.mark.memory_expensive
 def test_threshold(titanic_model, titanic_dataset):
     detector = SpuriousCorrelationDetector(threshold=0.6)
-    issues = detector.run(titanic_model, titanic_dataset, features=titanic_model.meta.feature_names)
+    issues = detector.run(titanic_model, titanic_dataset, features=titanic_model.feature_names)
     assert len(issues) > 0
 
     detector = SpuriousCorrelationDetector(threshold=0.9)
-    issues = detector.run(titanic_model, titanic_dataset, features=titanic_model.meta.feature_names)
+    issues = detector.run(titanic_model, titanic_dataset, features=titanic_model.feature_names)
     assert not issues


2 changes: 1 addition & 1 deletion tests/scan/test_text_perturbation_detector.py
@@ -11,7 +11,7 @@

 def test_perturbation_classification(titanic_model, titanic_dataset):
     analyzer = TextPerturbationDetector(threshold=0.01)
-    res = analyzer.run(titanic_model, titanic_dataset, features=titanic_model.meta.feature_names)
+    res = analyzer.run(titanic_model, titanic_dataset, features=titanic_model.feature_names)
     assert res


2 changes: 1 addition & 1 deletion tests/test_suite.py
@@ -45,7 +45,7 @@ def constant_pred(df):
         return np.stack((np.ones(len(df)), np.zeros(len(df)))).T
 
     bad_model = Model(
-        constant_pred, model_type="classification", classification_labels=german_credit_model.meta.classification_labels
+        constant_pred, model_type="classification", classification_labels=german_credit_model.classification_labels
     )
 
     # The test will not pass
