Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GSK-2135] Show warning if dependencies or python versions are mismatched #1620

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
9f11bf2
Show a warning when loading with different Py vers
Inokinoki Nov 14, 2023
77af7ad
Merge branch 'main' of github.com:Giskard-AI/giskard into feature/gsk…
Inokinoki Nov 20, 2023
dd45e6a
Add exception if loading failed due to py version
Inokinoki Nov 20, 2023
168f59c
Merge branch 'main' of github.com:Giskard-AI/giskard into feature/gsk…
Inokinoki Nov 20, 2023
d4fcd61
Polish decriptions
Inokinoki Nov 20, 2023
858cec2
Use `GiskardPythonVerException` to unify exception
Inokinoki Nov 20, 2023
b0a750f
Add test for model loading with different py ver
Inokinoki Nov 20, 2023
b4056a7
Add tests for 3.10 and 3.11 func model loading
Inokinoki Nov 21, 2023
56d29b8
Add py 3.9 test fixtures for func model
Inokinoki Nov 21, 2023
075f0fa
Add a compat table
Inokinoki Nov 21, 2023
a457696
Merge branch 'main' of github.com:Giskard-AI/giskard into feature/gsk…
Inokinoki Nov 22, 2023
f5d3494
Attempt to distinguish the env exceptions
Inokinoki Nov 22, 2023
b6feaea
Capture `TypeError`
Inokinoki Nov 22, 2023
9dd5091
Update error message detections
Inokinoki Nov 23, 2023
897e2a9
Merge branch 'main' of github.com:Giskard-AI/giskard into feature/gsk…
Inokinoki Nov 24, 2023
eacb786
Retrieve py ver of models for cross-version issues
Inokinoki Nov 24, 2023
aa68087
Fix model python version arg name
Inokinoki Nov 24, 2023
a29b371
Fix `model_py_ver` type and missing arg in tests
Inokinoki Nov 24, 2023
98a9fd8
Merge branch 'main' of github.com:Giskard-AI/giskard into feature/gsk…
Inokinoki Nov 28, 2023
06f0a76
Remove `TypeError` checking
Inokinoki Nov 28, 2023
cad5795
Fix python version tuple function call
Inokinoki Nov 28, 2023
0ded656
Try to parse Python version for function wrapper
Inokinoki Nov 28, 2023
cfde3ec
Merge branch 'main' of github.com:Giskard-AI/giskard into feature/gsk…
Inokinoki Nov 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion giskard/ml_worker/core/savable.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from giskard.client.giskard_client import GiskardClient
from giskard.core.core import SMT, SavableMeta
from giskard.ml_worker.exceptions.giskard_exception import python_env_exception_helper
from giskard.ml_worker.testing.registry.registry import tests_registry
from giskard.settings import settings

Expand Down Expand Up @@ -173,7 +174,12 @@ def load(cls, local_dir: Path, uuid: str, meta: SMT):

if local_dir.exists():
with open(local_dir / "data.pkl", "rb") as f:
_function = cloudpickle.load(f)
try:
# According to https://github.com/cloudpipe/cloudpickle#cloudpickle:
# Cloudpickle can only be used to send objects between the exact same version of Python.
_function = cloudpickle.load(f)
except Exception as e:
raise python_env_exception_helper(cls.__name__, e)
else:
try:
func = getattr(sys.modules[meta.module], meta.name)
Expand Down
36 changes: 36 additions & 0 deletions giskard/ml_worker/exceptions/giskard_exception.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,39 @@
from typing import Optional, Tuple
import platform


class GiskardException(Exception):
def __init__(self, *args: object) -> None:
super().__init__(*args)


class GiskardPythonEnvException(GiskardException):
def __init__(self, *args: object) -> None:
super().__init__(*args)


class GiskardPythonVerException(GiskardPythonEnvException):
def __init__(self, cls_name, e, required_py_ver, *args: object) -> None:
super().__init__(
f"Failed to load '{cls_name}' due to {e.__class__.__name__}.\n"
f"Make sure you are loading it in the environment with matched Python version (required {required_py_ver}, loading with {platform.python_version()}).",
*args,
)


class GiskardPythonDepException(GiskardPythonEnvException):
def __init__(self, cls_name, e, *args: object) -> None:
super().__init__(
f"Failed to load '{cls_name}' due to {e.__class__.__name__}.\n"
"Make sure you are loading it in the environment with matched dependencies.",
*args,
)


def python_env_exception_helper(cls_name, e: Exception, required_py_ver: Optional[Tuple[str, str, str]] = None):
if required_py_ver is not None and required_py_ver[:2] != platform.python_version_tuple()[:2]:
# Python major and minor versions are not matched
# Notice that there could be some false positive, check: https://github.com/Giskard-AI/giskard/pull/1620
return GiskardPythonVerException(cls_name, e, required_py_ver=required_py_ver)
# We assume the other cases as the dependency issues
return GiskardPythonDepException(cls_name, e)
6 changes: 5 additions & 1 deletion giskard/ml_worker/testing/registry/giskard_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from giskard.core.core import SMT, TestFunctionMeta
from giskard.core.validation import configured_validate_arguments
from giskard.ml_worker.core.savable import Artifact
from giskard.ml_worker.exceptions.giskard_exception import python_env_exception_helper
from giskard.ml_worker.testing.registry.registry import get_object_uuid, tests_registry
from giskard.ml_worker.testing.test_result import TestResult
from giskard.utils.analytics_collector import analytics
Expand Down Expand Up @@ -75,7 +76,10 @@ def _load_meta_locally(cls, local_dir, uuid: str) -> Optional[TestFunctionMeta]:
def load(cls, local_dir: Path, uuid: str, meta: TestFunctionMeta):
if local_dir.exists():
with open(Path(local_dir) / "data.pkl", "rb") as f:
func = pickle.load(f)
try:
func = pickle.load(f)
except Exception as e:
raise python_env_exception_helper(cls.__name__, e)
elif hasattr(sys.modules[meta.module], meta.name):
func = getattr(sys.modules[meta.module], meta.name)
else:
Expand Down
56 changes: 44 additions & 12 deletions giskard/models/base/model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Iterable, List, Optional, Type, Union
from typing import Iterable, List, Optional, Tuple, Type, Union

import builtins
import importlib
Expand All @@ -22,7 +22,7 @@
from ...core.core import ModelMeta, ModelType, SupportedModelTypes
from ...core.validation import configured_validate_arguments
from ...datasets.base import Dataset
from ...ml_worker.exceptions.giskard_exception import GiskardException
from ...ml_worker.exceptions.giskard_exception import GiskardException, python_env_exception_helper
from ...ml_worker.utils.logging import Timer
from ...models.cache import ModelCache
from ...path_utils import get_size
Expand Down Expand Up @@ -207,11 +207,16 @@ def is_text_generation(self) -> bool:
return self.meta.model_type == SupportedModelTypes.TEXT_GENERATION

@classmethod
def determine_model_class(cls, meta, local_dir):
def determine_model_class(cls, meta, local_dir, model_py_ver: Optional[Tuple[str, str, str]] = None):
class_file = Path(local_dir) / MODEL_CLASS_PKL
if class_file.exists():
with open(class_file, "rb") as f:
clazz = cloudpickle.load(f)
try:
# According to https://github.com/cloudpipe/cloudpickle#cloudpickle:
# Cloudpickle can only be used to send objects between the exact same version of Python.
clazz = cloudpickle.load(f)
except Exception as e:
raise python_env_exception_helper(cls.__name__, e, required_py_ver=model_py_ver)
if not issubclass(clazz, BaseModel):
raise ValueError(f"Unknown model class: {clazz}. Models should inherit from 'BaseModel' class")
return clazz
Expand Down Expand Up @@ -440,7 +445,7 @@ def download(cls, client: Optional[GiskardClient], project_key, model_id):
if client is None:
# internal worker case, no token based http client [deprecated, to be removed]
assert local_dir.exists(), f"Cannot find existing model {project_key}.{model_id} in {local_dir}"
_, meta = cls.read_meta_from_local_dir(local_dir)
meta_response, meta = cls.read_meta_from_local_dir(local_dir)
else:
client.load_artifact(local_dir, posixpath.join(project_key, "models", model_id))
meta_response: ModelMetaInfo = client.load_model_meta(project_key, model_id)
Expand All @@ -461,19 +466,23 @@ def download(cls, client: Optional[GiskardClient], project_key, model_id):
loader_class=file_meta["loader_class"],
)

clazz = cls.determine_model_class(meta, local_dir)
model_py_ver = (
tuple(meta_response.languageVersion.split(".")) if "PYTHON" == meta_response.language.upper() else None
)

clazz = cls.determine_model_class(meta, local_dir, model_py_ver=model_py_ver)

constructor_params = meta.__dict__
constructor_params["id"] = str(model_id)

del constructor_params["loader_module"]
del constructor_params["loader_class"]

model = clazz.load(local_dir, **constructor_params)
model = clazz.load(local_dir, model_py_ver=model_py_ver, **constructor_params)
return model

@classmethod
def read_meta_from_local_dir(cls, local_dir):
def read_meta_from_local_dir(cls, local_dir) -> Tuple[ModelMetaInfo, ModelMeta]:
with (Path(local_dir) / META_FILENAME).open(encoding="utf-8") as f:
file_meta = yaml.load(f, Loader=yaml.Loader)
meta = ModelMeta(
Expand All @@ -486,8 +495,26 @@ def read_meta_from_local_dir(cls, local_dir):
loader_module=file_meta["loader_module"],
loader_class=file_meta["loader_class"],
)
# dirty implementation to return id like this, to be decided if meta properties can just be BaseModel properties
return file_meta["id"], meta

# Bring more information, such as language and language version
extra_meta = ModelMetaInfo(
id=file_meta["id"],
name=meta.name,
modelType=file_meta["model_type"],
featureNames=meta.feature_names if meta.feature_names is not None else [],
threshold=meta.classification_threshold,
description=meta.description,
classificationLabels=meta.classification_labels
if meta.classification_labels is None
else list(map(str, meta.classification_labels)),
languageVersion=file_meta["language_version"],
language=file_meta["language"],
size=file_meta["size"],
classificationLabelsDtype=None,
createdDate="",
projectId=-1,
)
return extra_meta, meta

@classmethod
def cast_labels(cls, meta_response: ModelMetaInfo) -> List[Union[str, Type]]:
Expand All @@ -499,7 +526,7 @@ def cast_labels(cls, meta_response: ModelMetaInfo) -> List[Union[str, Type]]:
return labels_

@classmethod
def load(cls, local_dir, **kwargs):
def load(cls, local_dir, model_py_ver: Optional[Tuple[str, str, str]] = None, **kwargs):
class_file = Path(local_dir) / MODEL_CLASS_PKL
model_id, meta = cls.read_meta_from_local_dir(local_dir)

Expand All @@ -510,7 +537,12 @@ def load(cls, local_dir, **kwargs):

if class_file.exists():
with open(class_file, "rb") as f:
clazz = cloudpickle.load(f)
try:
# According to https://github.com/cloudpipe/cloudpickle#cloudpickle:
# Cloudpickle can only be used to send objects between the exact same version of Python.
clazz = cloudpickle.load(f)
except Exception as e:
raise python_env_exception_helper(cls.__name__, e, required_py_ver=model_py_ver)
clazz_kwargs = {}
clazz_kwargs.update(constructor_params)
clazz_kwargs.update(kwargs)
Expand Down
13 changes: 10 additions & 3 deletions giskard/models/base/serialization.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import pickle
from pathlib import Path
from typing import Union
from typing import Optional, Tuple, Union

import cloudpickle
import mlflow

from giskard.ml_worker.exceptions.giskard_exception import python_env_exception_helper

from .wrapper import WrapperModel


Expand Down Expand Up @@ -49,12 +51,17 @@ def save_model(self, local_path: Union[str, Path]) -> None:
)

@classmethod
def load_model(cls, local_dir):
def load_model(cls, local_dir, model_py_ver: Optional[Tuple[str, str, str]] = None):
local_path = Path(local_dir)
model_path = local_path / "model.pkl"
if model_path.exists():
with open(model_path, "rb") as f:
model = cloudpickle.load(f)
try:
# According to https://github.com/cloudpipe/cloudpickle#cloudpickle:
# Cloudpickle can only be used to send objects between the exact same version of Python.
model = cloudpickle.load(f)
except Exception as e:
raise python_env_exception_helper(cls.__name__, e, required_py_ver=model_py_ver)
return model
else:
raise ValueError(
Expand Down
38 changes: 29 additions & 9 deletions giskard/models/base/wrapper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Callable, Iterable, Optional, Union
from typing import Any, Callable, Iterable, Optional, Tuple, Union

import logging
import pickle
Expand All @@ -16,6 +16,7 @@
from ...core.validation import configured_validate_arguments
from ..utils import warn_once
from .model import BaseModel
from giskard.ml_worker.exceptions.giskard_exception import python_env_exception_helper

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -242,35 +243,44 @@ def save_wrapper_meta(self, local_path):
)

@classmethod
def load(cls, local_dir, **kwargs):
def load(cls, local_dir, model_py_ver: Optional[Tuple[str, str, str]] = None, **kwargs):
constructor_params = cls.load_constructor_params(local_dir, **kwargs)

return cls(model=cls.load_model(local_dir), **constructor_params)
if model_py_ver is None:
# Try to extract Python version from meta info under local dir
meta_response, _ = cls.read_meta_from_local_dir(local_dir)
model_py_ver = (
tuple(meta_response.languageVersion.split(".")) if "PYTHON" == meta_response.language.upper() else None
)

return cls(model=cls.load_model(local_dir, model_py_ver), **constructor_params)

@classmethod
def load_constructor_params(cls, local_dir, **kwargs):
def load_constructor_params(cls, local_dir, model_py_ver: Optional[Tuple[str, str, str]] = None, **kwargs):
params = cls.load_wrapper_meta(local_dir)
params["data_preprocessing_function"] = cls.load_data_preprocessing_function(local_dir)
params["model_postprocessing_function"] = cls.load_model_postprocessing_function(local_dir)
params.update(kwargs)

model_id, meta = cls.read_meta_from_local_dir(local_dir)
extra_meta, meta = cls.read_meta_from_local_dir(local_dir)
constructor_params = meta.__dict__
constructor_params["id"] = model_id
constructor_params["id"] = extra_meta.id
constructor_params = constructor_params.copy()
constructor_params.update(params)

return constructor_params

@classmethod
@abstractmethod
def load_model(cls, path: Union[str, Path]):
def load_model(cls, path: Union[str, Path], model_py_ver: Optional[Tuple[str, str, str]] = None):
"""Loads the wrapped ``model`` object.

Parameters
----------
path : Union[str, Path]
Path from which the model should be loaded.
model_py_ver : Optional[Tuple[str, str, str]]
Python version used to save the model, to validate if model loading failed.
"""
...

Expand All @@ -280,7 +290,12 @@ def load_data_preprocessing_function(cls, local_path: Union[str, Path]):
file_path = local_path / "giskard-data-preprocessing-function.pkl"
if file_path.exists():
with open(file_path, "rb") as f:
return cloudpickle.load(f)
try:
# According to https://github.com/cloudpipe/cloudpickle#cloudpickle:
# Cloudpickle can only be used to send objects between the exact same version of Python.
return cloudpickle.load(f)
except Exception as e:
raise python_env_exception_helper("Data Preprocessing Function", e)
return None

@classmethod
Expand All @@ -289,7 +304,12 @@ def load_model_postprocessing_function(cls, local_path: Union[str, Path]):
file_path = local_path / "giskard-model-postprocessing-function.pkl"
if file_path.exists():
with open(file_path, "rb") as f:
return cloudpickle.load(f)
try:
# According to https://github.com/cloudpipe/cloudpickle#cloudpickle:
# Cloudpickle can only be used to send objects between the exact same version of Python.
return cloudpickle.load(f)
except Exception as e:
raise python_env_exception_helper("Data Postprocessing Function", e)
return None

@classmethod
Expand Down
3 changes: 2 additions & 1 deletion giskard/models/catboost.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import Optional, Tuple
import mlflow

from .sklearn import SKLearnModel
Expand All @@ -12,7 +13,7 @@ def save_model(self, local_path, mlflow_meta: mlflow.models.Model):
mlflow.catboost.save_model(self.model, path=local_path, mlflow_model=mlflow_meta)

@classmethod
def load_model(cls, local_dir):
def load_model(cls, local_dir, model_py_ver: Optional[Tuple[str, str, str]] = None):
return mlflow.catboost.load_model(local_dir)

def to_mlflow(self, artifact_path: str = "catboost-model-from-giskard", **kwargs):
Expand Down
4 changes: 2 additions & 2 deletions giskard/models/huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class explicitly using :class:`giskard.models.huggingface.HuggingFaceModel`.
the `model_postprocessing_function` argument. This function should take the
raw output of your model and return a numpy array of probabilities.
"""
from typing import Any, Callable, Iterable, Optional, Union
from typing import Any, Callable, Iterable, Optional, Tuple, Union

import logging
from pathlib import Path
Expand Down Expand Up @@ -196,7 +196,7 @@ def __init__(
pass

@classmethod
def load_model(cls, local_path):
def load_model(cls, local_path, model_py_ver: Optional[Tuple[str, str, str]] = None):
huggingface_meta_file = Path(local_path) / "giskard-model-huggingface-meta.yaml"
if huggingface_meta_file.exists():
with open(huggingface_meta_file) as f:
Expand Down
8 changes: 4 additions & 4 deletions giskard/models/langchain.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Any, Callable, Iterable, Optional, Union, Dict
from typing import Any, Callable, Iterable, Optional, Tuple, Union, Dict

import pandas as pd

Expand Down Expand Up @@ -57,16 +57,16 @@ def save_artifacts(self, artifact_dir) -> None:
...

@classmethod
def load(cls, local_dir, **kwargs):
def load(cls, local_dir, model_py_ver: Optional[Tuple[str, str, str]] = None, **kwargs):
constructor_params = cls.load_constructor_params(local_dir, **kwargs)

artifacts = cls.load_artifacts(Path(local_dir) / "artifacts") or dict()
constructor_params.update(artifacts)

return cls(model=cls.load_model(local_dir, **artifacts), **constructor_params)
return cls(model=cls.load_model(local_dir, model_py_ver=model_py_ver, **artifacts), **constructor_params)

@classmethod
def load_model(cls, local_dir, **kwargs):
def load_model(cls, local_dir, model_py_ver: Optional[Tuple[str, str, str]] = None, **kwargs):
from langchain.chains import load_chain

path = Path(local_dir)
Expand Down
Loading
Loading