Commit 8c1f44b

Merge branch 'main' into feature/gsk-2203-move-non-ml-worker-related-code-outside-from-ml_worker

kevinmessiaen committed Jan 4, 2024 · 2 parents a40d5b7 + d046450

Showing 66 changed files with 4,543 additions and 3,236 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/build-python.yml
@@ -164,8 +164,8 @@ jobs:
PYTEST_XDIST_AUTO_NUM_WORKERS: 2

- name: SonarCloud Scan
if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10' && !matrix.langchain_minimal && !matrix.pandas_v1 && !matrix.pydantic_v1 && (github.event.ref == 'refs/heads/main' || github.event_name == 'pull_request')}}
uses: SonarSource/sonarcloud-github-action@v2.0.2
if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10' && !matrix.langchain_minimal && !matrix.pandas_v1 && !matrix.pydantic_v1 && (github.event.ref == 'refs/heads/main' || github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository)}}
uses: SonarSource/sonarcloud-github-action@v2.1.1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
@@ -175,7 +175,7 @@ jobs:

- name: "Python client: archive built artifacts"
if: ${{ github.event_name == 'push' && matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10' }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
path: dist/*whl

@@ -187,7 +187,7 @@ jobs:

- name: "Memory csv"
if: ${{ always() && matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10' && !matrix.langchain_minimal && !matrix.pandas_v1 && !matrix.pydantic_v1 }}
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
path: memory*.csv
name: memory-usage
12 changes: 12 additions & 0 deletions .github/workflows/do-release.yml
@@ -42,3 +42,15 @@ jobs:
"version": "${{ github.ref_name }}",
"ref": "${{ github.ref }}"
}
- name: Post to a Slack channel
id: slack
uses: slackapi/slack-github-action@…
with:
# Slack channel id, channel name, or user id to post message.
# See also: https://api.slack.com/methods/chat.postMessage#channels
channel-id: 'C02Q2772VEH'
slack-message: "@channel New release: ${{ github.ref_name }} is being made !:tada:\nRelease notes there: ${{ steps.github-release.outputs.url }}"
# For posting a rich message using Block Kit
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
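
For readers unfamiliar with slackapi's action: the step above ends up calling Slack's chat.postMessage Web API method with the bot token stored in the repository secrets. A minimal sketch of the equivalent call using the official slack_sdk Python package (the package, the placeholder message, and the environment handling are illustrative, not part of this commit):

```python
# Illustrative sketch only — not part of this commit. The workflow step above is
# roughly equivalent to one chat.postMessage call made with the bot token.
import os

from slack_sdk import WebClient

client = WebClient(token=os.environ["SLACK_BOT_TOKEN"])
client.chat_postMessage(
    channel="C02Q2772VEH",              # channel id taken from the workflow step
    text="New release is being made!",  # the real step interpolates the tag and release URL
)
```
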
22 changes: 22 additions & 0 deletions .github/workflows/pre-commit-checks.yml
@@ -0,0 +1,22 @@
name: Pre-commit checks
on:
push:
branches:
- main
pull_request:
workflow_dispatch:

env:
GSK_DISABLE_ANALYTICS: true
SENTRY_ENABLED: false
defaults:
run:
shell: bash
jobs:
pre-commit:
name: Pre-commit checks
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: pre-commit/action@…
2 changes: 1 addition & 1 deletion README.md
@@ -150,7 +150,7 @@ If you are interested in learning more about Giskard's premium offering, please

To start the **Giskard hub**, run the following command:
```sh
pip install giskard[hub] -U
pip install "giskard[hub]" -U
giskard hub start
```

2 changes: 1 addition & 1 deletion conftest.py
@@ -77,7 +77,7 @@ def pytest_addoption(parser: Parser):

def separate_process(item: Function) -> List[TestReport]:
with NamedTemporaryFile(delete=False) as fp:
proc = subprocess.run(
subprocess.run(
shell=True,
check=False,
stdout=sys.stdout,
6 changes: 6 additions & 0 deletions docs/community/contribution_guidelines/dev-environment.md
@@ -14,6 +14,12 @@
brew install pre-commit
pre-commit install
```

## Run pre-commit hook manually to fix easy issues
If the build is failing because some pre-commit checks don't pass, you can fix the easy issues by running
```sh
pre-commit run --all-files
```
and then committing the fixed files

## Troubleshooting

13 changes: 13 additions & 0 deletions docs/reference/tests/data.rst
@@ -0,0 +1,13 @@
Data quality tests
^^^^^^^^^^^^^^^^^^^^^
.. autofunction:: giskard.testing.test_data_uniqueness
.. autofunction:: giskard.testing.test_data_completeness
.. autofunction:: giskard.testing.test_valid_range
.. autofunction:: giskard.testing.test_valid_values
.. autofunction:: giskard.testing.test_data_correlation
.. autofunction:: giskard.testing.test_outlier_value
.. autofunction:: giskard.testing.test_foreign_constraint
.. autofunction:: giskard.testing.test_label_consistency
.. autofunction:: giskard.testing.test_mislabeling
.. autofunction:: giskard.testing.test_feature_importance
.. autofunction:: giskard.testing.test_class_imbalance
3 changes: 2 additions & 1 deletion docs/reference/tests/index.rst
@@ -8,4 +8,5 @@ Tests
statistic
performance
drift
llm
llm
data
3 changes: 2 additions & 1 deletion docs/reference/tests/llm.rst
@@ -4,7 +4,8 @@ LLM tests
Injections
----------
.. autofunction:: giskard.testing.tests.llm.test_llm_char_injection
.. autofunction:: giskard.testing.tests.llm.test_llm_prompt_injection
.. autofunction:: giskard.testing.tests.llm.test_llm_single_output_against_strings
.. autofunction:: giskard.testing.tests.llm.test_llm_output_against_strings

LLM-as-a-judge
--------------
11 changes: 11 additions & 0 deletions giskard/client/giskard_client.py
@@ -208,6 +208,17 @@ def load_model_meta(self, project_key: str, uuid: str) -> ModelMetaInfo:
def load_dataset_meta(self, project_key: str, uuid: str) -> DatasetMeta:
res = self._session.get(f"project/{project_key}/datasets/{uuid}").json()
info = DatasetMetaInfo.parse_obj(res)  # Used for validation, and avoid extra and typos
analytics.track(
"hub:dataset:download",
{
"project": anonymize(project_key),
"name": anonymize(info.name),
"target": anonymize(info.target),
"columnTypes": anonymize(info.columnTypes),
"columnDtypes": anonymize(info.columnDtypes),
"nb_rows": info.numberOfRows,
},
)
return DatasetMeta(
name=info.name,
target=info.target,
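
The new tracking call sends only anonymized metadata about the downloaded dataset. As a rough illustration of the intent (this is an assumption about what `anonymize` does, not Giskard's actual implementation), identifying strings can be reduced to short hashes while plain counts are sent untouched:

```python
# Assumption-based sketch: identifying fields are hashed before being tracked,
# while non-identifying numbers (e.g. row counts) are reported as-is.
import hashlib


def anonymize_value(value: str) -> str:
    # Hypothetical helper, not giskard.utils.analytics_collector.anonymize.
    return hashlib.sha256(value.encode("utf-8")).hexdigest()[:10]


event = {
    "project": anonymize_value("credit_scoring"),
    "name": anonymize_value("German credit dataset"),
    "nb_rows": 1000,
}
print(event)  # only hashes and counts leave the machine in this sketch
```
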
6 changes: 4 additions & 2 deletions giskard/commands/cli_hub.py
@@ -2,7 +2,6 @@
import os
import time
from pathlib import Path
from typing import Optional

import click
import docker
@@ -14,6 +13,7 @@
from packaging import version
from packaging.version import InvalidVersion, Version
from tenacity import retry, wait_exponential
from typing import Optional

import giskard
from giskard.cli_utils import common_options
@@ -192,7 +192,9 @@ def _pull_image(version):
if not _check_downloaded(version):
logger.info(f"Downloading image for version {version}")
try:
analytics.track("giskard-server:install:start", {"version": version})
create_docker_client().images.pull(IMAGE_NAME, tag=version)
analytics.track("giskard-server:install:success", {"version": version})
except NotFound:
logger.error(
f"Image {get_image_name(version)} not found. Use a valid `--version` argument or check the content of $GSK_HOME/server-settings.yml"
@@ -334,7 +336,7 @@ def start(attached, skip_version_check, version, environment, env_file):
environment = list(environment)
if env_file is not None:
with open(env_file, "r") as f:
environment = f.readlines() + environment
environment = f.read().splitlines() + environment

_start(attached, skip_version_check, version, environment)

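
The last hunk above fixes a subtle bug in `--env-file` handling: `readlines()` keeps the trailing newline on every entry, so values like `GSK_PORT=19000\n` were passed to the container with a stray `\n`, whereas `read().splitlines()` strips it. A quick sketch of the difference (the variable names are made up for illustration):

```python
# Demonstrates why read().splitlines() replaced readlines(): readlines() keeps
# the trailing "\n" on each env entry, splitlines() does not.
import io

env_file_content = "GSK_HOST=0.0.0.0\nGSK_PORT=19000\n"

print(io.StringIO(env_file_content).readlines())
# ['GSK_HOST=0.0.0.0\n', 'GSK_PORT=19000\n']   <- newlines leak into the values

print(env_file_content.splitlines())
# ['GSK_HOST=0.0.0.0', 'GSK_PORT=19000']       <- clean KEY=VALUE entries
```
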
17 changes: 7 additions & 10 deletions giskard/core/core.py
@@ -1,13 +1,11 @@
import typing

import inspect
import json
import logging
from abc import ABC
from dataclasses import dataclass
from enum import Enum
from pathlib import Path

import typing
from griffe import Docstring
from griffe.docstrings.dataclasses import (
DocstringSection,
@@ -174,7 +172,7 @@ class CallableMeta(SavableMeta, ABC):
name: str
display_name: str
module: str
doc: str
doc: CallableDocumentation
module_doc: str
tags: List[str]
version: Optional[int]
@@ -275,14 +273,14 @@ def extract_code(self, callable_obj):
return code

@staticmethod
def default_doc(description: str) -> str:
def default_doc(description: str) -> CallableDocumentation:
doc = CallableDocumentation()
doc.description = description
doc.parameters = {}
return json.dumps(doc.to_dict())
return doc

@staticmethod
def extract_doc(func) -> Optional[str]:
def extract_doc(func) -> Optional[CallableDocumentation]:
if not func.__doc__:
return None

@@ -328,16 +326,15 @@ def extract_doc(func) -> Optional[str]:
else:
logger.warning(f"Unexpected documentation element for {func.__name__}: {d.kind}")

func_doc = json.dumps(res.to_dict())
return func_doc
return res

def to_json(self):
return {
"uuid": self.uuid,
"name": self.name,
"display_name": self.display_name,
"module": self.module,
"doc": self.doc,
"doc": self.doc.to_dict() if self.doc else None,
"module_doc": self.module_doc,
"code": self.code,
"tags": self.tags,
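
The core.py hunks above change how callable documentation is stored: `CallableMeta.doc` now holds a `CallableDocumentation` object and is only converted with `to_dict()` inside `to_json()`, instead of being pre-serialized with `json.dumps`, which avoids sending a JSON string nested inside JSON. A simplified sketch with a stand-in class (the real `CallableDocumentation` has more fields):

```python
# Simplified stand-in for CallableDocumentation, only to show the serialization change.
import json
from dataclasses import dataclass, field
from typing import Dict


@dataclass
class Doc:
    description: str = ""
    parameters: Dict[str, dict] = field(default_factory=dict)

    def to_dict(self) -> dict:
        return {"description": self.description, "parameters": self.parameters}


doc = Doc(description="Checks that a column has no duplicate values")

# Before this commit: doc was kept as a JSON string, so serializing the payload
# produced JSON escaped inside JSON.
before = json.dumps({"doc": json.dumps(doc.to_dict())})

# After this commit: to_json() embeds the dict and serialization happens once.
after = json.dumps({"doc": doc.to_dict()})

print(before)  # {"doc": "{\"description\": ...}"}   -- double-encoded string
print(after)   # {"doc": {"description": ..., ...}}  -- plain nested object
```
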
58 changes: 37 additions & 21 deletions giskard/core/suite.py
@@ -20,6 +20,9 @@
from giskard.registry.transformation_function import TransformationFunction
from giskard.core.test_result import TestMessage, TestMessageLevel, TestResult
from giskard.models.base import BaseModel
from ..client.python_utils import warning
from ..utils.analytics_collector import analytics
from ..utils.artifacts import serialize_parameter

logger = logging.getLogger(__name__)

@@ -121,7 +124,6 @@ def to_wandb(self, run: Optional["wandb.wandb_sdk.wandb_run.Run"] = None) -> Non
except ImportError as e:
raise GiskardImportError("wandb") from e
from ..integrations.wandb.wandb_utils import _parse_test_name, get_wandb_run
from ..utils.analytics_collector import analytics

run = get_wandb_run(run)
# Log just a test description and a metric.
@@ -229,17 +231,15 @@ def single_binary_result(test_results: List):
return all(res.passed for res in test_results)


def build_test_input_dto(client, p, pname, ptype, project_key, uploaded_uuids):
def build_test_input_dto(client, p, pname, ptype, project_key, uploaded_uuid_status: Dict[str, bool]):
if issubclass(type(p), Dataset) or issubclass(type(p), BaseModel):
if str(p.id) not in uploaded_uuids:
p.upload(client, project_key)
uploaded_uuids.append(str(p.id))
return TestInputDTO(name=pname, value=str(p.id), type=ptype)
if _try_upload_artifact(p, client, project_key, uploaded_uuid_status):
return TestInputDTO(name=pname, value=str(p.id), type=ptype)
else:
return TestInputDTO(name=pname, value=pname, is_alias=True, type=ptype)
elif issubclass(type(p), Artifact):
if str(p.meta.uuid) not in uploaded_uuids:
p.upload(client, None if "giskard" in p.meta.tags else project_key)

uploaded_uuids.append(str(p.meta.uuid))
if not _try_upload_artifact(p, client, None if "giskard" in p.meta.tags else project_key, uploaded_uuid_status):
return TestInputDTO(name=pname, value=pname, is_alias=True, type=ptype)

kwargs_params = [
f"kwargs[{pname}] = {repr(value)}" for pname, value in p.params.items() if pname not in p.meta.args
@@ -261,7 +261,7 @@ def build_test_input_dto(client, p, pname, ptype, project_key, uploaded_uuids):
pname,
p.meta.args[pname].type,
project_key,
uploaded_uuids,
uploaded_uuid_status,
)
for pname, value in p.params.items()
if pname in p.meta.args
@@ -426,25 +426,25 @@ def upload(self, client: GiskardClient, project_key: str):
if self.name is None:
self.name = "Unnamed test suite"

uploaded_uuids: List[str] = []
uploaded_uuid_status: Dict[str, bool] = dict()

# Upload the default parameters if they are model or dataset
for arg in self.default_params.values():
if isinstance(arg, BaseModel) or isinstance(arg, Dataset):
arg.upload(client, project_key)
uploaded_uuids.append(str(arg.id))
_try_upload_artifact(arg, client, project_key, uploaded_uuid_status)

self.id = client.save_test_suite(self.to_dto(client, project_key, uploaded_uuid_status))

self.id = client.save_test_suite(self.to_dto(client, project_key, uploaded_uuids))
project_id = client.get_project(project_key).project_id
print(f"Test suite has been saved: {client.host_url}/main/projects/{project_id}/test-suite/{self.id}/overview")
print(f"Test suite has been saved: {client.host_url}/main/projects/{project_key}/test-suite/{self.id}/overview")
analytics.track("hub:test_suite:uploaded")
return self

def to_dto(self, client: GiskardClient, project_key: str, uploaded_uuids: Optional[List[str]] = None):
def to_dto(self, client: GiskardClient, project_key: str, uploaded_uuid_status: Optional[Dict[str, bool]] = None):
suite_tests: List[SuiteTestDTO] = list()

# Avoid to upload the same artifacts several times
if uploaded_uuids is None:
uploaded_uuids = []
if uploaded_uuid_status is None:
uploaded_uuid_status = dict()

for t in self.tests:
params = dict(
@@ -455,7 +455,7 @@ def to_dto(self, client: GiskardClient, project_key: str, uploaded_uuids: Option
pname,
t.giskard_test.meta.args[pname].type,
project_key,
uploaded_uuids,
uploaded_uuid_status,
)
for pname, p in t.provided_inputs.items()
if pname in t.giskard_test.meta.args
@@ -678,3 +678,19 @@ def format_test_result(result: Union[bool, TestResult]) -> str:
return f"{{{'passed' if result.passed else 'failed'}, metric={result.metric}}}"
else:
return "passed" if result else "failed"


def _try_upload_artifact(artifact, client, project_key: str, uploaded_uuid_status: Dict[str, bool]) -> bool:
artifact_id = serialize_parameter(artifact)

if artifact_id not in uploaded_uuid_status:
try:
artifact.upload(client, project_key)
uploaded_uuid_status[artifact_id] = True
except: # noqa NOSONAR
warning(
f"Failed to upload {str(artifact)} used in the test suite. The test suite will be partially uploaded."
)
uploaded_uuid_status[artifact_id] = False

return uploaded_uuid_status[artifact_id]
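
The suite.py changes replace the `uploaded_uuids` list with an `uploaded_uuid_status` dict: each artifact is uploaded at most once, a failure is remembered instead of retried, and the parameter is then sent as an alias so the suite can still be uploaded partially. A stripped-down sketch of that caching pattern (the names and the fake upload callables are illustrative):

```python
# Stripped-down illustration of the upload cache introduced above: each artifact
# id is tried once, and the cached status drives the alias fallback.
from typing import Callable, Dict


def try_upload(artifact_id: str, upload: Callable[[], None], status: Dict[str, bool]) -> bool:
    if artifact_id not in status:
        try:
            upload()
            status[artifact_id] = True
        except Exception:
            # Remember the failure so later tests that reference the same
            # artifact do not retry it; they will send an alias instead.
            status[artifact_id] = False
    return status[artifact_id]


status: Dict[str, bool] = {}
print(try_upload("dataset-1234", lambda: None, status))  # True  (uploaded)
print(try_upload("model-5678", lambda: 1 / 0, status))   # False (upload failed)
print(try_upload("model-5678", lambda: None, status))    # False (cached, no retry)
```
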
5 changes: 2 additions & 3 deletions giskard/datasets/base/__init__.py
@@ -28,6 +28,8 @@
TransformationFunctionType,
)
from giskard.settings import settings

from ...utils.analytics_collector import analytics
from ..metadata.indexing import ColumnMetadataMixin
from ...utils.file_utils import get_file_name

@@ -36,7 +38,6 @@
except ImportError:
pass


SAMPLE_SIZE = 1000

logger = logging.getLogger(__name__)
@@ -228,7 +229,6 @@ def __init__(
}

self.data_processor = DataProcessor()

logger.info("Your 'pandas.DataFrame' is successfully wrapped by Giskard's 'Dataset' wrapper class.")

@property
@@ -741,7 +741,6 @@ def to_wandb(self, run: Optional["wandb.wandb_sdk.wandb_run.Run"] = None) -> Non
except ImportError as e:
raise GiskardImportError("wandb") from e
from ...integrations.wandb.wandb_utils import get_wandb_run
from ...utils.analytics_collector import analytics

run = get_wandb_run(run)
