From 6cf02b328ee198178bc549d1de3517e30d23fafa Mon Sep 17 00:00:00 2001 From: Emeli Dral Date: Wed, 23 Aug 2023 00:33:45 +0100 Subject: [PATCH] Added examples of snapshots usage and tests criticality customization (#730) Co-authored-by: Emeli Dral --- .../how_to_specify_test_critycality.ipynb | 151 ++++++++++++++ .../how_to_use_snapshots.ipynb | 197 ++++++++++++++++++ 2 files changed, 348 insertions(+) create mode 100644 examples/how_to_questions/how_to_specify_test_critycality.ipynb create mode 100644 examples/how_to_questions/how_to_use_snapshots.ipynb diff --git a/examples/how_to_questions/how_to_specify_test_critycality.ipynb b/examples/how_to_questions/how_to_specify_test_critycality.ipynb new file mode 100644 index 0000000000..deadccaab4 --- /dev/null +++ b/examples/how_to_questions/how_to_specify_test_critycality.ipynb @@ -0,0 +1,151 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3eab4051-27cc-4d53-b9d5-bc4c4b69ba8c", + "metadata": {}, + "source": [ + "# How to specify test criticality?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ef729d6-5eae-4189-9f12-529ea76817a7", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from sklearn import datasets\n", + "\n", + "from evidently.test_suite import TestSuite\n", + "from evidently.tests import *" + ] + }, + { + "cell_type": "markdown", + "id": "4333330d-c83c-44dc-9107-9fc2c933f7b0", + "metadata": {}, + "source": [ + "## Toy Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75d19dbf-ba6c-4c90-b25c-f14772878da1", + "metadata": {}, + "outputs": [], + "source": [ + "adult_data = datasets.fetch_openml(name='adult', version=2, as_frame='auto')\n", + "adult = adult_data.frame\n", + "\n", + "adult_ref = adult[~adult.education.isin(['Some-college', 'HS-grad', 'Bachelors'])]\n", + "adult_cur = adult[adult.education.isin(['Some-college', 'HS-grad', 'Bachelors'])]\n", + "\n", + "adult_cur.iloc[:2000, 3:5] = np.nan" + ] + }, + { + "cell_type": "markdown", + "id": "5c488a2b-2113-40cc-8212-e0db0afbce2e", + "metadata": {}, + "source": [ + "## Test suite with default criticality " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27447dec-f355-40cc-829f-136eae5baabc", + "metadata": {}, + "outputs": [], + "source": [ + "data_integrity_column_tests = TestSuite(tests=[\n", + " TestColumnNumberOfMissingValues(column_name='education'),\n", + " TestColumnShareOfMissingValues(column_name='education'),\n", + " TestColumnNumberOfDifferentMissingValues(column_name='education'),\n", + " TestColumnAllConstantValues(column_name='education'),\n", + " TestColumnAllUniqueValues(column_name='education'),\n", + " TestColumnRegExp(column_name='education',reg_exp='^[0..9]'),\n", + " TestCategoryShare(column_name='education', category='Some-college', lt=0.5),\n", + " TestCategoryShare(column_name='age', category=27., lt=0.5)\n", + "])\n", + "\n", + "data_integrity_column_tests.run(reference_data=adult_ref, current_data=adult_cur)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "517c9f2a-2d5c-4b59-abeb-624bdfbbb119", + "metadata": {}, + "outputs": [], + "source": [ + "data_integrity_column_tests.show(mode='inline')" + ] + }, + { + "cell_type": "markdown", + "id": "664803a0-14d4-4c9a-b8d0-1a1932b0f3e5", + "metadata": {}, + "source": [ + "## Test suite with custom criticality" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42c679b3-c514-429d-85f7-e51f5091e5b8", + "metadata": {}, + "outputs": [], + "source": [ + "data_integrity_column_tests = TestSuite(tests=[\n", + " TestColumnNumberOfMissingValues(column_name='education'),\n", + " TestColumnShareOfMissingValues(column_name='education'),\n", + " TestColumnNumberOfDifferentMissingValues(column_name='education'),\n", + " TestColumnAllConstantValues(column_name='education', is_critical=False),\n", + " TestColumnAllUniqueValues(column_name='education', is_critical=False),\n", + " TestColumnRegExp(column_name='education',reg_exp='^[0..9]'),\n", + " TestCategoryShare(column_name='education', category='Some-college', lt=0.5),\n", + " TestCategoryShare(column_name='age', category=27., lt=0.5)\n", + "])\n", + "\n", + "data_integrity_column_tests.run(reference_data=adult_ref, current_data=adult_cur)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "deffbb70-c7b9-411c-9798-7273545b6b4f", + "metadata": {}, + "outputs": [], + "source": [ + "data_integrity_column_tests.show(mode='inline')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/how_to_questions/how_to_use_snapshots.ipynb b/examples/how_to_questions/how_to_use_snapshots.ipynb new file mode 100644 index 0000000000..4e2d4f3550 --- /dev/null +++ b/examples/how_to_questions/how_to_use_snapshots.ipynb @@ -0,0 +1,197 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c2dddbfb-e26f-47d2-bdad-8c5083e85ff6", + "metadata": {}, + "source": [ + "# How to use Snapshots?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b18ee6f-4602-4c37-ad3b-965efbf91c4e", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from sklearn import datasets\n", + "\n", + "from evidently.report import Report\n", + "from evidently.metric_preset import DataDriftPreset" + ] + }, + { + "cell_type": "markdown", + "id": "6d078542-9c09-48e7-b749-e3ec99e27fc4", + "metadata": {}, + "source": [ + "## Toy Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66256f1e-f713-47c3-b06f-f62487c4e01c", + "metadata": {}, + "outputs": [], + "source": [ + "bcancer_data = datasets.load_breast_cancer(as_frame=True)\n", + "bcancer = bcancer_data.frame\n", + "\n", + "bcancer_ref = bcancer.sample(n=300, replace=False)\n", + "bcancer_cur = bcancer.sample(n=200, replace=False)" + ] + }, + { + "cell_type": "markdown", + "id": "53421e97-f2a6-4e8d-9bb6-e6e1e1a83f91", + "metadata": {}, + "source": [ + "## Data Drift Report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5899d62-cf2a-4942-abe0-3ff54bdabda3", + "metadata": {}, + "outputs": [], + "source": [ + "data_drift_report = Report(\n", + " metrics=[DataDriftPreset()]\n", + ")\n", + "\n", + "data_drift_report.run(reference_data=bcancer_ref, current_data=bcancer_cur)" + ] + }, + { + "cell_type": "markdown", + "id": "601555e3-3b06-436f-a331-889cbc51a2e5", + "metadata": {}, + "source": [ + "## Report Formats" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72f47a4a-0de3-4c6f-b286-41d4652f25af", + "metadata": {}, + "outputs": [], + "source": [ + "data_drift_report.show(mode='inline')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f341f7bd-f95e-4d48-aa0f-d2241fe23d72", + "metadata": {}, + "outputs": [], + "source": [ + "data_drift_report.json()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e93d1da-a0f2-4ee3-b38d-98c009d19782", + "metadata": {}, + "outputs": [], + "source": [ + "data_drift_report.as_dict()" + ] + }, + { + "cell_type": "markdown", + "id": "05ae3843-37ff-4de5-a1f5-b34be2639d2c", + "metadata": {}, + "source": [ + "## Report saving options" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77fbc338-a992-4973-9f09-578605afab09", + "metadata": {}, + "outputs": [], + "source": [ + "data_drift_report.save_html('data_drift_report.html')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ad9643c-7ef2-47b6-97d9-6d83da8f3d61", + "metadata": {}, + "outputs": [], + "source": [ + "data_drift_report.save_json('data_drift_report.json')" + ] + }, + { + "cell_type": "markdown", + "id": "f74ebd6d-a0cf-486d-a1a1-d0bbd41a43b6", + "metadata": {}, + "source": [ + "## Snapshot save and load " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ad8db16-c235-44e0-b201-5469e394c753", + "metadata": {}, + "outputs": [], + "source": [ + "data_drift_report.save('snapshot.json')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dcb8c8ab-b106-4579-a4b4-f6b2641c5f88", + "metadata": {}, + "outputs": [], + "source": [ + "loaded_report = Report.load('snapshot.json')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2052c76e-4b89-461f-901b-c7dc3ca1dde0", + "metadata": {}, + "outputs": [], + "source": [ + "loaded_report.show(mode='inline')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}