From 6cf02b328ee198178bc549d1de3517e30d23fafa Mon Sep 17 00:00:00 2001
From: Emeli Dral <emeli.dral@gmail.com>
Date: Wed, 23 Aug 2023 00:33:45 +0100
Subject: [PATCH] Added examples of snapshots usage and tests criticality
 customization (#730)

Co-authored-by: Emeli Dral
---
 .../how_to_specify_test_critycality.ipynb     | 151 ++++++++++++++
 .../how_to_use_snapshots.ipynb                | 197 ++++++++++++++++++
 2 files changed, 348 insertions(+)
 create mode 100644 examples/how_to_questions/how_to_specify_test_critycality.ipynb
 create mode 100644 examples/how_to_questions/how_to_use_snapshots.ipynb

diff --git a/examples/how_to_questions/how_to_specify_test_critycality.ipynb b/examples/how_to_questions/how_to_specify_test_critycality.ipynb
new file mode 100644
index 0000000000..deadccaab4
--- /dev/null
+++ b/examples/how_to_questions/how_to_specify_test_critycality.ipynb
@@ -0,0 +1,151 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "3eab4051-27cc-4d53-b9d5-bc4c4b69ba8c",
+   "metadata": {},
+   "source": [
+    "# How to specify test criticality?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9ef729d6-5eae-4189-9f12-529ea76817a7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "from sklearn import datasets\n",
+    "\n",
+    "from evidently.test_suite import TestSuite\n",
+    "from evidently.tests import *"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4333330d-c83c-44dc-9107-9fc2c933f7b0",
+   "metadata": {},
+   "source": [
+    "## Toy Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "75d19dbf-ba6c-4c90-b25c-f14772878da1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "adult_data = datasets.fetch_openml(name='adult', version=2, as_frame='auto')\n",
+    "adult = adult_data.frame\n",
+    "\n",
+    "adult_ref = adult[~adult.education.isin(['Some-college', 'HS-grad', 'Bachelors'])]\n",
+    "adult_cur = adult[adult.education.isin(['Some-college', 'HS-grad', 'Bachelors'])]\n",
+    "\n",
+    "adult_cur.iloc[:2000, 3:5] = np.nan"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5c488a2b-2113-40cc-8212-e0db0afbce2e",
+   "metadata": {},
+   "source": [
+    "## Test suite with default criticality "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "27447dec-f355-40cc-829f-136eae5baabc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_integrity_column_tests = TestSuite(tests=[\n",
+    "    TestColumnNumberOfMissingValues(column_name='education'),\n",
+    "    TestColumnShareOfMissingValues(column_name='education'),\n",
+    "    TestColumnNumberOfDifferentMissingValues(column_name='education'),\n",
+    "    TestColumnAllConstantValues(column_name='education'),\n",
+    "    TestColumnAllUniqueValues(column_name='education'),\n",
+    "    TestColumnRegExp(column_name='education',reg_exp='^[0..9]'),\n",
+    "    TestCategoryShare(column_name='education', category='Some-college', lt=0.5),\n",
+    "    TestCategoryShare(column_name='age', category=27., lt=0.5)\n",
+    "])\n",
+    "\n",
+    "data_integrity_column_tests.run(reference_data=adult_ref, current_data=adult_cur)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "517c9f2a-2d5c-4b59-abeb-624bdfbbb119",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_integrity_column_tests.show(mode='inline')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "664803a0-14d4-4c9a-b8d0-1a1932b0f3e5",
+   "metadata": {},
+   "source": [
+    "## Test suite with custom criticality"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "42c679b3-c514-429d-85f7-e51f5091e5b8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_integrity_column_tests = TestSuite(tests=[\n",
+    "    TestColumnNumberOfMissingValues(column_name='education'),\n",
+    "    TestColumnShareOfMissingValues(column_name='education'),\n",
+    "    TestColumnNumberOfDifferentMissingValues(column_name='education'),\n",
+    "    TestColumnAllConstantValues(column_name='education', is_critical=False),\n",
+    "    TestColumnAllUniqueValues(column_name='education', is_critical=False),\n",
+    "    TestColumnRegExp(column_name='education',reg_exp='^[0..9]'),\n",
+    "    TestCategoryShare(column_name='education', category='Some-college', lt=0.5),\n",
+    "    TestCategoryShare(column_name='age', category=27., lt=0.5)\n",
+    "])\n",
+    "\n",
+    "data_integrity_column_tests.run(reference_data=adult_ref, current_data=adult_cur)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "deffbb70-c7b9-411c-9798-7273545b6b4f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_integrity_column_tests.show(mode='inline')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/how_to_questions/how_to_use_snapshots.ipynb b/examples/how_to_questions/how_to_use_snapshots.ipynb
new file mode 100644
index 0000000000..4e2d4f3550
--- /dev/null
+++ b/examples/how_to_questions/how_to_use_snapshots.ipynb
@@ -0,0 +1,197 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c2dddbfb-e26f-47d2-bdad-8c5083e85ff6",
+   "metadata": {},
+   "source": [
+    "# How to use Snapshots?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2b18ee6f-4602-4c37-ad3b-965efbf91c4e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "from sklearn import datasets\n",
+    "\n",
+    "from evidently.report import Report\n",
+    "from evidently.metric_preset import DataDriftPreset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6d078542-9c09-48e7-b749-e3ec99e27fc4",
+   "metadata": {},
+   "source": [
+    "## Toy Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "66256f1e-f713-47c3-b06f-f62487c4e01c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bcancer_data = datasets.load_breast_cancer(as_frame=True)\n",
+    "bcancer = bcancer_data.frame\n",
+    "\n",
+    "bcancer_ref = bcancer.sample(n=300, replace=False)\n",
+    "bcancer_cur = bcancer.sample(n=200, replace=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "53421e97-f2a6-4e8d-9bb6-e6e1e1a83f91",
+   "metadata": {},
+   "source": [
+    "## Data Drift Report"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c5899d62-cf2a-4942-abe0-3ff54bdabda3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_drift_report = Report(\n",
+    "    metrics=[DataDriftPreset()]\n",
+    ")\n",
+    "\n",
+    "data_drift_report.run(reference_data=bcancer_ref, current_data=bcancer_cur)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "601555e3-3b06-436f-a331-889cbc51a2e5",
+   "metadata": {},
+   "source": [
+    "## Report Formats"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "72f47a4a-0de3-4c6f-b286-41d4652f25af",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_drift_report.show(mode='inline')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f341f7bd-f95e-4d48-aa0f-d2241fe23d72",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_drift_report.json()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6e93d1da-a0f2-4ee3-b38d-98c009d19782",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_drift_report.as_dict()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "05ae3843-37ff-4de5-a1f5-b34be2639d2c",
+   "metadata": {},
+   "source": [
+    "## Report saving options"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "77fbc338-a992-4973-9f09-578605afab09",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_drift_report.save_html('data_drift_report.html')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6ad9643c-7ef2-47b6-97d9-6d83da8f3d61",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_drift_report.save_json('data_drift_report.json')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f74ebd6d-a0cf-486d-a1a1-d0bbd41a43b6",
+   "metadata": {},
+   "source": [
+    "## Snapshot save and load "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0ad8db16-c235-44e0-b201-5469e394c753",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_drift_report.save('snapshot.json')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dcb8c8ab-b106-4579-a4b4-f6b2641c5f88",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loaded_report = Report.load('snapshot.json')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2052c76e-4b89-461f-901b-c7dc3ca1dde0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "loaded_report.show(mode='inline')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}