From 8a5298654d4596ba6ed676729f99fc81d0abf81c Mon Sep 17 00:00:00 2001 From: BenoitDherin Date: Thu, 18 Jan 2024 19:59:02 +0000 Subject: [PATCH 1/2] remove unpaired lab/solution notebooks --- ..._dataset_api.ipynb => 2_dataset_api.ipynb} | 0 .../labs/adv_logistic_reg_TF2.0.ipynb | 2859 ----------------- .../labs/adv_tfdv_facets.ipynb | 595 ---- .../basic_intro_logistic_regression.ipynb | 1104 ------- .../labs/feat.cols_tf.data.ipynb | 1083 ------- .../labs/int_logistic_regression.ipynb | 1460 --------- .../intro_logistic_regression_TF2.0.ipynb | 481 --- .../labs/load_diff_filedata.ipynb | 1327 -------- .../labs/load_images_tf.data.ipynb | 744 ----- .../labs/tfrecord-tf.example.ipynb | 1670 ---------- .../labs/what_if_mortgage.ipynb | 766 ----- .../labs/write_low_level_code.ipynb | 760 ----- ..._dataset_api.ipynb => 2_dataset_api.ipynb} | 0 .../solutions/2b_loading_filedata.ipynb | 992 ------ .../solutions/2c_loading_images.ipynb | 608 ---- .../solutions/2d_loading_tfrecords.ipynb | 1259 -------- 16 files changed, 15708 deletions(-) rename notebooks/introduction_to_tensorflow/labs/{2a_dataset_api.ipynb => 2_dataset_api.ipynb} (100%) delete mode 100644 notebooks/introduction_to_tensorflow/labs/adv_logistic_reg_TF2.0.ipynb delete mode 100644 notebooks/introduction_to_tensorflow/labs/adv_tfdv_facets.ipynb delete mode 100644 notebooks/introduction_to_tensorflow/labs/basic_intro_logistic_regression.ipynb delete mode 100644 notebooks/introduction_to_tensorflow/labs/feat.cols_tf.data.ipynb delete mode 100644 notebooks/introduction_to_tensorflow/labs/int_logistic_regression.ipynb delete mode 100644 notebooks/introduction_to_tensorflow/labs/intro_logistic_regression_TF2.0.ipynb delete mode 100644 notebooks/introduction_to_tensorflow/labs/load_diff_filedata.ipynb delete mode 100644 notebooks/introduction_to_tensorflow/labs/load_images_tf.data.ipynb delete mode 100644 notebooks/introduction_to_tensorflow/labs/tfrecord-tf.example.ipynb delete mode 100644 
notebooks/introduction_to_tensorflow/labs/what_if_mortgage.ipynb delete mode 100644 notebooks/introduction_to_tensorflow/labs/write_low_level_code.ipynb rename notebooks/introduction_to_tensorflow/solutions/{2a_dataset_api.ipynb => 2_dataset_api.ipynb} (100%) delete mode 100644 notebooks/introduction_to_tensorflow/solutions/2b_loading_filedata.ipynb delete mode 100644 notebooks/introduction_to_tensorflow/solutions/2c_loading_images.ipynb delete mode 100644 notebooks/introduction_to_tensorflow/solutions/2d_loading_tfrecords.ipynb diff --git a/notebooks/introduction_to_tensorflow/labs/2a_dataset_api.ipynb b/notebooks/introduction_to_tensorflow/labs/2_dataset_api.ipynb similarity index 100% rename from notebooks/introduction_to_tensorflow/labs/2a_dataset_api.ipynb rename to notebooks/introduction_to_tensorflow/labs/2_dataset_api.ipynb diff --git a/notebooks/introduction_to_tensorflow/labs/adv_logistic_reg_TF2.0.ipynb b/notebooks/introduction_to_tensorflow/labs/adv_logistic_reg_TF2.0.ipynb deleted file mode 100644 index 832745d5..00000000 --- a/notebooks/introduction_to_tensorflow/labs/adv_logistic_reg_TF2.0.ipynb +++ /dev/null @@ -1,2859 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "dUeKVCYTbcyT" - }, - "source": [ - "# Advanced Logistic Regression in TensorFlow 2.0 \n", - "\n", - "\n", - "\n", - "## Learning Objectives\n", - "\n", - "1. Load a CSV file using Pandas\n", - "2. Create train, validation, and test sets\n", - "3. Define and train a model using Keras (including setting class weights)\n", - "4. Evaluate the model using various metrics (including precision and recall)\n", - "5. Try common techniques for dealing with imbalanced data like:\n", - " Class weighting and\n", - " Oversampling\n", - "\n", - "\n", - "\n", - "## Introduction \n", - "This lab how to classify a highly imbalanced dataset in which the number of examples in one class greatly outnumbers the examples in another. 
You will work with the [Credit Card Fraud Detection](https://www.kaggle.com/mlg-ulb/creditcardfraud) dataset hosted on Kaggle. The aim is to detect a mere 492 fraudulent transactions from 284,807 transactions in total. You will use [Keras](../../guide/keras/overview.ipynb) to define the model and [class weights](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/Model) to help the model learn from the imbalanced data. \n", - "\n", - "PENDING LINK UPDATE: Each learning objective will correspond to a __#TODO__ in the [student lab notebook](https://training-data-analyst/courses/machine_learning/deepdive2/image_classification/labs/5_fashion_mnist_class.ipynb) -- try to complete that notebook first before reviewing this solution notebook." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "kRHmSyHxEIhN" - }, - "source": [ - "Start by importing the necessary libraries for this lab." - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "JM7hDSNClfoK" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TensorFlow version: 2.1.0\n" - ] - } - ], - "source": [ - "import os\n", - "import tempfile\n", - "\n", - "import matplotlib as mpl\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import pandas as pd\n", - "import seaborn as sns\n", - "import sklearn\n", - "import tensorflow as tf\n", - "from sklearn.metrics import confusion_matrix\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.preprocessing import StandardScaler\n", - "from tensorflow import keras\n", - "\n", - "print(\"TensorFlow version: \", tf.version.VERSION)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the next cell, we're going to customize our Matplot lib visualization figure size and colors. 
Note that each time Matplotlib loads, it defines a runtime configuration (rc) containing the default styles for every plot element we create. This configuration can be adjusted at any time using the plt.rc convenience routine. " - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "c8o1FHzD-_y_" - }, - "outputs": [], - "source": [ - "mpl.rcParams[\"figure.figsize\"] = (12, 10)\n", - "colors = plt.rcParams[\"axes.prop_cycle\"].by_key()[\"color\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Z3iZVjziKHmX" - }, - "source": [ - "## Data processing and exploration" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "4sA9WOcmzH2D" - }, - "source": [ - "### Download the Kaggle Credit Card Fraud data set\n", - "\n", - "Pandas is a Python library with many helpful utilities for loading and working with structured data and can be used to download CSVs into a dataframe.\n", - "\n", - "Note: This dataset has been collected and analysed during a research collaboration of Worldline and the [Machine Learning Group](http://mlg.ulb.ac.be) of ULB (Université Libre de Bruxelles) on big data mining and fraud detection. More details on current and past projects on related topics are available [here](https://www.researchgate.net/project/Fraud-detection-5) and the page of the [DefeatFraud](https://mlg.ulb.ac.be/wordpress/portfolio_page/defeatfraud-assessment-and-validation-of-deep-feature-engineering-and-learning-solutions-for-fraud-detection/) project" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "pR_SnbMArXr7" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
TimeV1V2V3V4V5V6V7V8V9...V21V22V23V24V25V26V27V28AmountClass
00.0-1.359807-0.0727812.5363471.378155-0.3383210.4623880.2395990.0986980.363787...-0.0183070.277838-0.1104740.0669280.128539-0.1891150.133558-0.021053149.620
10.01.1918570.2661510.1664800.4481540.060018-0.082361-0.0788030.085102-0.255425...-0.225775-0.6386720.101288-0.3398460.1671700.125895-0.0089830.0147242.690
21.0-1.358354-1.3401631.7732090.379780-0.5031981.8004990.7914610.247676-1.514654...0.2479980.7716790.909412-0.689281-0.327642-0.139097-0.055353-0.059752378.660
31.0-0.966272-0.1852261.792993-0.863291-0.0103091.2472030.2376090.377436-1.387024...-0.1083000.005274-0.190321-1.1755750.647376-0.2219290.0627230.061458123.500
42.0-1.1582330.8777371.5487180.403034-0.4071930.0959210.592941-0.2705330.817739...-0.0094310.798278-0.1374580.141267-0.2060100.5022920.2194220.21515369.990
\n", - "

5 rows × 31 columns

\n", - "
" - ], - "text/plain": [ - " Time V1 V2 V3 V4 V5 V6 V7 \\\n", - "0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 \n", - "1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 \n", - "2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 \n", - "3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 \n", - "4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 \n", - "\n", - " V8 V9 ... V21 V22 V23 V24 V25 \\\n", - "0 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 \n", - "1 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 \n", - "2 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 \n", - "3 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 \n", - "4 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 \n", - "\n", - " V26 V27 V28 Amount Class \n", - "0 -0.189115 0.133558 -0.021053 149.62 0 \n", - "1 0.125895 -0.008983 0.014724 2.69 0 \n", - "2 -0.139097 -0.055353 -0.059752 378.66 0 \n", - "3 -0.221929 0.062723 0.061458 123.50 0 \n", - "4 0.502292 0.219422 0.215153 69.99 0 \n", - "\n", - "[5 rows x 31 columns]" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "file = tf.keras.utils\n", - "raw_df = pd.read_csv(\n", - " \"https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv\"\n", - ")\n", - "raw_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, let's view the statistics of the raw dataframe." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "-fgdQgmwUFuj" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
TimeV1V2V3V4V5V26V27V28AmountClass
count284807.0000002.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+052.848070e+05284807.000000284807.000000
mean94813.8595751.165980e-153.416908e-16-1.373150e-152.086869e-159.604066e-161.687098e-15-3.666453e-16-1.220404e-1688.3496190.001727
std47488.1459551.958696e+001.651309e+001.516255e+001.415869e+001.380247e+004.822270e-014.036325e-013.300833e-01250.1201090.041527
min0.000000-5.640751e+01-7.271573e+01-4.832559e+01-5.683171e+00-1.137433e+02-2.604551e+00-2.256568e+01-1.543008e+010.0000000.000000
25%54201.500000-9.203734e-01-5.985499e-01-8.903648e-01-8.486401e-01-6.915971e-01-3.269839e-01-7.083953e-02-5.295979e-025.6000000.000000
50%84692.0000001.810880e-026.548556e-021.798463e-01-1.984653e-02-5.433583e-02-5.213911e-021.342146e-031.124383e-0222.0000000.000000
75%139320.5000001.315642e+008.037239e-011.027196e+007.433413e-016.119264e-012.409522e-019.104512e-027.827995e-0277.1650000.000000
max172792.0000002.454930e+002.205773e+019.382558e+001.687534e+013.480167e+013.517346e+003.161220e+013.384781e+0125691.1600001.000000
\n", - "
" - ], - "text/plain": [ - " Time V1 V2 V3 V4 \\\n", - "count 284807.000000 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 \n", - "mean 94813.859575 1.165980e-15 3.416908e-16 -1.373150e-15 2.086869e-15 \n", - "std 47488.145955 1.958696e+00 1.651309e+00 1.516255e+00 1.415869e+00 \n", - "min 0.000000 -5.640751e+01 -7.271573e+01 -4.832559e+01 -5.683171e+00 \n", - "25% 54201.500000 -9.203734e-01 -5.985499e-01 -8.903648e-01 -8.486401e-01 \n", - "50% 84692.000000 1.810880e-02 6.548556e-02 1.798463e-01 -1.984653e-02 \n", - "75% 139320.500000 1.315642e+00 8.037239e-01 1.027196e+00 7.433413e-01 \n", - "max 172792.000000 2.454930e+00 2.205773e+01 9.382558e+00 1.687534e+01 \n", - "\n", - " V5 V26 V27 V28 Amount \\\n", - "count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 284807.000000 \n", - "mean 9.604066e-16 1.687098e-15 -3.666453e-16 -1.220404e-16 88.349619 \n", - "std 1.380247e+00 4.822270e-01 4.036325e-01 3.300833e-01 250.120109 \n", - "min -1.137433e+02 -2.604551e+00 -2.256568e+01 -1.543008e+01 0.000000 \n", - "25% -6.915971e-01 -3.269839e-01 -7.083953e-02 -5.295979e-02 5.600000 \n", - "50% -5.433583e-02 -5.213911e-02 1.342146e-03 1.124383e-02 22.000000 \n", - "75% 6.119264e-01 2.409522e-01 9.104512e-02 7.827995e-02 77.165000 \n", - "max 3.480167e+01 3.517346e+00 3.161220e+01 3.384781e+01 25691.160000 \n", - "\n", - " Class \n", - "count 284807.000000 \n", - "mean 0.001727 \n", - "std 0.041527 \n", - "min 0.000000 \n", - "25% 0.000000 \n", - "50% 0.000000 \n", - "75% 0.000000 \n", - "max 1.000000 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "raw_df[\n", - " [\n", - " \"Time\",\n", - " \"V1\",\n", - " \"V2\",\n", - " \"V3\",\n", - " \"V4\",\n", - " \"V5\",\n", - " \"V26\",\n", - " \"V27\",\n", - " \"V28\",\n", - " \"Amount\",\n", - " \"Class\",\n", - " ]\n", - "].describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "xWKB_CVZFLpB" - }, - "source": 
[ - "### Examine the class label imbalance\n", - "\n", - "Let's look at the dataset imbalance:" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "HCJFrtuY2iLF" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Examples:\n", - " Total: 284807\n", - " Positive: 492 (0.17% of total)\n", - "\n" - ] - } - ], - "source": [ - "neg, pos = np.bincount(raw_df[\"Class\"])\n", - "total = neg + pos\n", - "print(\n", - " \"Examples:\\n Total: {}\\n Positive: {} ({:.2f}% of total)\\n\".format(\n", - " total, pos, 100 * pos / total\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "KnLKFQDsCBUg" - }, - "source": [ - "This shows the small fraction of positive samples." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "6qox6ryyzwdr" - }, - "source": [ - "### Clean, split and normalize the data\n", - "\n", - "The raw data has a few issues. First the `Time` and `Amount` columns are too variable to use directly. Drop the `Time` column (since it's not clear what it means) and take the log of the `Amount` column to reduce its range." - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Ef42jTuxEjnj" - }, - "outputs": [], - "source": [ - "cleaned_df = raw_df.copy()\n", - "\n", - "# You don't want the `Time` column.\n", - "cleaned_df.pop(\"Time\")\n", - "\n", - "# The `Amount` column covers a huge range. Convert to log-space.\n", - "eps = 0.001 # 0 => 0.1¢\n", - "cleaned_df[\"Log Ammount\"] = np.log(cleaned_df.pop(\"Amount\") + eps)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "uSNgdQFFFQ6u" - }, - "source": [ - "Split the dataset into train, validation, and test sets. 
The validation set is used during the model fitting to evaluate the loss and any metrics, however the model is not fit with this data. The test set is completely unused during the training phase and is only used at the end to evaluate how well the model generalizes to new data. This is especially important with imbalanced datasets where [overfitting](https://developers.google.com/machine-learning/crash-course/generalization/peril-of-overfitting) is a significant concern from the lack of training data." - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "xfxhKg7Yr1-b" - }, - "outputs": [], - "source": [ - "# TODO 1\n", - "# Use a utility from sklearn to split and shuffle our dataset.\n", - "train_df, test_df = #TODO: Your code goes here.\n", - "train_df, val_df = #TODO: Your code goes here.\n", - "\n", - "# Form np arrays of labels and features.\n", - "train_labels = #TODO: Your code goes here.\n", - "bool_train_labels = #TODO: Your code goes here.\n", - "val_labels = #TODO: Your code goes here.\n", - "test_labels = #TODO: Your code goes here.\n", - "\n", - "train_features = np.array(train_df)\n", - "val_features = np.array(val_df)\n", - "test_features = np.array(test_df)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "8a_Z_kBmr7Oh" - }, - "source": [ - "Normalize the input features using the sklearn StandardScaler.\n", - "This will set the mean to 0 and standard deviation to 1.\n", - "\n", - "Note: The `StandardScaler` is only fit using the `train_features` to be sure the model is not peeking at the validation or test sets. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "IO-qEUmJ5JQg" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Training labels shape: (182276,)\n", - "Validation labels shape: (45569,)\n", - "Test labels shape: (56962,)\n", - "Training features shape: (182276, 29)\n", - "Validation features shape: (45569, 29)\n", - "Test features shape: (56962, 29)\n" - ] - } - ], - "source": [ - "scaler = StandardScaler()\n", - "train_features = scaler.fit_transform(train_features)\n", - "\n", - "val_features = scaler.transform(val_features)\n", - "test_features = scaler.transform(test_features)\n", - "\n", - "train_features = np.clip(train_features, -5, 5)\n", - "val_features = np.clip(val_features, -5, 5)\n", - "test_features = np.clip(test_features, -5, 5)\n", - "\n", - "\n", - "print(\"Training labels shape:\", train_labels.shape)\n", - "print(\"Validation labels shape:\", val_labels.shape)\n", - "print(\"Test labels shape:\", test_labels.shape)\n", - "\n", - "print(\"Training features shape:\", train_features.shape)\n", - "print(\"Validation features shape:\", val_features.shape)\n", - "print(\"Test features shape:\", test_features.shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "XF2nNfWKJ33w" - }, - "source": [ - "Caution: If you want to deploy a model, it's critical that you preserve the preprocessing calculations. The easiest way to implement them as layers, and attach them to your model before export.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "uQ7m9nqDC3W6" - }, - "source": [ - "### Look at the data distribution\n", - "\n", - "Next compare the distributions of the positive and negative examples over a few features. Good questions to ask yourself at this point are:\n", - "\n", - "* Do these distributions make sense? \n", - " * Yes. 
You've normalized the input and these are mostly concentrated in the `+/- 2` range.\n", - "* Can you see the difference between the ditributions?\n", - " * Yes the positive examples contain a much higher rate of extreme values." - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "raK7hyjd_vf6" - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "pos_df = pd.DataFrame(\n", - " train_features[bool_train_labels], columns=train_df.columns\n", - ")\n", - "neg_df = pd.DataFrame(\n", - " train_features[~bool_train_labels], columns=train_df.columns\n", - ")\n", - "\n", - "sns.jointplot(\n", - " pos_df[\"V5\"], pos_df[\"V6\"], kind=\"hex\", xlim=(-5, 5), ylim=(-5, 5)\n", - ")\n", - "plt.suptitle(\"Positive distribution\")\n", - "\n", - "sns.jointplot(\n", - " neg_df[\"V5\"], neg_df[\"V6\"], kind=\"hex\", xlim=(-5, 5), ylim=(-5, 5)\n", - ")\n", - "_ = plt.suptitle(\"Negative distribution\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "qFK1u4JX16D8" - }, - "source": [ - "## Define the model and metrics\n", - "\n", - "Define a function that creates a simple neural network with a densly connected hidden layer, a [dropout](https://developers.google.com/machine-learning/glossary/#dropout_regularization) layer to reduce overfitting, and an output sigmoid layer that returns the probability of a transaction being fraudulent: " - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "3JQDzUqT3UYG" - }, - "outputs": [], - "source": [ - "METRICS = [\n", - " keras.metrics.TruePositives(name=\"tp\"),\n", - " keras.metrics.FalsePositives(name=\"fp\"),\n", - " keras.metrics.TrueNegatives(name=\"tn\"),\n", - " keras.metrics.FalseNegatives(name=\"fn\"),\n", - " keras.metrics.BinaryAccuracy(name=\"accuracy\"),\n", - " keras.metrics.Precision(name=\"precision\"),\n", - " keras.metrics.Recall(name=\"recall\"),\n", - " keras.metrics.AUC(name=\"auc\"),\n", - "]\n", - "\n", - "\n", - "def make_model(metrics=METRICS, output_bias=None):\n", - " if output_bias is not None:\n", - " output_bias = tf.keras.initializers.Constant(output_bias)\n", - " # TODO 1\n", - " model = keras.Sequential(\n", - " # TODO: Your code 
goes here.\n", - " # TODO: Your code goes here.\n", - " # TODO: Your code goes here.\n", - " # TODO: Your code goes here.\n", - " )\n", - "\n", - " model.compile(\n", - " optimizer=keras.optimizers.Adam(lr=1e-3),\n", - " loss=keras.losses.BinaryCrossentropy(),\n", - " metrics=metrics,\n", - " )\n", - "\n", - " return model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "SU0GX6E6mieP" - }, - "source": [ - "### Understanding useful metrics\n", - "\n", - "Notice that there are a few metrics defined above that can be computed by the model that will be helpful when evaluating the performance.\n", - "\n", - "\n", - "\n", - "* **False** negatives and **false** positives are samples that were **incorrectly** classified\n", - "* **True** negatives and **true** positives are samples that were **correctly** classified\n", - "* **Accuracy** is the percentage of examples correctly classified\n", - "> $\\frac{\\text{true samples}}{\\text{total samples}}$\n", - "* **Precision** is the percentage of **predicted** positives that were correctly classified\n", - "> $\\frac{\\text{true positives}}{\\text{true positives + false positives}}$\n", - "* **Recall** is the percentage of **actual** positives that were correctly classified\n", - "> $\\frac{\\text{true positives}}{\\text{true positives + false negatives}}$\n", - "* **AUC** refers to the Area Under the Curve of a Receiver Operating Characteristic curve (ROC-AUC). This metric is equal to the probability that a classifier will rank a random positive sample higher than than a random negative sample.\n", - "\n", - "Note: Accuracy is not a helpful metric for this task. You can 99.8%+ accuracy on this task by predicting False all the time. \n", - "\n", - "Read more:\n", - "* [True vs. False and Positive vs. 
Negative](https://developers.google.com/machine-learning/crash-course/classification/true-false-positive-negative)\n", - "* [Accuracy](https://developers.google.com/machine-learning/crash-course/classification/accuracy)\n", - "* [Precision and Recall](https://developers.google.com/machine-learning/crash-course/classification/precision-and-recall)\n", - "* [ROC-AUC](https://developers.google.com/machine-learning/crash-course/classification/roc-and-auc)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "FYdhSAoaF_TK" - }, - "source": [ - "## Baseline model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "IDbltVPg2m2q" - }, - "source": [ - "### Build the model\n", - "\n", - "Now create and train your model using the function that was defined earlier. Notice that the model is fit using a larger than default batch size of 2048, this is important to ensure that each batch has a decent chance of containing a few positive samples. If the batch size was too small, they would likely have no fraudulent transactions to learn from.\n", - "\n", - "\n", - "Note: this model will not handle the class imbalance well. You will improve it later in this tutorial." 
- ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "ouUkwPcGQsy3" - }, - "outputs": [], - "source": [ - "EPOCHS = 100\n", - "BATCH_SIZE = 2048\n", - "\n", - "early_stopping = tf.keras.callbacks.EarlyStopping(\n", - " monitor=\"val_auc\",\n", - " verbose=1,\n", - " patience=10,\n", - " mode=\"max\",\n", - " restore_best_weights=True,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "1xlR_dekzw7C" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: \"sequential_8\"\n", - "_________________________________________________________________\n", - "Layer (type) Output Shape Param # \n", - "=================================================================\n", - "dense_16 (Dense) (None, 16) 480 \n", - "_________________________________________________________________\n", - "dropout_8 (Dropout) (None, 16) 0 \n", - "_________________________________________________________________\n", - "dense_17 (Dense) (None, 1) 17 \n", - "=================================================================\n", - "Total params: 497\n", - "Trainable params: 497\n", - "Non-trainable params: 0\n", - "_________________________________________________________________\n" - ] - } - ], - "source": [ - "model = make_model()\n", - "model.summary()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Wx7ND3_SqckO" - }, - "source": [ - "Test run the model:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "LopSd-yQqO3a" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[0.89924395],\n", - " [0.7323974 ],\n", - " [0.9322966 ],\n", - " [0.8881701 ],\n", - " [0.88115484],\n", - " [0.6485833 ],\n", - " [0.79132897],\n", - " [0.7073316 ],\n", - " [0.8343261 ],\n", - " [0.8008822 ]], 
dtype=float32)" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.predict(train_features[:10])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "YKIgWqHms_03" - }, - "source": [ - "### Optional: Set the correct initial bias." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "qk_3Ry6EoYDq" - }, - "source": [ - "These are initial guesses are not great. You know the dataset is imbalanced. Set the output layer's bias to reflect that (See: [A Recipe for Training Neural Networks: \"init well\"](http://karpathy.github.io/2019/04/25/recipe/#2-set-up-the-end-to-end-trainingevaluation-skeleton--get-dumb-baselines)). This can help with initial convergence." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "PdbfWDuVpo6k" - }, - "source": [ - "With the default bias initialization the loss should be about `math.log(2) = 0.69314` " - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "H-oPqh3SoGXk" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loss: 1.7441\n" - ] - } - ], - "source": [ - "results = model.evaluate(\n", - " train_features, train_labels, batch_size=BATCH_SIZE, verbose=0\n", - ")\n", - "print(f\"Loss: {results[0]:0.4f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "hE-JRzfKqfhB" - }, - "source": [ - "The correct bias to set can be derived from:\n", - "\n", - "$$ p_0 = pos/(pos + neg) = 1/(1+e^{-b_0}) $$\n", - "$$ b_0 = -log_e(1/p_0 - 1) $$\n", - "$$ b_0 = log_e(pos/neg)$$" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "F5KWPSjjstUS" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([-6.35935934])" - ] - }, - "execution_count": 15, - 
"metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "initial_bias = np.log([pos / neg])\n", - "initial_bias" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "d1juXI9yY1KD" - }, - "source": [ - "Set that as the initial bias, and the model will give much more reasonable initial guesses. \n", - "\n", - "It should be near: `pos/total = 0.0018`" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "50oyu1uss0i-" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[0.00196099],\n", - " [0.00737071],\n", - " [0.00182639],\n", - " [0.00342294],\n", - " [0.00442886],\n", - " [0.00714428],\n", - " [0.0061818 ],\n", - " [0.00631511],\n", - " [0.0088356 ],\n", - " [0.01214694]], dtype=float32)" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model = make_model(output_bias=initial_bias)\n", - "model.predict(train_features[:10])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "4xqFYb2KqRHQ" - }, - "source": [ - "With this initialization the initial loss should be approximately:\n", - "\n", - "$$-p_0log(p_0)-(1-p_0)log(1-p_0) = 0.01317$$" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "xVDqCWXDqHSc" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loss: 0.0275\n" - ] - } - ], - "source": [ - "results = model.evaluate(\n", - " train_features, train_labels, batch_size=BATCH_SIZE, verbose=0\n", - ")\n", - "print(f\"Loss: {results[0]:0.4f}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "FrDC8hvNr9yw" - }, - "source": [ - "This initial loss is about 50 times less than if would have been with naive initilization.\n", - "\n", - "This way the model doesn't need to spend the first few 
epochs just learning that positive examples are unlikely. This also makes it easier to read plots of the loss during training." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "0EJj9ixKVBMT" - }, - "source": [ - "### Checkpoint the initial weights\n", - "\n", - "To make the various training runs more comparable, keep this initial model's weights in a checkpoint file, and load them into each model before training." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "_tSUm4yAVIif" - }, - "outputs": [], - "source": [ - "initial_weights = os.path.join(tempfile.mkdtemp(), \"initial_weights\")\n", - "model.save_weights(initial_weights)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "EVXiLyqyZ8AX" - }, - "source": [ - "### Confirm that the bias fix helps\n", - "\n", - "Before moving on, confirm quick that the careful bias initialization actually helped.\n", - "\n", - "Train the model for 20 epochs, with and without this careful initialization, and compare the losses: " - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Dm4-4K5RZ63Q" - }, - "outputs": [], - "source": [ - "model = make_model()\n", - "model.load_weights(initial_weights)\n", - "model.layers[-1].bias.assign([0.0])\n", - "zero_bias_history = model.fit(\n", - " train_features,\n", - " train_labels,\n", - " batch_size=BATCH_SIZE,\n", - " epochs=20,\n", - " validation_data=(val_features, val_labels),\n", - " verbose=0,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "j8DsLXHQaSql" - }, - "outputs": [], - "source": [ - "model = make_model()\n", - "model.load_weights(initial_weights)\n", - "careful_bias_history = model.fit(\n", - " train_features,\n", - " train_labels,\n", - " batch_size=BATCH_SIZE,\n", - " 
epochs=20,\n", - " validation_data=(val_features, val_labels),\n", - " verbose=0,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "E3XsMBjhauFV" - }, - "outputs": [], - "source": [ - "def plot_loss(history, label, n):\n", - " # Use a log scale to show the wide range of values.\n", - " plt.semilogy(\n", - " history.epoch,\n", - " history.history[\"loss\"],\n", - " color=colors[n],\n", - " label=\"Train \" + label,\n", - " )\n", - " plt.semilogy(\n", - " history.epoch,\n", - " history.history[\"val_loss\"],\n", - " color=colors[n],\n", - " label=\"Val \" + label,\n", - " linestyle=\"--\",\n", - " )\n", - " plt.xlabel(\"Epoch\")\n", - " plt.ylabel(\"Loss\")\n", - "\n", - " plt.legend()" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "dxFaskm7beC7" - }, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtwAAAJQCAYAAAC927PKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3XdUVVeix/HvoUgVEESkKHako2CLMWpirzEaM8bE0WTSM5MyyZj6UsaZmDeTiVOScTJRExOjqaZoNGqavaHYQAW7gAgWEOn3nvcHhqfRWIFzgd9nrawlh3vP/gFrZv3Y7LO3YZomIiIiIiJSM5ysDiAiIiIiUp+pcIuIiIiI1CAVbhERERGRGqTCLSIiIiJSg1S4RURERERqkAq3iIiIiEgNUuEWEREREalBKtwiIiIiIjVIhVtEREREpAa5WB2gJjRt2tRs1aqV1TFEREREpB5LTk7OM00z8FKvq5eFu1WrVmzcuNHqGCIiIiJSjxmGceByXqclJSIiIiIiNUiFW0RERESkBqlwi4iIiIjUoHq5hltERESkJpWXl3P48GFKSkqsjiK1wN3dnbCwMFxdXa/q/SrcIiIiIlfo8OHDNG7cmFatWmEYhtVxpAaZpsmxY8c4fPgwrVu3vqp7aEmJiIiIyBUqKSkhICBAZbsBMAyDgICAa/prhgq3iIiIyFVQ2W44rvVnrcItIiIiIlKDVLhFRERE6phjx46RkJBAQkICzZs3JzQ0tOrjsrKyy7rHpEmT2LVr12WPOWHChKoxEhISCA8PJzQ09Gq/hF+UkZGBh4cHCQkJxMfH07NnT9LT0wFYt24djz32WLWPWdP00KSIiIhIHRMQEEBKSgoAL774It7e3jzxxBPnvMY0TUzTxMnpwvOrs2bNuqIxZ8+eXfVvm81Gr169uPfeey/7/RUVFbi4XF71jIiIqPr63njjDaZOncqMGTPo1q0b3bp1u6LcjkAz3CIiIiL1REZGBlFRUYwfP57o6Giys7O599
57SUpKIjo6mpdffrnqtddffz0pKSlUVFTg5+fHU089RXx8PD169ODo0aMXHeePf/wjYWFhTJw4EYCcnBxuueUWkpKS6Nq1K2vXrgXgueeeY8KECfTs2ZOJEydSXFzMr3/9a2JjY+ncuTPLly+/5NdUUFBAkyZNAFi2bBk333wzAGvXrqVHjx506tTpnFnwbdu20aVLFxISEoiLi2Pv3r1X/H2sbprhFhEREbkGL321g9Ssgmq9Z1SIDy8Mj76q9+7cuZPZs2eTlJQEwNSpU/H396eiooK+ffsyZswYoqKiznlPfn4+vXv3ZurUqTz++OPMnDmTp5566oL3X7NmDbNnzyY5Obnq2u9+9zv+8Ic/0L17d/bv38+wYcPYvn17VZ7ly5fj7u7Oq6++ipubG9u2bWPHjh0MGTKE9PR0GjVqdM4Yu3btIiEhgYKCAkpLS1m3bt15OSIjI1mxYgUuLi4sXryY5557jg8//JA333yTJ554gttuu43S0lJM07yq72N1UuEWERERqUfatm1bVbYB5s6dy4wZM6ioqCArK4vU1NTzCreHhweDBw8GIDExkRUrVlzw3gUFBUyYMIFZs2ZVzTpD5czz2evBT5w4QXFxMQAjR47E3d0dgJUrV/Lkk08CEB0dTUhISNWs/NnOXlIyZ84c7r//fhYsWHDOa06ePMmECRPYs2fPOdevu+46pkyZwoEDB7jlllto167dJb5jNU+FW0REROQaXO1MdE3x8vKq+nd6ejp///vfWb9+PX5+ftxxxx0X3E/67BlmZ2dnKioqLnjvhx56iDFjxtC7d+9zrpumyfr168+bqf55nqsxYsQIHnjggfOuP/vsswwcOJAHH3yQjIwMBg0aBMCdd95Jjx49WLhwIYMGDWLmzJnccMMN15ThWmkNt4iIiEg9VVBQQOPGjfHx8SE7O5tvvvnmqu81b948du7cec468J/069ePN954o+rjn2anf65Xr17MmTMHgLS0NLKzsy85A71y5Uratm173vX8/PyqXVLeeeedqut79+6lXbt2PPLIIwwbNoytW7de8muraZrhFhEREamnOnfuTFRUFB07diQ8PJyePXte9b2effZZysvL6dKlyznX169fzxtvvMEDDzzArFmzqtaKn13Af/Lb3/6W++67j9jYWFxdXZk9e/YFZ8V/WsNtmiZubm689dZb571m8uTJ3HXXXbz00ktVy2EAPvjgA+bOnYurqyshISG8+OKLV/01VxfDERaSV7ekpCRz48aNVscQERGReiotLY3IyEirY0gtutDP3DCMZNM0k37hLVW0pEREREREpAapcIuIiIiI1CAVbhERERGRGqTCLSIiIiJSg1S4q1G5zW51BBERERFxMCrc1eTVxTuZMGO9QxwfKiIiIiKOQ4W7mrRo4smavcdYsDXb6igiIiJSz/Xt2/e8Q2ymTZt2wRMZz+bt7X3etfnz55OQkHDOf05OTixatKhaMwP06dOHiIgIEhISiIyMPGd/7SFDhnDy5MlqH9MRqHBXk9u6tCA6xIc/f51GUdmFj0MVERERqQ7jxo1j3rx551ybN28e48aNu+J7jRo1ipSUlKr/HnzwQXr16sXAgQMv6/2maWK3X/6y2jlz5pCSksKqVauYPHkyZWVlAHz99df4+fldcf66QIW7mjg7Gbw0Iprs/BL+/cMeq+OIiIhIPTZmzBgWLlxYVVb3799PVlYWvXr1orCwkJtuuonOnTsTGxvLF198cdn33b17Ny+//DLvvfceTk6VNfEvf/kLXbp0IS4ujhdeeKFqvIiICCZMmEBMTAyHDh1i7ty5xMbGEhMTw+TJky85VmFhIV5eXjg7OwPQqlUr8vLyALj55ptJTEwkOjq6ahbcZrMxceJEYmJiiI2N5fXXX7/8b5jFdLR7NUpq5c/NCSH8Z/lebk1sQcsAT6sjiYiISC247T9rzrs2LC6YO3u0orjMxsRZ68/7/JjEMG5NasHx02U88H7yOZ/78L
4eFx3P39+frl27smjRIkaOHMm8efMYO3YshmHg7u7O/Pnz8fHxIS8vj+7duzNixAgMw7joPcvLy7n99tt57bXXaNmyJQBLliwhPT2d9esrn1MbMWIEy5cvp2XLlqSnp/Puu+/SvXt3srKymDx5MsnJyTRp0oQBAwbw+eefc/PNN583zvjx43FzcyM9PZ1p06ZVFe6zzZw5E39/f4qLi+nSpQujR49m//79ZGZmsn37doA6tfxEM9zV7KnBkbg4GUxZmGp1FBEREanHzl5WcvZyEtM0eeaZZ4iLi6Nfv35kZmaSk5Nzyfs9//zzREdHc9ttt1VdW7JkCUuWLKFTp0507tyZnTt3kp6eDkB4eDjdu3cHYMOGDfTp04fAwEBcXFwYP348y5cvv+A4c+bMYevWrRw8eJC//vWvHDhw4LzX/OMf/yA+Pp7u3btz6NAh0tPTadOmDXv37uW3v/0tixcvxsfH58q+YRbSDHc1a+7rzsM3tuN/F+9i+e5cbugQaHUkERERqWEXm5H2aOR80c/7ezW65Iz2hYwcOZLHHnuMTZs2UVRURGJiIlBZaHNzc0lOTsbV1ZVWrVpRUlJy0Xv98MMPfPrpp2zatOmc66Zp8vTTT3Pfffedc33//v14eXldceazBQYG0rlzZ9atW0d4ePg5WZYtW8aaNWvw9PSkT58+lJSU0KRJE7Zs2cI333zD9OnT+eijj5g5c+Y1ZagtmuGuAXdf35rwAE9e+mqH9uYWERGRGuHt7U3fvn256667znlYMj8/n2bNmuHq6sr3339/wRnks504cYJJkyYxe/ZsGjdufM7nBg4cyMyZMyksLAQgMzOTo0ePnnePrl278uOPP5KXl4fNZmPu3Ln07t37ouMWFRWxefNm2rZte871/Px8mjRpgqenJzt37mTt2rUA5OXlYbfbGT16NFOmTDnvlwNHphnuGuDm4sz/DIvi7nc38u7q/fymVxurI4mIiEg9NG7cOEaNGnXOjiXjx49n+PDhxMbGkpSURMeOHS96j+nTp3P06NHzthR8+umnue2220hLS6NHj8oZeG9vb95///3z1l0HBwczdepU+vbti2maDB06lJEjR15wvPHjx+Ph4UFpaSkTJ06smpn/yaBBg5g+fTqRkZFERERULVvJzMxk0qRJVTuivPLKK5fxHXIMhqMf1GIYhhfwJlAG/GCa5pxLvScpKcncuHFjjWe7GNM0mfTOBpL3n+C7J/oQ2NjN0jwiIiJSfdLS0oiMjLQ6htSiC/3MDcNINk0z6VLvtWRJiWEYMw3DOGoYxvafXR9kGMYuwzAyDMN46szlW4BPTNO8BxhR62GvkmEYPD8sipIKG3/5ZqfVcURERETEIlat4X4HGHT2BcMwnIE3gMFAFDDOMIwoIAw4dOZltlrMeM3aBnpzV8/WfLTxMCmH6s7WNSIiIiJSfSwp3KZpLgeO/+xyVyDDNM29pmmWAfOAkcBhKks3XCSvYRj3Goax0TCMjbm5uTUR+6o8fGM7Ahu78eKXO7DbHXv5joiIiIhUP0fapSSU/5/JhsqiHQp8Bow2DOPfwFe/9GbTNN8yTTPJNM2kwEDH2YqvsbsrTw3qSMqhk3y2OdPqOCIiIiJSyxypcF+QaZqnTdOcZJrmA5fzwKQjGtUplE4t/Zi6aCenSsqtjiMiIiIitciRCncm0OKsj8POXKvznJwMXhwezbHTpfzzuwyr44iIiIhILXKkwr0BaG8YRmvDMBoBvwK+tDhTtYlv4cfYxBbMXLmPjKOFVscRERGROuzYsWMkJCSQkJBA8+bNCQ0Nrfq4rKzssu4xadIkdu3adUXjLly4kMTERKKjo0lISGDy5MlXE/+CHn/8caKjo3nqqad+8TUZGRkkJCRc8LqHhwcJCQnEx8fTs2fPqiPo161bx2OPPVZtOa+GJQffGIYxF+gDNDUM4zDwgmmaMwzDeBj4BnAGZpqmucOKfDXlyUERfL0tm5cXpPLupC4YhmF1JB
EREamDAgICSElJAeDFF1/E29ubJ5544pzXmKaJaZo4OV14fnXWrFlXNOaWLVt49NFHWbhwIR06dMBms/HWW29d9vsrKipwcblw9TRNk5kzZ3L8+PFfzHspERERVd+TN954g6lTpzJjxgy6detGt27druqe1cWqXUrGmaYZbJqmq2maYaZpzjhz/WvTNDuYptnWNM0/WZGtJjX1duPR/h1YvjuXb9POPxZVRERE5FpkZGQQFRXF+PHjiY6OJjs7m3vvvZekpCSio6N5+eWXq157/fXXk5KSQkVFBX5+fjz11FPEx8fTo0ePCx7f/uqrr/L888/ToUMHAJydnatOp/ziiy/o1q0bnTp1YsCAAVXvf+6555gwYQI9e/Zk4sSJVFRU8Pjjj9O1a1fi4uJ4++23ARg6dCinTp2ic+fOfPLJJ9xxxx18/vnnVWN7e3tf0fehoKCAJk2aALBs2TJuvvlmANauXUuPHj3o1KnTObPg27Zto0uXLiQkJBAXF8fevXuvaLxL0dHutWxCj3Dmrj/IywtSub59U9xdnS/9JhEREXFci56CI9uq957NY2Hw1Kt6686dO5k9ezZJSZUHIE6dOhV/f38qKiro27cvY8aMISoq6pz35Ofn07t3b6ZOncrjjz/OzJkzz1vasX37dp599tkLjnnDDTcwYsQIDMNg+vTpvPbaa7z66qtVeZYvX467uztvvvkmzZo1Y/369ZSWltK9e3cGDBjAl19+SdOmTatmqM8u25dr165dJCQkUFBQQGlpKevWrTvvNZGRkaxYsQIXFxcWL17Mc889x4cffsibb77JE088wW233UZpaSnVfRK7Cnctc3V24oXhUdw5Yz0zVu7job7trI4kIiIi9Ujbtm2ryjbA3LlzmTFjBhUVFWRlZZGamnpe4fbw8GDw4MEAJCYmsmLFiisa8+DBg4wdO5YjR45QWlpaNQsOMHLkSNzd3QFYsmQJaWlpzJs3D6gs+unp6YSEhFzV13q2s5eUzJkzh/vvv58FCxac85qTJ08yYcIE9uzZc8716667jilTpnDgwAFuueUW2rWr3n5Wrwq3YRjDgeHV/U2qbr3aBzIwOoh/fZfBLZ1DCfb1sDqSiIiIXK2rnImuKV5eXlX/Tk9P5+9//zvr16/Hz8+PO+64g5KSkvPe06hRo6p/Ozs7U1FRcd5roqOjSU5OJjo6+rzPPfTQQzzzzDMMGTKEZcuWMXXq/39Pzs5jmiZvvvkmN9100znv//l4Li4u2O12AGw22wXzXMyIESOqlruc7dlnn2XgwIE8+OCDZGRkMGhQ5cHnd955Jz169GDhwoUMGjSImTNncsMNN1zRmBfjSLuUXDPTNL8yTfNeX19fq6Nc0nNDo7CZJlMX7bQ6ioiIiNRTBQUFNG7cGB8fH7Kzs/nmm2+u+l5/+MMf+OMf/0hGRuUWxzabjenTpwOVM9WhoaGYpsm77777i/cYOHAgb775ZlWB3rVrF8XFxee9rlWrViQnJwMwf/58bDbbFWVduXIlbdu2Pe/6TzkB3nnnnarre/fupV27djzyyCMMGzaMrVu3XtF4l1KvCndd0sLfk/tvaMMXKVls2P/zU+5FRERErl3nzp2JioqiY8eOVQ8vXq1OnTrx2muvMXbsWKKiooiNjeXAgQNA5U4po0aNokuXLgQFBf3iPe677z7at29PQkICMTExPPDAAxecvb7vvvtYunQp8fHxbN68GTc3t0vm+2kNd3x8PC+88MIFd1CZPHkyTz75JJ07dz5nnfYHH3xQtdXh7t27ueOOOy7nW3LZjOpeFO4IkpKSzI0bN1od45KKy2zc9NoP+Hk24qvfXo+zk7YJFBERqQvS0tKIjIy0OobUogv9zA3DSDZNM+kX3lJFM9wW8mjkzDNDI0nNLmDehoNWxxERERGRGqDCbbGhscF0a+3PX7/ZRX5RudVxRERERKSaqXBbzDAMXhwRTX5xOa8v2211HBERERGpZircDiAy2Ic7uo
fz3toD7DxSYHUcEREREalGKtwO4vH+HWjs7sJLX6ZW++lGIiIiImIdFW4H4efZiCcGRLBm7zEWbT9idRwRERERqSb1qnAbhjHcMIy38vPzrY5yVcZ1bUlksA9/WphGcdmVbfAuIiIiDUffvn3PO8Rm2rRpFzxd8Wze3t4XvH7kyBF+9atf0bZtWxITExkyZAi7d1fPs2UrVqyo2uP6Qofc/KRPnz5caFvnPn36EBERQUJCApGRkefsrz1kyBBOnjxZLTlrUr0q3HXppMkLcXYyeGlENJkni5n+4x6r44iIiIiDGjduHPPmzTvn2rx58xg3btwV38s0TUaNGkWfPn3Ys2cPycnJvPLKK+Tk5Fz2+386hv1C5syZw9NPP01KSgoeHh5XnO+ne6SkpLBq1SomT55MWVkZAF9//TV+fn5Xdc/aVK8Kd33QtbU/I+JDmP7jHg4dL7I6joiIiDigMWPGsHDhwqriuX//frKysujVqxeFhYXcdNNNdO7cmdjYWL744ouL3uv777/H1dWV+++/v+pafHz8Re+1f/9+IiIimDBhAjExMRw6dIglS5bQo0cPOnfuzK233kphYSFvv/02H330Ec8//zzjx4/nhx9+YNiwYVXjPPzww+ccsX4phYWFeHl54ezsDFQeAZ+XlwfAzTffTGJiItHR0VWz4DabjYkTJxITE0NsbCyvv/76ZY9VnVwsGVUu6ukhHVmamsOfv07j33ckWh1HRERELmXW0POvRd8MXe+BsiKYc+v5n0+4HTqNh9PH4KMJ535u0sKLDufv70/Xrl1ZtGgRI0eOZN68eYwdOxbDMHB3d2f+/Pn4+PiQl5dH9+7dGTFiBIZx4ROtt2/fTmLihfvGL90LID09nXfffZfu3buTl5fHlClTWLZsGV5eXrz66qv87W9/43/+539YuXIlw4YNY8yYMfzwww8X/bp+yfjx43FzcyM9PZ1p06ZVFe6zzZw5E39/f4qLi+nSpQujR49m//79ZGZmsn37dgDLlp9ohtsBBft68PCN7Vi0/QirMvKsjiMiIiIO6OxlJWcvJzFNk2eeeYa4uDj69etHZmbmZS8P+bmL3Ss8PJzu3bsDsHbtWlJTU+nZsycJCQm8++67HDhwoBq+ykpz5sxh69atHDx4kL/+9a8XvPc//vEP4uPj6d69O4cOHSI9PZ02bdqwd+9efvvb37J48WJ8fHyqLdOV0Ay3g7r7+tZ8uOEQL321g4W/64Wrs343EhERcVgXm5Fu5Hnxz3sFXHJG+0JGjhzJY489xqZNmygqKqqapZ4zZw65ubkkJyfj6upKq1atKCkp+cX7REdH88knn1zwcxe7l5eXV9XrTNOkf//+zJ0796KZXVxczlnvfbFcFxIYGEjnzp1Zt24d4eHhVdd/+OEHli1bxpo1a/D09KRPnz6UlJTQpEkTtmzZwjfffMP06dP56KOPmDlz5hWNWR3U4hyUu6szzw2NZHdOIe+vrb7fEEVERKR+8Pb2pm/fvtx1113nPCyZn59Ps2bNcHV15fvvv7/kTPONN95IaWnpObt/bN26lRUrVlz2vbp3786qVavIyMgA4PTp0xfc5SQ8PJzU1FRKS0s5efIk33777RV9zUVFRWzevJm2bduecz0/P58mTZrg6enJzp07Wbt2LQB5eXnY7XZGjx7NlClT2LRp0xWNV100w+3A+kcF0at9U/62dDcj4kMI8HazOpKIiIg4kHHjxjFq1KhzdiwZP348w4cPJzY2lqSkJDp27HjRexiGwfz583n00Ud59dVXcXd3p1WrVkybNu2y7xUYGMg777zDuHHjKC0tBWDKlCl06NDhnNe1aNGCsWPHEhMTQ+vWrenUqdNlfZ3jx4/Hw8OD0tJSJk6ceN6a80GDBjF9+nQiIyOJiIioWuqSmZnJpEmTqmbVX3nllcsar7oZ9fFUw6SkJPNC+zjWRRlHTzFo2gpuTQrjlVvirI4jIiIiQFpaGpGRkVbHkFp0oZ+5YR
jJpmkmXeq9WlLi4No1a8zE61oxb8Mhth2umwf6iIiIiDRkKtx1wO/6tSfAqxEvfLmd+vgXCREREZH6rF4V7rp+tPsv8XF35Q+DOrLp4Ek+T8m0Oo6IiIiAJsEakGv9Wderwl3Xj3a/mDGdw4gP8+WVr3dSWFphdRwREZEGzd3dnWPHjql0NwCmaXLs2DHc3d2v+h7apaSOcHIyeHFENKPeXM2/vsvgqcEXf+JYREREak5YWBiHDx8mNzfX6ihSC9zd3QkLC7vq96twV5Nym515Gw5xW1ILGrnUzB8OOrVswpjEMGas3MttXVrQuqnXpd8kIiIi1c7V1ZXWrVtbHUPqiHq1pMRK6/Ye5/nPtzP9xz01Os4fBkXg5uLMHxek1ug4IiIiIlI9VLiryfXtmzIsLph/fZdBxtHCGhunWWN3HrmpPd/tPMp3O3NqbBwRERERqR4q3NXoheHReDRy5unPtmK319xDFL++rhVtAr3444I0SitsNTaOiIiIiFw7Fe5qFNjYjeeGRrJh/wk+WH+wxsZp5OLE/wyLYl/eaWat2l9j44iIiIjItVPhrmZjEsOY0COcmNCa3ZqwT0Qz+kUG8c9v08kpKKnRsURERETk6qlwVzPDMHh5ZAwJLfxqfKznh0VSbjN5ddHOGh9LRERERK6OCncNKS6z8ez8bSzenl1jY4QHeHHPDa35bHMmyQeO19g4IiIiInL1VLhriKuzQcqhkzz/xQ7yi8trbJwH+7SjuY87L36Ziq0GH9QUERERkaujwl1DXJydeHV0HMdPlzF1UVqNjePl5sLTQzqyLTOfjzceqrFxREREROTq1KvCbRjGcMMw3srPz7c6CgAxob7cfX1r5q4/xJo9x2psnBHxIXRt5c+fv04jLbugxsYRERERkStXrwq3aZpfmaZ5r69vze4QciUe69eBlv6e/HFBKqZZM0s+DMPgtbHxeDZy4c4Z69mbW3MH74iIiIjIlalXhdsReTRy5p/jOvGfOxMxDKPGxmnh78n7v+mGaZrc8fY6Mk8W19hYIiIiInL5VLhrQXwLP1r4e2KaJvlFNfcAZbtm3sy+uyunSisY/9+1HD2l/blFRERErKbCXYt+//EW7py5rkZ3E4kO8eWdSV05eqqUCTPWc7KorMbGEhEREZFLU+GuRX0imrH1cD6zVu2r0XESw5vw3wlJ7M09za9nrqewtKJGxxMRERGRX6bCXYuGxwVzY8dmvLZkN4eOF9XoWD3bNeWN8Z3ZnlXA3e9soKTcVqPjiYiIiMiFqXDXIsMw+OPNMTgZ8Ozn22ts15Kf9I8K4m9j41m//zgPvJ9MWYW9RscTERERkfOpcNeyUD8P/jCoIzuzCzhSUPMPNY5MCOXPo2L5flcuj32YQoVNpVtERESkNrlYHaAhuqN7OKM6h+Lj7lor443r2pLTpRVMWZiGRyNn/nd0HE5ONbdFoYiIiIj8P81wW8DZycDH3ZVym50lO47Uypi/6dWGR25qzyfJh3m5Bg/hEREREZFzqXBbaM7aA9z7XjI/7s6tlfEe7deeu69vzTur9/O3pbtrZUwRERGRhk6F20LjurWkbaAXz3y2jdO1sHWfYRg8NzSSX3VpwT+/y2D6j3tqfEwRERGRhk6F20JuLs5MHR1H5sniWptxNgyDP42KZXh8CFMX7eS9NftrZVwRERGRhkqF22JdWvkzvltLZq3aR8qhk7UyprOTwd/GxtMvshnPf7GDzzYdrpVxRURERBoiFW4HMHlwR3q1D8SlFncOcXV24l+3d+a6tgE8+clWFm+vnYc3RURERBoaoz7tVmEYxnBgeLt27e5JT0+3Ok6dcLq0gjtmrGNHZgFv/zqJGzoEWh1JREREpE4wDCPZNM2kS72uXs1wm6b5lWma9/r6+lod5aoUlJTz3Ofb2JtbWGtjerm58M7ErrRt5s29721kw/7jtTa2iIiISENQrwp3XVdSbuPLlCye/mwbdnvt/eXB19OV9+7uSoifB3
fN2sC2w/m1NraIiIhIfafC7UCaNXbn2aGRrNt3nA83HqrVsZt6u/H+3d3w8XBlwsx1pOecqtXxRUREROorFW4HMzYUs2HJAAAgAElEQVSpBd3b+PPnr9M4WlBSq2OH+HnwwT3dcHV2Yvzb6zhw7HStji8iIiJSH6lwOxjDMHjlljhKK+z8+eu0Wh8/PMCL93/TjTKbnfFvryM7v7jWM4iIiIjUJyrcDqh1Uy+m3ZbAEwMjLBm/Q1BjZt/VlZNF5dzx9jryCkstySEiIiJSH6hwO6ghscGENfHENE3KKuy1Pn5cmB8zJ3Yh82QxE2asJ7+4vNYziIiIiNQHKtwOzGY3ueudDbz01Q5Lxu/a2p//3JlE+tFTTJq1ntOlFZbkEBEREanLVLgdmLOTQdtAb+asO8j6fdbsj927QyD/HNeJlEMnufe9jZSU2yzJISIiIlJXqXA7uMcHdCCsiQdPf7bVsrI7KCaYv4yJZ1XGMR7+YDPlttpf4iIiIiJSV6lwOzjPRi78aVQse3JP8+b3GZblGJ0Yxssjo1mWlsMTH2/BVosH84iIiIjUZSrcdUDvDoHc0imUTzdlWrqkY0KPVvxhUARfpGTx3OfbME2VbhEREZFLcbE6gFyeF4ZHYziBu6uzpTke7NOO06UVvPH9HrwaufDs0EgMw7A0k4iIiIgjU+GuI3w9XQEoq7CTll1AfAs/y7I8MSCCwpIK3l65j8burjzSr71lWUREREQcnZaU1DF/WpjKuP+u5fCJIssyGIbBC8OjGd05jNeX7ebtFXstyyIiIiLi6FS465h7bmgDwPOfb7d0DbWTk8Gro2MZHNOcKQvTmLf+oGVZRERERByZCncdE9bEkycGRPD9rly+3JJlaRYXZyf+/qtO9O4QyNPzt/FFSqaleUREREQcUb0q3IZhDDcM4638/Hyro9SoX1/XivgWfrz8VSonTpdZmqWRixPT70ikSyt/HpmXwktf7dDhOCIiIiJnqVeF2zTNr0zTvNfX19fqKDXK+cxyjtAmHpwosrZwA3g0cubdSV35dY9wZq3az9B/rGDr4ZNWxxIRERFxCEZ93Es5KSnJ3Lhxo9Uxapxpmg63Jd+K9Fye/HgreYWl/PbG9jzYty2uzvXq9zoRERERAAzDSDZNM+lSr1MTqsMMwyC/uJxXF++kqKzC6jgA9GofyDeP3sDQuGBeX7abMf9ezZ7cQqtjiYiIiFhGhbuO23XkFP/+YQ/TlqVbHaWKr6crf/9VJ/51eycOHC9i6D9W8O7q/dh1HLyIiIg0QCrcdVzX1v6M69qSt1fsZdthx3pYdFhcCN88egPd2wTwwpc7mDBzPdn5xVbHEhEREalVKtz1wFODO9LU240nPt5CQUm51XHOEeTjzqyJXfjTqBiSD5xgwOvL+XxzpqV7iIuIiIjUJhXuesDXw5W/3hrPntxCnp2/3eo45zEMg/Hdwln0SC/aN/Pm0Q9TePiDzZZvaSgiIiJSG7RLST3ybVoOHYN9CPXzsDrKL6qw2fnP8r1MW7YbP89G/O+YOPpGNLM6loiIiMgV0y4lDdBNkUGE+nlgs5vMWXeACpvd6kjncXF24qG+7fj8oZ408XRl0qwNPDN/G6dLHWOXFREREZHqpsJdD61Iz+XZ+dt58pOtDrszSHSIL18+fD333tCGuesPMuQfK0g+cNzqWCIiIiLVToW7HuoT0YwnBnRg/uZMnv9iu8M+oOju6swzQyKZe093Kmwmt05fw/8u3klZhePNzIuIiIhcLRXueuqhvu24v3db5qw7yNRFOx22dAN0bxPA4kd7MSYxjDd/2MPIN1ax68gpq2OJiIiIVAsV7nrKMAwmD4rgzu7hvLtmPweOFVkd6aIau7vyv2PieevORI4WlDD8nyt5a/kebA66JEZERETkcmmXknrObjfZk1tI+6DGVke5bHmFpTzz2TaWpObQtbU/r90aTwt/T6tjiYiIiJxDu5QIAE5ORlXZnrf+IB9uOGhxok
tr6u3Gf+5M5C9j4kjNKmDQtOV8tOGQQy+LEREREfklKtwNhN1usnjHEZ76bBtfbcmyOs4lGYbBrUktWPxoL2LDfPnDp1u5Z3YyuadKrY4mIiIickVUuBsIJyeDf49PpEu4P499mMK3aTlWR7osYU08+eA33XluaCTL03MZNG053+w4YnUsERERkcumwt2AeDRyZsbEJKJCfHhgziZWZ+RZHemyODkZ/KZXGxb89nqCfNy5771knvh4C6dKyq2OJiIiInJJKtwNTGN3V96d1JXWAV7syCqwOs4V6RDUmM8f6snDfdvx2abDDJq2gjV7jlkdS0REROSitEtJA1VSbsPd1RkAm93E2cmwONGVST5wgt9/lMKB40Xc3bM1TwyMqPp6RERERGqDdimRi/qpnG4+eIL+r//IntxCixNdmcTwJnz9SC/Gd2vJ2yv3MfyfK9memW91LBEREZHzqHA3cD4erhQUl3PH2+s4dNyxD8f5Oc9GLky5OZZ3JnUhv7ic4f9ayQPvJ7PtsIq3iIiIOA4V7gaubaA3s+/qxunSCu6YsY6jBSVWR7pifSKaseSxG3ioTztWZuQx/F8ruXPGOtbuPaa9u0VERMRy9WoNt2EYw4Hh7dq1uyc9Pd3qOHXKpoMnuOPtdYQ18eCj+3rg59nI6khXpaCknDlrDzJj5V7yCstIDG/Cg33acmPHZhhG3VqnLiIiIo7tctdw16vC/RM9NHl1Vu/J46MNh3h1TBxuLnX7AcSSchsfbTzEf37cS+bJYjo2b8wDfdoyNDYYF2f9YUdERESunQq3Cvc1OXG6DHdXZzwa1e3iXW6z82VKFv/+cQ8ZRwsJD/DkvhvaMjoxtM7/UiEiIiLWUuFW4b5q5TY7w/+5kmY+7vx3QmK9KKZ2u8mS1Bz+/UMGWw7nE+Tjxm+ub8Pt3Vri5eZidTwRERGpg7QtoFw1V2cn7urZmuW7c3lkbgoVNrvVka6Zk5PBoJjmfP5QT96/uxttA73509dp9Hz1O15fupsTp8usjigiIiL1lGa45RfNWrWPl75K5ZZOofz11nic6tjhOJey+eAJ3vxhD0tTc/Bs5Mz4bi35Ta82BPm4Wx1NRERE6oDLneHW39LlF03q2ZrTpRX8dcluwgO8eKRfe6sjVatOLZvw3wlJ7Dpyin//kMHMVft5d/UBRieGcX/vNoQHeFkdUUREROoBzXDLRZmmyaxV+xkWF0yzej7ze/BYEf9ZvoePkw9TYbMzLC6EB/q0JTLYx+poIiIi4oD00KQKd7WrsNn5YVcu/aKCrI5So44WlDBj5T7eX3uA02U2burYjAf7tiMxvInV0URERMSB6KFJqXZz1h3kN7M3MmvVPquj1KhmPu48PSSS1U/dxOP9O7Dp4AlG/3s1t/1nDT/uztXplSIiInJFNMMtl63CZufBOZtYkprD/46JY2xSC6sj1Yqisgrmrj/Ef5fv5UhBCTGhPjzUpx0Do5vXuwdJRURE5PJpSYkKd40orbDxm3c3siojj3+O68zQuGCrI9Wa0gobn2/O5N8/7GH/sSLaBHrxQO+23NwpFFedXikiItLgqHCrcNeYorIKfj1zPbtzClkxuS8+7q5WR6pVNrvJou3ZvPH9HtKyCwjxdefeG9pwW5eWdf5kThEREbl8Ktwq3DWqoKScg8eKiAn1tTqKZUzT5Ifdubz5fQYb9p+giacrYxLD+FXXlrQN9LY6noiIiNQwFW4V7lrz3pr9xIT60qllw93FY/2+47yzeh9LduRQYTfp2tqf27u2ZFBMc9xdNestIiJSH+ngG6kVRWUV/HfFPvKLy5l3b/cGu2d119b+dG3tT+6pUj5JPsy8DQd59MMU/L5y5ZZOYYzr2oL2QY2tjikiIiIW0Ay3XLNDx4u4dfoaymx23hzfme5tAqyOZDm73WTN3mN8sP4gS3YcodxmkhTehHFdWzI0Lliz3iIiIvWAlpSocNeqfXmnufvdDRw8VsRLI6MZ3y3c6kgO41
hhKZ9uOszc9YfYl3caH3cXRnUKZVy3lnRs3jD/IiAiIlIfqHCrcNe6gpJyHp2XQr/IIG7v1tLqOA7HNE3W7j3OvA0HWbTtCGU2O51a+jGua0uGxQXj2UgrvEREROoSFW4VbkvY7WbVYTCrMvLoENSYwMZuFqdyPMdPl/HZpsPM23CIjKOFNHZzYWSnEMZ1bUl0SMPd+UVERKQuUeFW4bZUcZmN61/9jkYuTvznzkTiwvysjuSQTNNk44ETzF13kIXbsimtsBMf5su4ri0ZHh+Cl5tmvUVERByVCrcKt+V2ZOVz7+xk8gpLmTo6llGdwqyO5NDyi8qZv7lyrfeunFN4NXJmREIot3dtSWyYZr1FREQcjQq3CrdDOFZYyoNzNrFu33Hu6dWaZ4ZEYhiG1bEcmmmabDp4krnrD7JgaxYl5XZiQn34VZeWjEwIoXEDO9lTRETEUalwq3A7jHKbnSkLUjGBl0fGWB2nTskvLufLlEzmrDvIziOn8HB1ZkR8COO6tSQ+zFe/vIiIiFhIhVuF2+H89EBlWnYBLk6GDoK5AqZpsuVwPvPWH+TLLVkUldno2Lwxt3dryciEUHw9NOstIiJS21S4VbgdkmmajHpzNek5p5j2q070jwqyOlKdc6qknC+3ZDF3/UG2Zxbg7urEsLgQftWlBYnhTTTrLSIiUktUuFW4HVbWyWLuey+ZbZn5/L5/Bx6+sZ1K4lXadjifuRsO8sXmTE6X2Qj182BYXDDD4kKICfXR91VERKQGqXCrcDu0knIbT3+2jfmbMxkS25zXbk3Ao5GOO79ap0srWLz9CAu2ZrEiPY8Ku0mrAE+GxYUwLD6YiKDGKt8iIiLVTIVbhdvhmabJ2yv28ePuXGZN6oKrs5PVkeqFk0VlfLPjCF9tyWb1njzsJrRr5l01892umbfVEUVEROoFFW4V7jrjp4cp8wpLyThaSPc2AVZHqjfyCktZtP0IC7ZksX7/cUwTIoN9GBYXzPC4EFoGeFodUUREpM5S4VbhrnOe+nQrHycf5vmhkfz6ulZaAlHNcgpKWLg1mwVbs9h08CQA8WG+DIsLYWhcMCF+HhYnFBERqVtUuFW465zC0goe+zCFpak53JoYxpRRMbi5aF13TTh8ouhM+c5mW2Y+AInhTRgeF8yQ2GCa+bhbnFBERMTxqXCrcNdJdrvJtG/T+ce36SS08OOtOxNV/mrY/rzTLNyWzVdbsth55BSGAd1a+zM8PoTBMcH4ezWyOqKIiIhDUuFW4a7TFm/PZtqydObe050mKny1JuPoKb7aUrnsZE/uaZydDK5rG8DwuBAGRjfH11MH7IiIiPykQRZuwzCGA8PbtWt3T3p6utVx5BrZ7CbOTgZlFXaW786lnw7JqTWmaZKWfYoFW7NYsDWbg8eLcHU2uKF9IMPig+kXGURjd5VvERFp2Bpk4f6JZrjrl5kr9/HyglQm9WzFs0MicdH2gbXKNE22Zebz1ZYsFm7NJiu/hEYuTvSNCGR4fAg3dmyGZyMXq2OKiIjUOhVuFe56o8Jm589f72Tmqn30bBfAv8Z11jITi9jtJpsPneCrLdks3JZN7qlSPFyduSmyGcPiQujdIVAHGImISIOhwq3CXe98vPEQz87fTpCvG29P6EJE88ZWR2rQbHaT9fuOs2BrFou2H+H46TLcXZ24vl0gA6KCuCmyGQHeblbHFBERqTGXW7j1d2CpM25NakG7Zt78/qMtVkcRwNnJoEfbAHq0DeClEdGs23ecpak5LE3NYVlaDk5G5VaDA6Ka0z8qiFZNvayOLCIiYgnNcEud89PDlKZpsjQ1h36RQTg56ZAcR2GaJjuyCliamsOS1BzSsgsAaN/MmwHRQfSPak5cqK9+ZiIiUudpSYkKd723KiOP8W+vo39UEK/floC3m/5g44gOHS+qmvlev/84NrtJkI8b/SKD6B8VRI+2ATrgSERE6iQVbhXues80Td5ZvZ8pC9No09SL/05I0rIFB3eyqI
zvdh5laWoOP+7OpajMhrebC70jKtd994lohq+HthsUEZG6QYVbhbvBWJ2Rx4MfbMJmN/nLmDgGxQRbHUkuQ0m5jdV78s7Mfh8lr7AUFyeD7m0CGBAdRL/IIEL8PKyOKSIi8otUuFW4G5RDx4t49MMU7u/dlv46IKfOqdxu8CRLUo+wNDWHvbmnAYgN9aV/VBADooOICGqMYWjdt4iIOA4VbhXuBsduN6sexJuz7gCtA7y4rl1Ti1PJ1cg4Wnhm5vsImw+dxDShhb8H/SObMyA6iKTwJjoASURELKfCrcLdYJXb7Az/50p2HjnF3de35smBEbi76qG8uuroqRK+Tatc970yI4+yCjt+nq7c2LEZA6Kac0OHpjrpUkRELKHCrcLdoBWX2fjz12m8t/YAEUGNmfarBCKDfayOJdfodGkFy3fnsiQ1h+92HiW/uBw3Fyd6tW9K/6ggbuwYRGBjHbYjIiK1Q4VbhVuA73cd5Q+fbOVUSTkrJ99IU518WG+U2+xs2HecJWe2HMw8WYxhQKcWfvSLCmJAVBBtA7217ltERGqMCrcKt5xx/HQZqzLyGB4fAkBBSTk+7tp6rj4xTZPU7AKWpR5lWVoO2zLzAWgV4En/qModTxK17ltERKqZCrcKt1zAivRcHpqziZdHxjAyIUSzn/VUdn4xy9KOsiw1hzV7jlFms9PE05W+HZvRPzKIXh0CdVCSiIhcMxVuFW65gIPHinj8oxQ2HjjBsLhg/nRzLL6emu2uzwrPrPtelprDd7uOcrKonEbOTlzXLoB+kZWz38193a2OKSIidZAKtwq3/AKb3WT6j3t4felumnq78drYeHpq+8AGocJmZ+OBEyxLzWFpWg4HjhUBEBfmW3XUfMfm2u9bREQujwq3CrdcwrbD+Tz64WYmXteKO3u0sjqO1DLTNCv3+06rfOgy5cx+36F+HvSPqizfXVv746p13yIi8gtUuFW45TKUlNtwc3HCMAyWpeYQ4udBVIi2D2yIjp4q4bu0yocuV6TnUVphp7G7C30jmtEvKojeHQLx9dDyIxER+X8q3CrccgVsdpMBr//IweNF/H5ABPf0aoOzk5YVNFTFZTZWpOeyLC2Hb9OOcux0GS5OBt3bBNAvsrKAhzXxtDqmiIhYTIVbhVuu0PHTZTzz2TYW7zhC19b+/G1svEqVYLObpBw6wdLUoyxNPcKe3NMARAb70P9M+Y4N9dW6bxGRBkiFW4VbroJpmnySfJiXvkrFAJY+3ls7WMg59uYWVh01v/HAcewmNPdx56bIZvSPCqJH2wDcXJytjikiIrVAhVuFW67BoeNFfL0tm/t6twUqd7fQoSnyc8dPl/H9zsryvTw9l6IyG95uLvSOCGRAVBB9Ippp3beISD2mwq3CLdUkNauA+97fyJ9HxdKrfaDVccRBlZTbWL0njyU7cliWlkNeYeW67x5tA6p2PQn29bA6poiIVCMVbhVuqSapWQX8bt5mMo4WMvG6Vjw1uCPurloyIL/MbjfZfOgkS1KPsHRHDnvzKtd9x4b6MiAqiP7RQUQEab9vEZG6ToVbhVuqUUm5jamLdvLO6v20b+bN67clEBPqa3UsqSMyjhayNDWHJalH2HzwJAAt/T3pHxXEgKggEsObaMmSiEgdpMKtwi014MfduTz58RZuTQrjyYEdrY4jddDRghKWpVXueLIq4xhlNjtNPF256cxJlze0D8Sjkf6CIiJSF6hwq3BLDTlxugwvNxcauTiRcugkAV6NaOGv7QPlyhWWVrB8dy5Ldhzhu51HKSipwN3VievbVT50eVNkMwK83ayOKSIiv0CFW4VbapjdbjLo78vJOlnCSyOiuaVzqNbkylUrt9nZsO84S1Irj5rPPFmMkwGJ4U0YENWc/lFBtGrqZXVMERE5iwq3CrfUgkPHi/j9R1tYv/84g2OaM+XmGM1IyjUzTZMdWQVn1n3nkJZdAECHIO8z676bExvqi5NOQxURsZQKtwq31BKb3e
Q/y/fw+tLd+Li7Mv/BnrQM0BITqT6Hjhex9MzM9/r9x7HZTYJ83M5sN9icHm0CaOSihy5FRGqbCrcKt9SynUcK+HDDIf5nWBSGYWC3m5qBlGp3sqiM73YeZcmO/z9sp7GbC306NmNITHP6dmymbStFRGqJCrcKt1go62Qxd85Yx1ODI+kfFWR1HKmnzj5sZ2lqDsdOl+HVyJl+UUEMjQ2md0SgjpkXEalBKtwq3GKhjKOnePiDzew8copbOofywrBofD11xLfUnAqbnXX7jrNgaxaLtx/hRFE5jd1c6B8dxLC4YK5vF6hlJyIi1UyFW4VbLFZWYedf36Xzxg97aOrdiKmj4+gb0czqWNIAlNvsrN5zjAVbsvhmxxEKSirw9XBlYHQQQ+NCuK5tAK46aEdE5JqpcKtwi4PYdjif33+cQmyoH6+Njbc6jjQwZRV2VmbksmBLNktScygsraCJpyuDYoIZFhdMt9b+OuVSROQqqXCrcIsDKa2wUW4z8XZzIS27gLzCUnq1D7Q6ljQwJeU2lu/OZcHWbJal5VBUZqOpdyMGxwQzNC6YLq38cdaDviIil02FW4VbHNRDH2xi4dZsbu/WkmeGROLt5mJ1JGmASsptfL/zKAu2ZvPtzhxKyu00a+zGkNjKme/OLZtolx0RkUtQ4VbhFgdVUm7jb0t3898Vewnx9eAvY+K4rl1Tq2NJA3a6tILvdh5lwdYsvt+VS1mFnWBfd4bGVs58J7Tw0ymqIiIXoMKtwi0OLvnAcZ74eCv78k4z49dJ3BSp7QPFeqdKyvk2rbJ8/7g7l3KbSaifB8PighkWF0JMqI/Kt4jIGSrcKtxSBxSX2Zi1eh+/ub4NjVycOF1agZeWmIiDyC8uZ2lqDgu2ZrEyPY8Ku0l4gCdDYyvLd2RwY5VvEWnQVLhVuKWOKSgpZ/C0FQyMbs6TAyPwaKQDS8RxnDhdxpLUIyzYms3qPcew2U3aBHoxLDaYYfEhdAhqbHVEEZFap8Ktwi11TFFZBVMX7WT2mgO0burFX2+NIzHc3+pYIuc5VljK4h1HWLAlm3X7jmE3oUOQN6M6hXFL51CCfNytjigiUitUuFW4pY5anZHHk59sJSu/mHt6teHJgRE6pEQc1tFTJSzefoQvUrJIPnACJwN6tQ9kTGIY/aOCcHfVX2pEpP5S4VbhljqssLSCPy1M4/CJImbf1VXrZKVO2Jd3mk+TD/PZpsNk5Zfg4+7C8PgQRieG0Uk7nYhIPaTCrcIt9UBZhZ1GLk5knSxm3vqDPHRjO9xcNGMojs1uN1mz9xifJB9m0fZsSsrttAn0YkxiGLd0CqO5r5aciEj9oMKtwi31yKxV+3jpq1Qighrz11vjiQ3ztTqSyGU5VVLO19uy+ST5MBv2Vy45ub59IKM7hzIwurmWnIhInabCrcIt9cz3O4/y1GdbySss46E+bXn4xvY0ctHabqk79ued5rNNh/l0UyaZJ4tp7O7CsLgQxiSG0bmllpyISN2jwq3CLfVQflE5Ly3YwWebMvndje14fECE1ZFErpjdbrK2asnJEYrLbbRp6sXoxMpdToJ9PayOKCJyWVS4VbilHvtuZw6J4f74erhyJL+EAO9G2slE6qTC0oqqJSfr9x3HMOD6dk0ZkxjGgKjm2o9eRByaCrcKtzQAFTY7w/+1CmcneO3WBCKa6/ARqbsOHDvNp5sy+TT5cOWSEzcXhsUHn1ly0kRLTkTE4ahwq3BLA7F4ezbPzt9OQUk5j/XvwL292uCi2W6pw+x2k3X7jlftclJUZqN1Uy9Gdw5lVOcwQv205EREHIMKtwq3NCDHCkt57vPtLNp+hIQWfrx1ZyLNdNqf1AOnSytYtP0InyQfYu3eyiUnPdtWLjkZGK0lJyJiLRVuFW5pYEzT5Kut2cxdd5B37+qqHUyk3jl0vIhPNx3m002HOXS8GG83F4bGBjMmKYykcC05EZHaV28Kt2EYbYBnAV/TNMdczntUuKUhM00Twz
AoKCnn2fnbeXJABC0DPK2OJVJt7HaTDfsrl5ws3Fa55KRVgCfD4kIYHNucqGAflW8RqRUOUbgNw5gJDAOOmqYZc9b1QcDfAWfgbdM0p17GvT5R4Ra5fOv3HefudzZgM02eGRLJ+G4tVUKk3jldWsHi7Uf4bPNh1uw5ht2EVgGeDIkNZkhsMNEhKt8iUnMcpXDfABQCs38q3IZhOAO7gf7AYWADMI7K8v3Kz25xl2maR8+8T4Vb5AplnSxm8qdbWZGeR6/2TXl1dBwheuBM6qljhaUsSc3h623ZrN5zDJvdJDzAk8ExwQyNDSYmVOVbRKqXQxTuM0FaAQvOKtw9gBdN0xx45uOnAUzT/HnZ/vl9Llq4DcO4F7gXoGXLlokHDhyolvwidZ1pmsxZd5A/f51Gr/ZN+c+dl/z/BZE67/jpMpamHmHhtiOszsijwm7Swt+DITHBDI4NJj7MV+VbRK6ZIxfuMcAg0zR/c+bjO4Fupmk+/AvvDwD+ROWM+NuXKuagGW6RCzl4rAhXF4NgXw9yCkowQDuZSINwsqisauZ7ZXpl+Q7182BIbHMGxwbTqYWOlReRq3O5hdulNsJcC9M0jwH3W51DpK47+8HJZ+dvZ+OB47w0IpoR8SEqG1Kv+Xk2YmxSC8YmtSC/qJylaZXl+53V+/nvin2E+LozODaYIbHN6dSiCU5O+t+DiFQvKwp3JtDirI/DzlwTkVry9JCOPPHxFh6Zl8I3O47wx5ExBHi7WR1LpMb5eroyJjGMMYlh5BeX8+2Z8v3emgPMWLmP5j7uDI5tzpDYYBJbqnyLSPWwYkmJC5UPTd5EZdHeANxumuaO6hpTS0pELq3CZuetFXuZtjSdxu4uzJjYhYQWflbHErFEQUk536UdZeG2bH7cnUtZhZ0gHzcGxwQzOKY5Sa38cVb5FpGfcYg13IZhzAX6AE2BHOAF0zRnGIYxBJhG5c4kM03T/FN1jqvCLXL5dv0fe/cdX3V593/89c3eO4EswghDCHsIqCz3oLjrbh1UbeL5G3wAACAASURBVGtr1Y777q/tfbd3l6OtWuuebd0bZ1X2EAh7hh1IAiQheyfnfH9/XCcJ4GIk+Z7xfj4e53Fm8BPF5H0+53Nd14Fa7vt4Kw9cOYr4yFCnyxFxXG1TK3O3lvLBhv3MLyijuc1Namw45+f15vy8dCb0U/gWEcMrArdTFLhFTkxLm5tb/5nPDZP7Mn1wmtPliDiurrmNeZ7wPa+glKZWNykx4ZyX14sLPOE7JFinuooEKgVuBW6R41Zc1ciNz65g28E6rhqfzS8vPIXYCHW9RcAcsjOvoJQPNxxg7tZSGltdJEeHcW5eby7IS+fU/kmEKnyLBBQFbgVukRPS3Obir59s54mFO0mPj+S+y0cwOTfF6bJEvEpDSxvzC8r4YMN+5m4tpaHFRWx4CKcPTGH64DSmDU7VtpsiASAgA7dlWTOBmbm5ubO3b9/udDkiPm1VYSX3vLaOiNBg3r/jdO3WIPIVGltcLNxexvyCUuZtLeNATRMAeZlxnvCdxqjsBM19i/ihgAzc7dThFukajS0uyuuayU6Koqaple0Haxmbk+R0WSJey7ZttuyvZV5BKfMLSllVWInbhsSoUKYOSmX6kDSmDEwlMTrM6VJFpAsocCtwi3SpP36whScW7eKW0/tx9zmDiQgNdrokEa9X1dDCwu3lzN9ayvxtZVTUtxBkweg+iUwfnMq0wWkMy4jT4VMiPkqBW4FbpEvVN7fxhw+28O/lexmQGs0DV47Svt0ix8HlttlQXM3crab7vb6oGoC02HCmD05j+pBUTstN0UJlER+iwK3ALdItFm4r4+dvrKe0tpl7LxvBZWOznC5JxCeV1TazYFsZ87aWsnB7GbVNbYQEWYzvm8SMISaAD0iNUfdbxIspcCtwi3Sb6sZW7v1oK9+fnktmQiS2bSsUiJyEVpeb1YWVzCswAbzgYC0AWYmRTB+cxowhaUzsn0xkmEa5RL
yJArcCt0iPsG2bW57PZ1R2ArdPG6BDQES6QHFVY8euJ0t2lNPY6iI8JIhJA5I7Anh2UpTTZYoEPAVuBW6RHtHU6uKnr69nzroSRmUn8JcrR9I/NcbpskT8RlOrixW7K5hXUMq8raXsOdQAwIDUaM/sdxrj+yYRFqI3uyI9TYFbgVukR81ZV8L/e3sjLW1u/vvCU7ju1D4aMxHpBrvL65m3tZR5BaUs31VBi8tNdFjwYYfupNE7XofuiPQEBW4FbpEed6C6iZ+9sZ5tB2r55K4p2m1BpJvVN7exdOch5m4tZUFBKSXV5tCdU9LjmD7Y7Ps9OjtBo14i3SQgA7dOmhRxnm3b7K9uIiMhkpY2N4t3lDFjSC+nyxLxe7Zts+1gXcfoSX5hJS63TVxECFMGpTJ9cBpTB6eSEhPudKkifiMgA3c7dbhFvMMLy/bw63c2MWtUBr/9Vh7xUep4i/SU6sZWluwo94yflFFe14xlwYisBNP9HpzG8Mx4gnTkvMgJU+BW4BZxXJvLzSPzdvLQ3O2kxoRz/xUjOX1gitNliQQct9tmU0mN6X4XlLJ2XxW2DcnRYUz1hO8pA1P1pljkOClwK3CLeI31RVXc+cpadpXV88sLTmH2lP5OlyQS0CrqW1i4rYx5BaUs2FZGVUMrwUEWY/okMG1wGtMHp3FKeqwWPot8AwVuBW4Rr9LY4uLej7dy+dgshmXE67AcES/hctus3Vdl9v0uKGVjcQ0AveMimD4klWmD0zgtN4WY8BCHKxXxPgrcCtwiXu2/3txAenwE39dhOSJepbSmifmeI+cXbS+nrrmN0GCLCf2SOrYdHJAarTfMIihwK3CLeLE2l5u7X1vHO2t1WI6IN2t1ucnfU9nR/d52sA6A7CRz5Pz0wTpyXgKbArcCt4jXaz8sp7nNxS8vOIXrJuaoaybixYoqG5hfUMb8glKW7Dh0xJHzE/snMy4nkbzMeCJCFcAlMChwK3CL+IT2w3LWFFby2d1TSYvTCXkivuDwI+cXFJSxq7wegLDgIPIy4xjXN4mxOYmMzUnU3t/itxS4FbhFfIZt2+wqr2dAagy2bbNs1yEmD9D2gSK+pLyumVWFlawurCS/sJINRdW0uNwA9E2OYmyOCeDj+iaSmxqj/b/FLyhwK3CL+KQ560q446U1OixHxMc1tbrYWFzNKk8AX11YyaH6FgDiIkIYk5PIuJxExuYkMTI7nqgw7YIivicgA7eOdhfxfW0uN/+Yv5OHPttOig7LEfEbtm2z51AD+XsqWFVYyarCSraXmkWYIUEWQzPiOkZQxuUk0Tte42Xi/QIycLdTh1vE960vquInr6xlZ1k9d509iB+dOdDpkkSki1U1tLB6rwnf+XsqWVdURVOrGUPJTIjsGEEZm5PIkN5xBGsMRbyMArcCt4jPa2p18acPtzJjSBpTBqU6XY6IdLNWl5vNJTXkF1ayqrCC/D2VlNY2AxAdFszoPokdXfDRfRKIjdDImThLgVuBW8TvPPipGRX7wXQdliMSCGzbpqiysWMEJb+wkq0HarBtCLJgcO84xuYkMC4niZHZCeQkRWkxpvSoYw3cWqEgIj7Btm0KK+p5c3UxcwtK+asOyxHxe5ZlkZ0URXZSFBePzgSgtqmVNXurOkL4W6uL+dfnewGICQ9haEYceRnxDM8y1/1TYzSKIo47pg63ZVkDgCLbtpsty5oGjABesG27qpvrOyHqcIv4Lx2WIyKHc7ltCg7UsqG4io3FNWwsqWbL/pqOWfDI0GBOSY9leGY8wzLjycuIZ2CvGEL1KZl0gS4dKbEsay0wDugLfAC8AwyzbfuCk6yzWyhwi/i39sNylu0s56M7pzBAnW4ROUyby83Osno2FlezsaSaTcU1bCqppr7FBUBYSBCn9I7tCOB5mXEM6hWrEzLluHV14F5t2/YYy7J+CjTZtv2wZVlrbNse3RXFdjUFbhH/Z9s2m0pqyMuMB2DdvipGZic4XJWIeCu322b3IR
PCN5XUmDBeXE1NUxtgtiYc1CuWvMw48jLjGZYRz9D0OCLDFMLlq3X1DHerZVlXA98BZnoe09JgEXGMZVkdYXvpjnKueWo53xqZwe9m6bAcEfmioCCLAakxDEiNYdYoMw9u2zb7KhrZWGLC94biaj7dUsqr+UXmayzITYshL8OMowzPjGdoRhwx4VoCJ8fnWDvcQ4HbgGW2bb9kWVY/4Erbtv/c3QWeCHW4RQLL0Yfl3Hv5CG0jKCInxLZt9lc3dXTAN3q64e3bE1oW9EuO9gRwszBzWEa83ugHqG7bFtCyrEQg27bt9SdaXHdT4BYJTBuKqvnJq2vZUVrHbVMH8Ivzhzhdkoj4idKaJk8nvKZjLKW4qrHj+T5JUQzPjGd4lumE5ymEB4SunuGeD3wLM4KyCigFlti2fddJ1tktFLhFAldTq4v7Py5gcO9YrhiX7XQ5IuLHKupbOkZRNpVUs76omqLKzhCek+wJ4Z4gnpcZT5wO6/ErXR2419i2PdqyrFsw3e3fWJa13rbtEV1RbFdT4BaRdi+v2EtxVSN3zBhIWIi2AROR7lVZ38IGTwjfUGSuD++E902OYnhWAsMz4xiemUBeZpxOzPRhXb1oMsSyrHTgSuCXJ1VZN7IsayYwMzc31+lSRMRLbN5fwwvLCplXUMpfrxzFwF6xTpckIn4sMTqMKYNSj1hHUtEewouq2FBczerCSuasK+l4vn9KNHmZ8YzwdMGHZSiE+5tj7XBfAfwKM0Zyu2VZ/YH7bNu+rLsLPBHqcIvI4T7edID/fnMDtc1t/Ozcwdx0Wj8d/ywijjpU13xEF3xDcTX7q5sAz8LMlGhGZMZ7gngCwzLiiNbuKF6n2xZN+gIFbhE5WnldM794YwOfbjnIG7dPZmxOotMliYgcoay2uWMmfH2R2SXlQE1nCO+fEs2IrISObvjQdIVwp3X1DHcW8DBwmuehRcCPbdsuOqkqu4kCt4h8Gdu2Wb23qiNsbyqpZmh6nI6GFxGvVVprtijcUFTDhmIzknKwpnOLwtzUGIZ7AvjoPomckh6n9So9qKsD9yfAi8A/PQ9dB1xr2/bZJ1VlN1HgFpFvsvVADRc+tJgZQ9L446XDSYkJd7okEZFjUlrTdEQXfH1xNWWefcLDQoIYlhHHqOwERmUnMDo7keykSDUWuklXB+61tm2P+qbHvIUCt4h8E7fb5unFu7nv4wJiI0L446XDOWdYb6fLEhE5bu2H9azdV2Uue6tYX1xFU6sbgKTosI4APio7gZHZCcRHalFmV+jqwP0Z8Czwkuehq4Ebbds+86Sq7CYK3CJyrAoO1HLXq2vZVFLD1ROy+cMlw9UJEhGf1+ZyU3CwlrX7qliz1wTxnWV1tMe+/qnRng54AqOyExmSHktosEZRjldXB+4czAz3JMAGlgJ32La972QL7Q4K3CJyPFra3Dw8dzsRocH8YLq2FRUR/1TT1Mr6fdWs3VfZ0Q0vr2sBIDwkiLzM+CM64VmJGkX5Jt2+S4llWXfatv23E/ribqbALSIn47MtB1m68xA/PXcwEaHBTpcjItItbNumqLKxcxRlXxUbi6tpbjOjKCkx4YzKNiF8dJ9ERmTFa3/wo3T1wTdf5i7AKwO3iMjJWFdUzdOLd7NwWxl//fYo8jLjnS5JRKTLWZZFdlIU2UlRzByZAUCry83W/bWs3VfJGk8I/3RLqef1ZleUUdkJjOpjuuCDe8USolGUb3QyHe59tm1nd3E9XUIdbhE5WQu3lfHT19dxqK6FH585kNunDdAvFREJSNUNrawrqjqiE15Rb0ZRIkODGZ4ZT26vGLITo8hKjCQ7yVwnR4f5/UhKT4yU7LVtu88JfXE3U+AWka5Q3dDKr97ZyLvrSnjsujGcl5fudEkiIo6zbZt9FY2sOWwWfE95PZUNrUe8LjI0mKzESM8liuwkc52VGEl2YhQJUaE+H8i7JHBbllWLWST5haeASNu2vfJ4IwVuEelKn+86xKn9krAsi+
0HaxmQGqOj4UVEjlLX3EZRZQNFFY0UVTawr9Jz7blf09R2xOujw4I7uuHtQTzrsC65L2xd2CUz3LZtx3ZdSSIivmli/2QADlQ3MeuRJYzNSeTey0eQHh/pcGUiIt4jJjyEIb3jGNI77kufr25sNYG8spGiykb2VbTfbuDzXRXUNR8ZyGMjQkxn/Ms65ElRxPjQsfYnPFLizdThFpHuYNs2L67Yy/+9t4WQYIvfzcpj1qgMn/9IVETEabZtU93Y2tENL6psZF9lZyDfV9FIY6vriK9JiArtGE+ZMSSNK8b1/NLCntilREQkoFiWxbWn5nDagBTuenUtd76ylk+2HOTBb4/SgkoRkZNgWRYJUWEkRIUxPOuLO0PZtk1FfcuXBvFtB2vJSY52oOpj51eB27KsmcDM3FwdXCEi3advSjSv3jqJxxfuoqy2WWFbRKSbWZZFckw4yTHhjMxOcLqc46aREhGRLrChqJq31hTz8/MHEx6iw3JERALBsY6UqC0jItIFFu8o55klu7n0H0vZVVbndDkiIuJFFLhFRLrA7dMG8NQN4yipauSihxfzxqoip0sSEREvocAtItJFzhraiw9+fAZ5mfHc/do6Ptiw3+mSRETEC/jVokkREaelx0fy0uyJvJa/j3OG9gKgqdVFRKjmukVEApU63CIiXSw4yOKqCX0ICQ6ivK6ZMx9YwDOLd+OPi9RFROSbKXCLiHSjYMvilPRYfvveZm55Pp+K+hanSxIRkR6mwC0i0o0So8N48oZx/GbmUBZtL+f8BxeybOchp8sSEZEepMAtItLNLMvixtP68eb3JxMdFsIzS3Y7XZKIiPQgLZoUEekheZnxzLnjdFpdbgCKqxoJssxCSxER8V/qcIuI9KDo8BASosIA+MUb6zn/wUV8svmgw1WJiEh3UuAWEXHIb2flkZUYyewX8vmfdzfR3OZyuiQREekGCtwiIg7plxLNG7dP5qbT+vHc0j1c8shSiqsanS5LRES6mAK3iIiDwkOC+fXMoTx1wzgiw4JJiAx1uiQREeliCtwiIl7grKG9eP22SUSHh9DY4uLej7ZS19zmdFkiItIFFLhFRLyEZVkALNtVzmMLdnLRQ4vYUFTtcFUiInKyFLhFRLzMjCG9eGn2RJrb3Fz66BKe1rHwIiI+za8Ct2VZMy3LeqK6Wh0hEfFtp/ZP5oMfncHUQWn87r3N/OmjrU6XJCIiJ8jyx67JuHHj7Pz8fKfLEBE5abZt88/PC5kyMJW+KdHYtt0xeiIiIs6yLGuVbdvjvul1OmlSRMSLWZbFDZP6AiZ8//DFNeSmxXDHjFxCgv3qQ0oREb+ln9YiIj6i1WUTGRbMg59t55qnlrO/Wnt2i4j4AgVuEREfERYSxP1XjOSv3x7JpuJqHQsvIuIjFLhFRHzMJaOzeO9HZ5CVGMl/vble+3WLiHg5zXCLiPig9mPh95Q3EBMegstts7+6kazEKKdLExGRo6jDLSLio8JDghncOxaAJxbu4ry/LeK99SUOVyUiIkdT4BYR8QPfGpXBoF4x/PDFNfz6nY00t7mcLklERDwUuEVE/EBmQiSv3DqJ2Wf044VlhVz+6DL2HmpwuiwREUGBW0TEb4QGB/HLC4fyxPVjKalqpKyu2emSREQELZoUEfE75wzrzekDU4gKMz/iP918kCmDUgkLUY9FRMQJ+ukrIuKH2sP21gM13PJCPlc+voziKh2UIyLiBAVuERE/NqR3HI9cM4YdpXVc+NAi5m7VQTkiIj1NgVtExM9dOCKd9+44nYz4SG56Lp+/frLN6ZJERAKKAreISADomxLNm9+fzDWn9iE9PsLpckREAooWTYqIBIiI0GD+cMnwjvvvrS8hLiKUKYNSHaxKRMT/qcMtIhKA3G6bpxbt5jvPruAv/ynA5badLklExG8pcIuIBKCgIIsXZ5/KZWOyeGjuDq57ajmltU1OlyUi4pcUuEVEAlRUWAj3XzGS+y4fwZp9lVz40GIq61
ucLktExO9ohltEJMBdMS6bEVkJzN1aSmJ0mNPliIj4HXW4RUSEwb1juX3aAADWF1Vxy/P5HNLR8CIiXUKBW0REjrC7vJ6F28u48KHFrNxT4XQ5IiI+T4FbRESOMGtUJm/ePpnw0CCueuJzHluwE7d2MREROWF+Fbgty5ppWdYT1dXVTpciIuLT8jLjmXPH6Zw7rBd/+nArb60pdrokERGfZdm2/3Utxo0bZ+fn5ztdhoiIz7Ntm/c37Oe8Yb0JCQ6iqdVFRGiw02WJiHgFy7JW2bY97pte51cdbhER6VqWZXHRiAxCgoMoq23mzAcW8Mzi3fhjs0ZEpLsocIuIyDEJCw7ilPQ4fvveZm7/12qqG1udLklExCcocIuIyDGJjwrlyRvG8ssLTuHTLQeZ+fBiNhZrzYyIyDdR4BYRkWNmWRazp/TnlVsn0epy88i8HU6XJCLi9XTSpIiIHLexOYl88KMzsCxz/2BNE1FhwcRGhDpbmIiIF1KHW0RETkhidBgJUWHYts0P/r2aCx9azLp9VU6XJSLidRS4RUTkpFiWxc/PH0Kby81ljy7lcR2UIyJyBAVuERE5aeP7JvHhj6dw1im9+OOHW/nOsyuoqG9xuiwREa+gwC0iIl0iPiqUR68bw+8vyeNQXQvhIfoVIyICCtwiItKFLMvi2lNzmHPH6USHh9DY4uKxBTtpaXM7XZqIiGMUuEVEpMsFB5ntS/6z+QB/+nArlz+2lD3l9Q5XJSLiDAVuERHpNrNGZfLYdWPYU17PhQ8t4u01xU6XJCLS4xS4u5KtVfkiIkc7Ly+dD++cwtCMOO58Za0OyxGRgKPA3VUW3AcvX+t0FSIiXikzIZKXZk/krrMHcdGIdABsNSlEJEAocHeVoGAoeB9KtzpdiYiIVwoJDuJHZw4kJzka27b50ctreXrxbgVvEfF7CtxdZcwNEBwGK59yuhIREa/X3OamqdXF797bzE3PreRQXbPTJYmIdBsF7q4SnQLDLoV1L0NzrdPViIh4tYjQYJ64fiy/nTWMJTsPcd6Di1iyo9zpskREuoUCd1eaMBtaamH9K05XIiLi9SzL4oZJfXnnB6cRHxnKna+spbHF5XRZIiJdToG7K2WOhfRRsOIp7VgiInKMTkmP490fnsZzN44nMiwYl9umpKrR6bJERLqMAndXsiwYfwuUbYHCJU5XIyLiM6LCQhiWEQ/A4wt3cu5fF/LuuhKHqxIR6RoK3F0t7zKISIAVTzpdiYiIT5o5IoPcXjH86KU1/Oz1dTS0tDldkojISVHg7mphUTD6Otj6HtTsd7oaERGfk50Uxau3TuKH03N5bVURFz28mC37a5wuS0TkhClwd4fxN4O7DVY/73QlIiI+KTQ4iHvOHcy/bz6V5lazhaCIiK9S4O4OSf0h9yzIfxZcrU5XIyLisybnpjDvnmmM7pMIwKv5+6iob3G4KhGR46PA3V3Gz4a6A2a0RERETlhYiPlVVVzVyP97eyPnP7iQZTsPOVyViMixU+DuLgPPhoQ+ZotAERE5aZkJkbx5+2Siw0K45qnPeeA/BbS53E6XJSLyjRS4u0tQMIy7GQoXQ+kWp6sREfELeZnxzLnjdC4fk8XDc3dw43MrsXXugYh4OQXu7jT6eggOh5XqcouIdJXo8BDuu2IkD141iotHZWJZltMliYh8LQXu7hSdDHmXwrqXoUlbWomIdKVZozK5bGwWAG+sKuKe19ZR06SF6iLifRS4u9v42dBSB+tfcboSERG/daCmiTdXF3H+3xaxZEe50+WIiBxBgbu7ZY2FjNFmrERzhiIi3eIH03N54/bJhIcGce1Ty/nNOxtpbNHe3SLiHRS4e8L42VC2FfYsdroSERG/NbpPIu/fcQY3ntaXFz4vZMWeCqdLEhEB/CxwW5Y107KsJ6qrq50u5Uh5l0JkIqx80ulKRET8WmRYML+ZOYxPfjKVqYNSAVi28xDNbep2i4hz/Cpw27Y9x7bt78XHxztdyp
FCI2H0dbDlPagpcboaERG/l5sWA0BJVSM3PLOcWX9fwuYSLV4XEWf4VeD2auNuBtsNq55zuhIRkYCRkRDJo9eOpbyuhVmPLOaReTt0WI6I9DgF7p6S1M+cPrnqOXBp2yoRkZ5y1tBefPKTKZwzrDf3fVzAVU98rtAtIj1KgbsnjZ8NdQdhyxynKxERCSiJ0WE8cs0YHr56NGcN7UVIsPn1p1MqRaQnKHD3pNwzISFHJ0+KiDhk5sgMbps6AICF28q49qnlFFU2OFyViPg7Be6eFBQM42+GwiVwcJPT1YiIBLTqxlbW7avivL8t4tWV+9TtFpFuo8Dd00ZfDyER6nKLiDhs5sgMPrpzCsMy4vjZG+u55fl8SmubnC5LRPyQAndPi0qCvMtg3SvQpC2qRESclJ0UxUuzJ/Lri4ayeEc5n20pdbokEfFDCtxOGH8ztNbDupedrkREJOAFBVncdHo/Pr1rKleNzwbMYTmV9S0OVyYi/kKB2wmZYyFjjBkr0cygiIhXyE6KwrIsGltc/ODF1Zzzt4XM26qOt4icPAVup0yYDeUFsHuh05WIiMhhIsOC+efNE0iKCuPG51byizfWU9fc5nRZIuLDFLidMuxSiEzS4kkRES80LCOed+84jdumDuDV/H2c97eFVDfo0DIROTEhThcQsEIjYMz1sPTvUF0M8ZlOVyQiIocJDwnmF+cP4eyhaSwoKCM+KhQwh+VYluVwdSLiS9ThdtK4m8B2m+PeRUTEK43NSeKucwYDsLmkhvMfXMS6fVUOVyUivkSB20mJfWHgOSZwt2k1vIiIt6tvaaO6sZVLH13KX/5TQEub2+mSRMQHKHA7bcJsqC+FLe86XYmIiHyD8X2T+OjOKVw8KpOH5u7gkn8soeBArdNliYiXU+B22oAzTad75dNOVyIiIscgPjKUB64cyePXj+VgTRPvrS9xuiQR8XJaNOm0oCAYdzN88is4uAl6DXO6IhEROQbnDuvNuJxEYiPMYsqlO8sJDwlibE6Sw5WJiLdRh9sbjL4OQiJgxZNOVyIiIschOSacsBDzq/Rvn2znskeXcc9r6yiva3a4MhHxJgrc3iAqCfIuh/WvQlO109WIiMgJePbG8dw+bQDvrC1m+v3zeW7JbtpcWlQpIgrc3mPCLdBaD2tfcroSERE5AdHhIfz8vCF8+OMpjMxK4H/mbObTLQedLktEvIACt7fIGA2Z48zJk7btdDUiInKCctNi+OfNE/jnzRM4d1hvABZuK6OsVmMmIoFKgdubjL8FDm2H3QucrkRERE6CZVmcMTAVy7JoanVx5ytrmfGAxkxEApUCtzcZdglEJWvxpIiIH4kIDea12yYxKtuMmVz08GLy91Q4XZaI9CAFbm8SGgGjr4eCD6C6yOlqRESkiwxIjeGFmybwj2vHUNPYyhWPL2NnWZ3TZYlID1Hg9jbjbjIz3Kuec7oSERHpQpZlccHwdD69eyoPXTWaAakxACzfdUhjJiJ+ToHb2yTmwKBzYdXz0NbidDUiItLFosJCmDkyA4A95fVc/eTnXPTwYlZqzETEbylwe6Pxs6G+FLa863QlIiLSjXKSozrHTB5bxl2vrKW0tsnpskSkiylwe6MBMyCpvxZPioj4OcuyOC/PjJn8YPoA3lu/nwseXERji8vp0kSkCylwe6OgIBh3M+z7HA5scLoaERHpZlFhIfz03CF8dOcZ/L8LhxIZFgxAwYFahysTka6gwO2tRl8LIZHmIBwREQkI/VNjuHh0JgDzC0o5928L+YnGTER8ngK3t4pMhOGXwfpXobHK6WpERKSHndovmR9Oz+X99fs58/4FPL1Yh+aI+CoFbm82fja0NsC6l5yuREREelhkWDD3nDuYj38yhdE5ifzuvc1859kVTpclIicgxOkC5GtkjIKs8WasZMKtZrZbREQCSr+UaJ6/cTwfbzqI27YBaHO5qWhoIS02wuHqjFGBHQAAIABJREFURORYKMF5u/Gz4dAO2D
3f6UpERMQhZjeT3lwwPB2Afy/fywyNmYj4DAVubzd0FkQlw8qnna5ERES8xJRBqYz1jJlc+NBilu865HRJIvI1FLi9XWgEjLkBCj6Aqn1OVyMiIl6gX0o0z904nsevH0tdcxvffuJz7vt4q9NlichXUOD2BeNuMternnW2DhER8RqWZXHusN58etdUfnLWICYPSAGgqqGFosoGh6sTkcMpcPuChD4w6DxY9Ty0NTtdjYiIeJHIsGB+fNZATss1gfvRBTuZcf8C/nfOJsrr9DtDxBsocPuK8bdAQzlsftfpSkRExIt9Z1JfLhmdyfNL9zD13nn85T8F1DS1Ol2WSEBT4PYV/adDUn9Y+aTTlYiIiBfLSIjkz5eP4JO7pjJtcBoPzd3B/7yzyemyRAKa9uH2FUFBpsv98X/D/vWQPsLpikRExIsNSI3hkWvHcFtRNbER5tf9rrI6lu+u4IqxWYQEq+cm0lP0f5svGXUNhESqyy0iIsdseFY8fVOiAXhrTTH/9eYGzv7rQt5bX4LbbTtcnUhgUOD2JZGJMOIKWP8aNFY6XY2IiPiYu84exJM3jCMsOIgfvriGmX9fzMJtZU6XJeL3FLh9zfhboK0R1r7kdCUiIuJjLMvi7KG9+ODHZ/DXb4+kurGV+QUK3CLdTYHb16SPhKwJsPIpcOs4XxEROX7BQRaXjM5i7t3TuOucQQAs3VHOzc+tZMv+GoerE/E/Cty+aMJsqNgJu+Y5XYmIiPiwsJAgYsLNgsqDtU2s2FPBBQ8t4s6X17D3kA7PEekqCty+aOgsiEoxXW4REZEucMnoLBb9bDq3ThnAR5sOMOOB+TzwnwKnyxLxCwrcvigkHMZ+B7Z9BFV7na5GRET8REJUGL84fwgLfzqdqyZkkxobDkCby011gw7PETlRCty+auyN5jr/WWfrEBERv5MWF8H/XTycGyb1BeDNNcWcce9cHpm3g4aWNmeLE/FBCty+KiEbBp0Pq1+AtmanqxERET82MiuB8X2TuO/jAqbeN58Xlu2hpU0L90WOlQK3L5twCzSUw6a3na5ERET82ODesTz93fG8ftsk+iVH8+t3NnHbv1Y5XZaIz9DR7r6s3zRIzjUnT478ttPViIiInxvXN4lXbp3I/G1lhHuOhq9tamXF7gpmDEnDsiyHKxTxTupw+7KgIHMQTtFKKFnrdDUiIhIALMti+uA0JuemAPDyin3c/Hw+M/++mDnrSmhzadRE5GheH7gty7rYsqwnLct6xbKsc5yux+uMvBpCo7RFoIiIOOK7p/Xl3stG0NDi4o6X1jDjgQX86/NCbNt2ujQRr9GtgduyrGcsyyq1LGvjUY+fZ1lWgWVZOyzL+sXX/Rm2bb9t2/Zs4DZAcxNHi0yA4VfAhtehsdLpakREJMCEBgdx5fhsPv3JVB67bixJ0WF8svlgx3hJU6vL4QpFnNfdHe7ngPMOf8CyrGDgEeB8YChwtWVZQy3LGm5Z1ntHXdIO+9L/5/k6OdqE2dDWCGv+7XQlIiISoIKCLM7L681b35/MI9eOAWBfRQOn/uEzfv/+Zg5UNzlcoYhzunXRpG3bCy3L6nvUwxOAHbZt7wKwLOtlYJZt238ELjr6z7DMW+Q/AR/atr26O+v1Wb2HQ/ZEWPow9D0NMkY7XZGIiAQoy7I6josHmDY4lWeW7OG5pXu4eFQmt07tT25arIMVivQ8J2a4M4F9h90v8jz2Ve4AzgIutyzrtq96kWVZ37MsK9+yrPyysrKuqdSXnP9nCAqGp86Gzx8Dzc6JiIjDspOiePCq0cy/ZxrXTOjDnPUlXPDQYqoaWpwuTaRHWd29qMHT4X7Ptu08z/3LgfNs277Fc/964FTbtn/YVf/McePG2fn5+V31x/mOhgp4+3Zz5PvgC2HW3yEqyemqREREADhU18zKPZWcl9cbgD9+sIWJ/ZOZNjhVWwqKT7Isa5Vt2+O+6XVOdLiLgezD7md5HpOTFZUEV78M5/4Rtv8HHj
sD9i53uioREREAkmPCO8J2VUML764r4cbnVnL+g4t4e02xthQUv+VE4F4JDLQsq59lWWHAVcC7DtThnywLJn0fbv4PBIfAs+fDogfArR9iIiLiPRKiwljw0+ncf8VIXG6bO19Zy9T75rOxuNrp0kS6XHdvC/gSsAwYbFlWkWVZN9u23Qb8EPgY2AK8atv2pu6sIyBljoFbF8LQWfDZb+Hfl0FdqdNViYiIdAgLCeLysVl8fOcUnrphHLlpMfRJjgJgc0kNlfWa9Rb/0O0z3E4I2BnuL2PbsPp5+PDnEBEPlz4B/ac5XZWIiMhXsm2b8x9cROGhBq6akM0tZ/QnMyHS6bJEvsCbZ7ilJ1kWjP0uzJ4LEQnwwsUw9//A1eZ0ZSIiIl/KsiwevGo05w/vzT+XFTL13nnc9cpadpTWOl2ayAlR4A4UvYbB9+bB6Gth4X3w/Eyo1lpVERHxToN7x/KXK0ex4GfTuWFSXz7adIB1+8x8t8vtf5/Oi3/zq5ESy7JmAjNzc3Nnb9++3elyvNf6V+G9n0BwGFz8KAw+75u/RkRExEGV9S3ERIQQGhzEEwt38vGmg9w2dQBnDkkjKEhbCoozAnKkxLbtObZtfy8+Pt7pUrzbiCvhewsgPhNe+jZ8/Eto08IUERHxXonRYYQGm9iSEhPOwZomZr+Qz7l/W8jrq4pobnM5XKHIV/OrwC3HISUXbv4UJnwPlv0dnjkXKnY7XZWIiMg3unRMFvPvmcaDV40iOMjintfWcder65wuS+Qr+dVISTvtUnKcNr8L7/7Q7Ggy80HIu9TpikRERI6Jbdss2l5OTEQIY/okUlzVyM9eX8dV4/twzrBehIcEO12i+LFjHSkJ6YlixMsN/Rakj4Q3bobXb4TdC+G8P0KotmASERHvZlkWUwaldtwvqmig8FADd7y0hqToMK4Ym8VVE/rQLyXawSol0KnDLZ1crTD3d7DkQUgbBlc8B6mDnK5KRETkuLjdNot2lPPS8r18suUgQRas/OVZJESFOV2a+Jlj7XArcMsXbf8U3roVWhvgwgdg1DVOVyQiInJCSmuayC+s5ILh6QD84MXVZCZEcrW63tIFAnKXEukiA8+C2xZD5lh4+3Z481ZornO6KhERkeOWFhfREbZbXW7cbpunF+9m+v3zuebJz5mzrkQ7nEi3U4dbvprbBQvvhwV/gqT+cPmzkD7C6apEREROSmlNE6+tKuKlFXspqmzkf781jO9M7ott21iW9vSWY6eREgXurrNnMbxxCzRUwLm/h/G3mCPjRUREfFj7rPfIrHgSosJ4LX8fb64u5ppTtcOJHJuAHCmxLGumZVlPVFdXO1NA/rMmlPqbvqebEZN+U+CDe+DVG6CxyumqRERETkpQkMXUQakdiylDgi32VZodTib9cS5/+GALu8o0UiknTx3urlKxCx6ZCAnZcO1rZgTD37jd5pCcz/4X4jLMiEnWN76pExER8RlH73AyND2OOXec3vGcjpGXw2mkxImRksJl8PLVYAXB1S9D9oSer6EnFOWb/bprSuDM38CkH0KQX31YIiIiQmlNE6W1zeRlxlPd2MoFDy7iwhHp2uFEOgTkSInjciaZ49LD4+D5mbDpbacr6h5Z4+DWRTD4AvjkV/DilVBf7nRVIiIiXSotLoK8zHgAaptaGZ4Z37HDydVPaIcTOXYK3F0tJRdu+RTSR4G7zelquk9kAlz5AlxwvzmZ8rHTYec8p6sSERHpFlmJUTx2/ViW/WIGPz13cMes995DDQA0tSp4y1fTSEl3cbsgyLO6ed9KyBgNwSHO1tRd9q83IyaHdpi9uyfcCsMuhpBwpysTERHpFm63zZp9VYzNSQTMgTo7DtYxc2Q6M0dmkJOskZNAoBlupwN3u8o98Pfx0H8aXP4MhMc6XFA3aamHtS/CiiegfBtEp8LY78K4m8wCSxERET/28oq9vL6qiPzCSgBGZsVzw6S+XDY2y+HKpDspcHtL4AazXeD7d0
OvoXDNq/4dQG0bds2D5U/Ato9Ml/+Umabr3Wei9u8WERG/VlzVyHvrSpizvoQpA1P52XlDaHW5eS2/iHOH9SI5Rp/++hMFbm8K3ADbP4XXvgMR8SZ0985zuqLuV7EbVj4Fa/4JTdXQe7gJ3sMvh9BIp6sTERHpVm0uNyHBQSzdUc41Ty0nOMji9NwUZo7M4JxhvYiLCHW6RDlJCtzeFrgBDmyAf18JY66H6f/tdDU9p6Ue1r9qxk1KN0NkIoy5wZxYmdDH6epERES6lW3bbD1Qy7vrSpizroSiykbCQoJ4/47TGdjLT0dNA4QCtzcGbjDb50UmmX2rGyogKsnpinqObUPhElj+OGx9H7DN1oITvmdOsdS4iYiI+DnbNostP9tykLvPHkxQkMW9H22lqLKRb43MYMqgVMJCtImcrwjIwG1Z1kxgZm5u7uzt27c7Xc7XqymBx6fC6Gthxq8D7+CY6iJY+TSsfh4aDkHqKTBhNoz4NoTHOF2diIhIj7n/4wL+tbyQqoZW4iJCOD8vnUvHZHJq/2SnS5NvEJCBu51Xd7jbudrgg3tg1bMw7FK4+FEIjXC6qp7X2gQb34AVj8P+dRAeb96EjL8Fkgc4XZ2IiEiPaHW5Wby9nDnrSvh40wEuGJ7OfVeMxLZt1hdVMzwzXsfKeyEFbm8P3GBGLJY8CJ/+BrInwlUvQnSAvpu1bShaacZNNr9t9jEfeLZZZDlgRuB9AiAiIgGrqdVFbVMbqbHhbCqp5sKHFpOZEMlFI9OZOSKDYRlxWBrD9AoK3L4QuNttfBPeug1GXwcX/cXpapxXe8BspbjqWag7CEkDzJz3qGsgIs7p6kRERHpMfXMb/9l8gDnr9rNwWxltbpv+KdE8dv1YBmnBpeMUuH0pcAMUr4KUwWZ+2e1WRxegrQU2v2PGTYpWQlgMjLzazHqnDna6OhERkR5VWd/CR5sO8PGmA/zj2jFEhYXwxqoiSmubOS+vN/1SdLplT1Pg9rXA3a65Dv51KZx6G+Rd6nQ13qN4Nax4Eja+Dq4Wc3LnhFth0LnmcB0REZEAdPer63hjdREA/VKimT44jXOG9WKiFlz2CAVuXw3cDRXw8jWwdxmc9T9w2p3aLu9w9eWw6jnIfwZqiiEhxyywHH1dYG2xKCIi4lFU2cDcraV8tqWUZbsOcdqAZJ69cQIAH208wJicBNJiA3Bjhh6gwO2rgRvMzh3vfN/s3jH2u3DBAxAc4nRV3sXVBlvfM4fpFC6BkEgYfhnkXQ59z9C/LxERCUgNLW1U1LeQlRhFaU0TE/7wGQDDM+OZPiSNGUPSGKEdT7qMArcvB24wc9zz/g8WPQDjZ8OF9ztdkfc6sNEE741vQEsdRCXDKd+CYZdA39M1ciIiIgHJtm227K9lXkEpc7eWsnpvJbYNf75sON8e34f65jbctk2sjpg/YQrcvh642619EXImQ2Jfpyvxfq2NsONT2PQWFHwErfUQnWrCd96l0GeSwreIiASsivoWFmwr5bTcFNJiI3hpxV5+9fZGJvRLYsaQNKYPSaN/SrS2HDwOCtz+Erjbud3w8X/BqGshfYTT1Xi/lgbY8YkJ39s+htYGiOkFQ2eZznf2RO0EIyIiAa3gQC1vrSlm7taDbDtYB0Df5Cg++PEZRIVpNPNYKHD7W+CuKYGnzoKmarjiOXMojByblnoTuje9Bdv/A21NEJveGb6zJih8i4hIQCuqbGDe1lJ2lNbxv7PyAPjxy2tobHFx5ilpTB+cRlqcFl4eTYHb3wI3mND94pVwcDNccB+Mv9npinxPcx1s+8gTvj8BVzPEZsCwi2HYpZA1TrvCiIiIAL9/fzPvr99PSXUTAHmZcVx7ag5XT+jjcGXeIyADt2VZM4GZubm5s7dv3+50Od2juRZev8l0aqf9N0z7udMV+a6mms7wveNTs793fLan830pZI5R+BYRkYBm2zYFB2v5bEsp87aWcvrAFO48axBNrS5+/c5Gpg5K44xBKc
QF6MLLgAzc7fy2w93O1Qb/+SWcMtPswiEnr6kaCj70hO/PwN0K8X08ne9LIGO0wreIiAQ827axLItNJdVc8+RyqhtbCQmyGJWdwKQByVw5LpvspCiny+wxCtz+HLiPtu5lyD0LolOcrsQ/NFZBwQew8U3YNQ/cbeaAnWGXmEv6SIVvEREJeG0uN2v2VTFvaylLdx5iQ3E1b31/MiOyEvh81yGW7ChnYv9kxuYkEhHqn7uEKXAHSuCu2Q8PjYbY3nDt65CS63RF/qWhAra+bzrfu+aD7YKk/p3hu1eewreIiAhQ19xGZGgwwUEWjy/YyZ8/2orbhrDgIEZlJzCxfxI/mJFLeIj/hG8F7kAJ3AD7VsJLV5n57mGXwLibIHuCgmBXqz9kTrfc9BbsXmjCd3JuZ/hOG6p/5yIiIh61Ta3kF1by+a5DfL6rgrKaJpb8YgaWZfGP+TtoanExcUAyY/r4bgdcgTuQAjdA1V5Y+rAZL7GC4O6tEBoJtq0Q2B3qy2HLHNj0JuxZDLbbjJ30GgYpgyB1sLlOGQgR8U5XKyIi4riWNjdhIWYb3lv/mc8nmw92dsD7JDBzRDrXT+rrbJHHSYE70AJ3u+Y6KN0C2ePNYTlPTje7bYy7GXrnOV2df6orhS3vwq4FUL4NDu00iy7bxfSG1EGQ4gnhqYPMdWy63gyJiEjAqm1qJX9PJct2HeLzXYcYmZXA7y7Ow+22ueWFfIZnxjOxfzKj+yR4bQdcgTtQA/fhmmvhg5+ZLmxbkzngZfzNMPRiCNXm9d3G1QqVhVBeYAJ42TZzXb4Nmms6XxceZzrgKYM6L6mDIbEfBOuELxERCSztO6CU1TZz8/Mr2VhcbTrgIUGMzk7ghzNyOWNgqtNlHkGBW4G7U0MFrHsJ8p+BQzvgyhfMXtMaN+lZtg21BzrDd/k2KPOE8tr9na8LCjULM9s74SmDze3kgRAe41z9IiIiPai6sZX8PRUdM+B3nzOIaYPTWFVYwb0fFTCxfzKTBiQzKtu5DrgCtwL3F9m2WeyXMxmCQ2HBvVC41HS9B52vrqqTmmqgfLsniBeY22UFULHLLM5sF5dluuIdM+Kernh0qt48iYhIQFi0vYw/f7SVTSU12DakxISz8pdnYjnwe/BYA7cSViCxLOg/tfN+ZKIJeK9cZ+aJx3wHxn4H4jKcqzFQRcRB1lhzOVxbC1Tu7uyEt1/W/Ata6g77+nizY0pML7Mfe3SaCeExqea6/X5kIgQF9ez3JiIi0oXOGJjKGQNTqW5sZeXuCsrrmh0J28dDHe5A52qD7R+bcZMdn8GQC+Gqf5vnNHLivWwbaoqPnBGv2GV2T6kvNdeHd8bbBYVAVMpRYdxziUk78n50KoSE9fz3JiIi4iPU4ZZjExxiQvaQC6FiN7hazOMVu+DfV5iu9+jrICrJ2TrlSJYF8VnmMmDGF593u6Gx0hO+y8xOKh1hvAzqysz1oR3mdlvjl/9zIuK/olue8sWAHh6rN2giIiJfQoFbOiX167zdVGM6oZ/8Cub+nw7U8TVBQRCdbC6c8vWvtW1oqe/sjNd5Qnn7pT2sl26B+oUmyH+Z0GhIO8XsRd57uLlOGwqRCV3+7YmIiPgSjZTI1zu4yYybrHsFXM1wd4HpdmvcJHC1tUBD+ZGd8vpSqC6G0s1wYAM0VXW+Pr6PCd/tl97DzS4sQd65p6qIiMix0i4lCtxdq7kWildB/2nm/r8uMycrjr/ZhCiRdrYNNSXmzdrBjZ7LJrPzSvtceUiEpxue57l4wrhGl0RExIcEZOC2LGsmMDM3N3f29u3bnS7Hf7W1wJwfdx6ok32qOcly6CwdqCNfrbXJbHl4YOORYbzhUOdr4jIP64Z7wnhyrrasFBERrxSQgbudOtw9pKEC1r5oRk4qdsIlT8DIb5vRgv1rIWu8WVgn8lVsG+oOdnbB28N4eQG428xrgsPNXuPtc+HtYTw6xdnaRUQk4ClwK3D3HLcbdi8wIwKxvU0Af+
8n5rmEPiZ4Z42HkVdrAZ0cm7YWs9Xh4SMpBzaaWfF2Mb2PDOBpQzx7jSfpkxYREekRCtwK3M5pbYT966FopeeSDzVF8PM95uCVtS/B/nWQNc4E8YQ+WoApx6au1DOOcthISllB53aW7UKjzTx4ZKK5jko2QTwqqfP68NuRSdrWUEREjpv24RbnhEZCn1PNpV1dqQk/AGVbYdVzsPxRcz86DfqdAZc/Y+67XdrBQr5cTJq5DJje+Zir1ewnXlZg5sEbK6Ch8rDbFVC111wfvnvK0YJCjwrhXxfWPY9FJujvqoiIfCN1uMUZrlazhVx7B9ztgsueNM89fY7ZF7q9A541HpIH6khyOXmuNhO6Gyo6w/gR14c8tyuPfM7d+hV/oGUOB2oP6CEREBwKwWGeS6iZQT/6sZDww54/mdce9bj+HxER6VEaKVHg9l0L74fCpVCcD03V5rG8y+Hyp83tXfOh9whtISc9w7bNtphHBPPKzvvtnfTGKjPa0nFpNddtzZ23O66bwXZ3fa0R8Sb4Rx7Wof/K+wnmfni8grqIyAnSSIn4rin3mGu324wKFK2E2F7msfpyeGGWuZ00wNMBHwcDz4bEvo6UK37OsiAizly68u+Y23VkOG9rPiqUH31pPSq8t7/W83WtTeYNascbg0qze1BjZecb1y/9/oIgIuFrwnnil4d3zbyLiBwzBW7xXkFBkDrIXNqFx8F33+8cRdk5F9a/DN/6uwlDBzfDZ7+F5AHmNMPkXHM7NkNdPPEuQcEQFGnWPHQ3V1tnGO8Yl6n88vt1B6B0i7nfUvs19Yd8MaBHJZvtGqNTISrFczul83ZIePd/ryIiXkiBW3xLSBj0Pd1cwHzcX73PdNvAhIqqvbBrnjmUp931b5uFdkWrYOsc0x1vD+PRqerUiX8LDoHoZHM5Hm0th828Vx7ZPT86rFftheLV0FDeuYf60cLjPKE89YthPDr1i4E9JOzkv3fpXi0NpgFSshris6HPJIjPdLoqEa+jwC2+zbLMtoLtcibB95eacZTaEji004yl9B5hnj+4AZb+/chFcOFxcPsS8+cU5UPFLk8g79+5s4pIIAoJ69wZ5ljZtgnp9YegvswE8HrPpeN2mSegrzIz8F8X0DtCeap5w9BxO+XI8K6A3jMaK2HvcihcAnuXQcmaL/73i8+GPhPNJXuiOaNBu/lIgNOiSQk8rjao3usJ455Afu4fzC/rD34GKx7vfG1UsumEf/d9sxvEwU1m9japP4THOPc9iPiLjoB+eCgvM4G94/ZRgd12ffmfFdPLvHE++hLfBxKye2Z8x9/UHjCL2AuXmoB9cBNgm200M8eaJkefyWYtTVUh7P2881J3wPwZ4fGQPb4zgGeOhbAoR78tka6iXUoUuOVEtDZB5R6z2OzQTnPdcAi+/S/z/MvXwtb3zO3YdNMJzxgF5/7ePFa1z+z+0D7iIiJdy+32jLkc6uyWN5RDXZkZL6vaay7VRV/czjE67agwng0JOZ5Qnq0QaNtQuRsKl3kC9lLziR+Yw6Syx0POaZAz2YTmr3sDY9uHBfBlpitetsU8FxQC6SPN+El7CI9J7f7vT6QbKHArcEt3KNsGpZs6u+MVO83H3te9bp5/Ypr5iDUyCRJzzC/znMlw6q3m+epiLR4T6Qlul+nOdgTwvZ23q/aaN8dfCOSpneG7I5TndIbzsGhnvpfu4nabEHx4B7t2v3kuMtEE4pzJpoOdPsJ8yncyGirMvHd7AC9eZXbZAdO8OHwMJWWg1tZ4u+Y6qCn2XPZDbG+zc1hEnNOV9SgFbgVucULBR+YkzapC0ymvLISM0Z17iN+Xa7pycRnmF3liDuSeBcMvN8/XlJiPxTXvKNK93G4z8tAevqsKjwzk1fvMdouHi0rxdMWPCuPx2eZxb/9ky9UK+9cdGbDbT1+NzTDjITmTTRc7ZXD37+zU1mzqaQ/ge5eZxbhgxvmyJ3pOLZ5kOuJqVPScpprDwnSJ51Jsmkbt95
u/bLtRC3oNg+wJkH2quU7s59dvnhS4FbjF29g2rH+lM4hX7jG/5IdeDOf9wYyz/L63+bi1/aPuxBwYOgsGzDAdu8ZK84vIj394iXgFtxvqDh42plL4xQ55e3e2XXC4Z6vEhM79yyMOu93xeAJEHHY/Ir573mS3NJgDxAqXmUWORSuhtcE8lzTAE7BPM4E2sa/zP1ds26yp2buscw68Yqd5LjjcjLG0B/DsCVrUfiJs2+zm1R6gOwL1UWH6C1uCWmbxdFwGxGV6Lhmd17G9zf8X+1bAvuXm71pzjfnS6NTO8J19KqSPgtCIHv/Wu4sCtwK3+ArbNr/oWhth3UsmjFcVdl6fdiec9iOo2A0PjYKwmM4wnpADI66EzDGmW9RSb34JOf2LU8Tfud1QX9rZHa/e17lFYlOVOXm0sbLzurX+6/+89lNCIxKOMbR7Hg+N7Pz/vbHKhJ32DnbJGs/YjAW98jo72H0mdx4m5u3qSs331B7A96/t3BUl9ZTDxlAmQFiseRPUfojU4ddtzV/9nKvZbIF5xHX7801f89zh163mTVNIhAmTIZ5LaKTpzId4rr/0fsQXv+5Y/pygkCN/1tu2+bv2hTB9VKD+wt9FywTmuAzPJeuw25lmm8eY3se3C5DbbT7t3be8M4S3v3kKCjVrnw4P4bG9T+qviZMUuBW4xV+0B/L6Q7Dh1SO745WFMOvvkHcp7F4Ez18EwWHmh2Os5zLlHvNxbO0Bs8NAbLp5XMFcpOe072l+eAjvCOdf9Zjn8a/alQU6u+qhkebnArYJYhljOjvY2aeaoO4PWhrMnt/tYyj7VnzFaMMJCAr1BNww8+/1C9fh5ufrEdftz4eZTyHbGk0Ab230hPX2281HPee5/1VbYh4LK8gE8NAIU0djpfkzj37mM5zYAAAUW0lEQVRNbPo3hOleJz+ffyzqyzvD974V5r9j+3kZCX08AdwTwtOGmfMDfEBABm7LsmYCM3Nzc2dv377d6XJEup9tg+02nZWqfWYHldr9UHvQc30ALv6H2bJr3Svw1vc6vzY43HS5rn4Feg01hwLtWXhYWE83z0ckKJiLOMW2obn2iyH86HDeXAupQzw7iIwLnB1X3C5zMmpxvukyHxGCjzUsex5z4jRiV5snmHsurU1fcv/oEP8Vr41I+OKoR0wv7w2ubS1wYIMngHsu7Yt2Q6Mha2xnCM8a57UjRAEZuNupwy3yJRoqoKygM4jXHTDXZ//OBOslD8Env/ri1/1kE8RnwbqXoeDDziDe3inPOd17f6CLiIhvaD85uqMLvhwObOz8hCd1yGFd8FPNSdFe0AxS4FbgFjl+LfUmhNceMMG87iBM+J75uPHzR2Hl0+a59gU1VhD8qtx02D/6b9j2YWcQj003QX3i7ea19eXmY29/21pNRES6R3OdGT3pmAVf0bmzTmTSkXPgGaMd+WRHgVuBW6T7NNeZMF5fZhYsAaz5F+yc2xnWa/ZDVBLctfn/t3fv8XHVZR7HP0+uTZvQpi2UllKKpQIWKJQqIuKrcrPFagWVFlkUEN2CKPqSteruoourL9FFXVxFAYugBREQhFVuCioqF6ELlIJcDUrpDdKmDU2TJnn2j+dM50wyk6akk8nl+3695jVnznNm8ptfz3Se85vn/E7El50Cz94Z85Zn6ssnHBwztEDMpGBl2ZimABMRkbTOTnj12VQZykPwyjMRO+gD2Sl4+5ESbiXcIqXlHlOQZUa0n7kT1j2ZTcg3r4lppjJX8bzs7bB2Rfb5I8fBfsfDyT+Mxw/+MEbSt4+gT4rpplTOIiIyfGUuqFRTH6Pd/ay3Cbe+qUSkOMxyy0fe+K64FfL+K+Ny3JtXZ29jpmTjf/hmjKinzTgZPnhVLN9wRpz4NGJ0jKKPGB3z9k49KpL/Vctj3YgkphF0EZHBb+TYnr9bBggl3CIyMOxxQNwK+ezTkXBnylXSCXlHO7z6fFzQYWtTXHDBO+HI8yLhbnsNrjwm9/XKq2HOEj
j6szFCcuNZSUKeSsqnHRtznLdtifl/08l8VW1pZjUQEZFBRwm3iAwOZeXZ+u5Jh+XGyitg8X3Zx+7Q1pyKV8GHfh6XK966MZuUTzw04u2tMa3aplXZpD0zzdZes6DxBbhqXpcGGZz0A5i5KOY3v31JTMG128Qod9ltYsyBXLtHUbpDREQGDyXcIjL0mEF1XfZxRVXPPznuNhE+9tvcde2py3bX7wOn3xJJeiYh37oJ9nhTxDvb47bqYXhqdfaS36fdBNOPi+kUf3lebjJeNwlmnR5z5bZujjmEdTEiEZEhSQm3iEg+6Rrv6jqY9s7C206cCWfdEcvpyytnSl5qJ8AB707KYV6OxHzLq3DgeyLhfuxn8OsL4ip3dROTK8NNhLkXQ+3uUS7TvC5J1Ceq/lxEZJBRwi0isiuZxUk8I8dm1+01K25p7a1xCW6AKUfCu74WyXhmFpdVy2NkHmD5NfCn72SfO3JcjJCf/Zu4rPPz98ZlvavrYq7zihFRYz7liNh+S2McCGRiqj0XEelXSrhFREohPUq950FxK+TNZ8PUo2Hzy8kJoy/HCHnliIg/ugxW3JD7nJp6WNIQy7edD0/dmvrbNXGVtnP+FI9/dQGsfiwS8sqRcT92Xzj2wogv/0ny92qy29TtCVPfHvH1yTy41XVxwmnlSJXGiIikKOEWERnoxuwdt0IWfB+O+4+Y93zbFtjWAp0d2fjhZ0TCnolt2xIj4BnVdXGFtm0t0NIY962bs/G/XBmztKRNeRucdXss/+xDcTGKDCuPEpqFP4nHN5wB27ZGMl69W9xPOixKaiAumFRRkxuvqtNIvIgMGUq4RUQGu4oqGL1X4fh+x8atkOO+1PPrf+zemLWlfWs2abdUMjzv4ihbad2UnFi6KUbIM7a1xAww61LxQxZmE+5rF0JHW+7fnH0WzP92HDhcdlR29Lx6t1jef17c2lvhiV9AdW0k+hCj67vvD2PfEFNCNvwxeVFLxQ+Ig5itm+Clh1J/ONlmwowYxW/ZEKP/3eIHwahx8b7XrkxO1N0tflmoGRMHNBrlF5GEEm4REelZWVmMgFeNBMZ2j/eUzAN86Prcx+4xq0tm+cw7oDWZ+SWTkE9IZoDpaIPx+8W6LY1Rq751U8wcs/+8KHW5ZXH3v3n8V+CoT0VN/LWndI/P/3Yk9Y0vwE/f3z1+8hVwyCmRTF+zoHt80XVwwIlxaenrFnaPf/iX8IY58Nxv4HcXRxJeUx+3EWNg9pmR0G96OS74lFlfMwbKK3voTBEZjHRpdxERGbw62qHp79DaHBc7IvlOq5sEdROilGXdyu2rty+MmRJzpLc2w7onk1Dq+3DcNBg1Hlo25o/vfkDuCLd3RBlOy4Z4zsEfiBlonr83Tnht2ZiNtTbBJx6KUfgHLoM7Pp/7nqrq4Nz7YwT+iZtg5S3ZkfNM0j7z1DgPoHldjPLXjIHKUSrDEelnurS7iIgMfeUVUTpSSOUI2OvwwvHqWtj7LYXjNWNgn7cVjo8cC/seXTg+7Z3dp5TsaM+W5Bz4Hhg3PZLxrZmkfEMk1RDLrzyTXZ8pvTlkUdzfdwk8+IPsa5dXxUmrSxqipOWer8Kzd2VPeK2ogZH1sOB7sf1j10f9fcWI7Cw2I8fBjPdFfM2KuNJq5Yh43YoRUDUqdxYeEdkhJdwiIiL9qTz11Tt6ctwKefPZcYMYYd/WEol3ZoaaQxZGPXnLhuwJsd6RrR+vqY+R/MyJsM3r4bX12dd/+lfw5K2kfgKA+qnZhPvOL8Lf/pDbpj1mwLl/juWl82DtE8l0k0kN/eTZsPCnsfzj+VEGhCXl7xYHKJmEf+ncGKU3Y3t9/PQTYO7XYvmKY6MOPxM3i4OUOcmvApfPiV82rCwOCKpqIz7r9Diwue+SqPmvrk1ODq6LEqX6qdDZGb82VNXl/puIFIH2MBERkcHALFVLn8g3x3vakefGrZBTrolEvqMtSdhbImHPOOGr8Nq6KM1p3xrx6tQMN29aEB
d+am+JxNc9ynEyJs+G0XsDnpTkOIzfPxufMCNKb7aX63juAci4aclBRBJ3jxH4jNo9k7/bGW1rXhMHHwBtm+F3X+v+nud8EeYsiW2/dWCsq6jJJubv+BwcemrU1999YZKo12ZP2J12DOz+xjiAWffXeE5VbTKvvsdBTmVNtOe19bnvzT1q9ytr4vmb12T7JfP+6veJ+JbGaMP2viPbf5Uj4kCl6R/JUz3bD5MOi3KjDQ1xjoJ3xsFFJj79+DhPYM0KWP90dn1nR9wf9k+xrzX8EdY+mcSTmJVn96enbotyqszzyirixOYjPxHxF34X05hWVEF5dbSpug6mvDXiGxqgvS03XjEid/8eQlTDLSIiIkNTR3sk3q2bo16/dXMkvPX7wNYm+L9l0NaczLCTxGeeCtOPi2T02oWxrq05DjgATvohzFwEL94PV83t/jcXLoMD58Mzd8G1H+wez5xQ+8RNcONZ3eNn3wOTD4dHrobbPtU9fu6DsMcBcP/34c4vdI9/ZmUctPz+G3DvV7vHl7wYpVJ3/Tv8+dLu8Qsb49eK//0MPLw0N1ZRA/+2JpZ/8XF4PDkh2soi6a6dABck8/JfuwieuT33+fX7wvnJFKM/ng8N9+XG9zwYFiezCi2dFwl9JiEvr4zyr5Mvj/jNi+OApaI63u+7L+n+XvpBb2u4lXCLiIiI7EjHtki+M6OwWxph1SPZZN07Y7tpx0RC3/RSnDSbLpcxi3jdnrDx7/D3B7PlP5n7N7wzauQ3NCRTUlrua+z7jhhJbvxbHBRsL7cpi5NmpxwZI+Qb/xHTcVpZ7m3CQVFC07wuTuLNPM/KYgR79OR4za2b4pePzPPKyuO+alS0o7Mztsu0u7MTOrdlL+r12ivRN+1t0NEa92Vl2XMq/nZfJMwdqXhNPcxMZv25//vRR5lYR1v84pEpJ7rxo7DxxThpuG4inPbzXf5P3htKuJVwi4iIiEgR9Tbh1vxBIiIiIiJFpIRbRERERKSIhlTCbWbvMbPLm5qaSt0UERERERFgiCXc7n6bu3989OjRpW6KiIiIiAgwxBJuEREREZGBRgm3iIiIiEgRKeEWERERESkiJdwiIiIiIkWkhFtEREREpIiUcIuIiIiIFJESbhERERGRIlLCLSIiIiJSREq4RURERESKSAm3iIiIiEgRKeEWERERESkiJdwiIiIiIkWkhFtEREREpIiUcIuIiIiIFJESbhERERGRIlLCLSIiIiJSREq4RURERESKSAm3iIiIiEgRKeEWERERESkic/dSt2GXM7P1wIsl+NPjgVdK8HeHCvVf36j/+kb913fqw75R//WN+q9v1H+vzz7uvvuONhqSCXepmNnD7j671O0YrNR/faP+6xv1X9+pD/tG/dc36r++Uf8Vl0pKRERERESKSAm3iIiIiEgRKeHetS4vdQMGOfVf36j/+kb913fqw75R//WN+q9v1H9FpBpuEREREZEi0gi3iIiIiEgRKeF+Hcxsrpk9bWbPmdnn88Srzez6JP6gmU3t/1YOTGa2t5nda2ZPmtlKMzs/zzZzzKzJzB5NbheWoq0DlZk1mNmKpG8ezhM3M7s02f8eN7NZpWjnQGRm+6f2q0fNbJOZfbrLNtr/ujCzpWa2zsyeSK0ba2Z3m9mzyX19ged+JNnmWTP7SP+1euAo0H/fNLO/Jp/Rm81sTIHn9vh5Hw4K9N+XzWxV6nN6YoHn9vh9PRwU6L/rU33XYGaPFnjusN//dhWVlOwkMysHngGOB14C/gKc6u5PprY5FzjE3Reb2SLgJHdfWJIGDzBmNhGY6O7LzawOeAR4X5f+mwNc4O7zS9TMAc3MGoDZ7p53vtTki+eTwInAEcB/u/sR/dfCwSH5LK8CjnD3F1Pr56D9L4eZvQNoBq5x94OSdd8AGt3960kiU+/uS7o8byzwMDAbcOLzfri7b+jXN1BiBfrvBOAed283s4sBuvZfsl0DPXzeh4MC/fdloNnd/6uH5+3w+3
o4yNd/XeKXAE3uflGeWAPDfP/bVTTCvfPeAjzn7i+4exvwM2BBl20WAFcnyzcCx5qZ9WMbByx3X+3uy5PlzcBTwF6lbdWQs4D4j9Xd/QFgTHKgI7mOBZ5PJ9uSn7v/AWjssjr9/9zVwPvyPPVdwN3u3pgk2XcDc4vW0AEqX/+5+13u3p48fACY3O8NGyQK7H+90Zvv6yGvp/5LcpNTgOv6tVHDkBLunbcX8I/U45fonjBu3yb5D7UJGNcvrRtEklKbw4AH84SPNLPHzOx2M5vRrw0b+By4y8weMbOP54n3Zh8VWEThLxntfzs2wd1XJ8trgAl5ttG+2DtnAbcXiO3o8z6cnZeU5CwtUNKk/W/HjgbWuvuzBeLa/3YRJdxSEmZWC9wEfNrdN3UJLyculToT+C5wS3+3b4B7u7vPAuYBn0h+LpSdYGZVwHuBG/KEtf/tJI/aRNUnvg5m9q9AO7CswCb6vOd3GTANOBRYDVxS2uYMWqfS8+i29r9dRAn3zlsF7J16PDlZl3cbM6sARgOv9kvrBgEzqySS7WXu/ouucXff5O7NyfKvgUozG9/PzRyw3H1Vcr8OuJn42TStN/vocDcPWO7ua7sGtP/12tpMqVJyvy7PNtoXe2BmZwDzgdO8wAlVvfi8D0vuvtbdO9y9E7iC/P2i/a8HSX5yMnB9oW20/+06Srh33l+A6Wa2bzJKtgi4tcs2twKZs/E/QJwYo9EftteL/Qh4yt2/VWCbPTM172b2FmI/1QELYGajkpNNMbNRwAnAE102uxX4sIW3EifDrEbSCo7qaP/rtfT/cx8BfplnmzuBE8ysPvnJ/4Rk3bBnZnOBzwHvdfctBbbpzed9WOpyXspJ5O+X3nxfD2fHAX9195fyBbX/7VoVpW7AYJOcUX4e8aVRDix195VmdhHwsLvfSiSUPzGz54gTFRaVrsUDzlHA6cCK1DREXwSmALj7D4iDlHPMrB1oARbpgGW7CcDNST5YAVzr7neY2WLY3n+/JmYoeQ7YApxZorYOSMkXx/HAP6fWpftP+18XZnYdMAcYb2YvAV8Cvg783Mw+CrxInHiFmc0GFrv72e7eaGZfIRIfgIvc/fWc/DaoFei/LwDVwN3J5/mBZGarScCV7n4iBT7vJXgLJVWg/+aY2aFEKVMDyec53X+Fvq9L8BZKKl//ufuPyHMei/a/4tG0gCIiIiIiRaSSEhERERGRIlLCLSIiIiJSREq4RURERESKSAm3iIiIiEgRKeEWERERESkiJdwiIkOImXWY2aOp2+d34WtPNTPNwysispM0D7eIyNDS4u6HlroRIiKSpRFuEZFhwMwazOwbZrbCzB4ys/2S9VPN7B4ze9zMfmtmU5L1E8zsZjN7LLm9LXmpcjO7wsxWmtldZlZTsjclIjJIKOEWERlaarqUlCxMxZrc/WDgf4DvJOu+C1zt7ocAy4BLk/WXAr9395nALCBzhb7pwPfcfQawEXh/kd+PiMigpytNiogMIWbW7O61edY3AMe4+wtmVgmscfdxZvYKMNHdtyXrV7v7eDNbD0x299bUa0wF7nb36cnjJUClu/9n8d+ZiMjgpRFuEZHhwwss74zW1HIHOhdIRGSHlHCLiAwfC1P39yfLfwYWJcunAfcly78FzgEws3IzG91fjRQRGWo0MiEiMrTUmNmjqcd3uHtmasB6M3ucGKU+NVn3SeAqM/sXYD1wZrL+fOByM/soMZJ9DrC66K0XERmCVMMtIjIMJDXcs939lVK3RURkuFFJiYiIiIhIEWmEW0RERESkiDTCLSIiIiJSREq4RURERESKSAm3iIiIiEgRKeEWERERESkiJdwiIiIiIkWkhFtEREREpIj+H3cf4zoou3byAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plot_loss(zero_bias_history, \"Zero Bias\", 0)\n", - "plot_loss(careful_bias_history, \"Careful Bias\", 1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "fKMioV0ddG3R" - }, - "source": [ - "The above figure makes it clear: In terms of validation loss, on this problem, this careful initialization gives a clear advantage. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "RsA_7SEntRaV" - }, - "source": [ - "### Train the model" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "yZKAc8NCDnoR" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train on 182276 samples, validate on 45569 samples\n", - "Epoch 1/100\n", - "182276/182276 [==============================] - 3s 16us/sample - loss: 0.0256 - tp: 64.0000 - fp: 745.0000 - tn: 181227.0000 - fn: 240.0000 - accuracy: 0.9946 - precision: 0.0791 - recall: 0.2105 - auc: 0.8031 - val_loss: 0.0079 - val_tp: 17.0000 - val_fp: 7.0000 - val_tn: 45479.0000 - val_fn: 66.0000 - val_accuracy: 0.9984 - val_precision: 0.7083 - val_recall: 0.2048 - val_auc: 0.9377\n", - "Epoch 2/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.0100 - tp: 111.0000 - fp: 131.0000 - tn: 181841.0000 - fn: 193.0000 - accuracy: 0.9982 - precision: 0.4587 - recall: 0.3651 - auc: 0.8758 - val_loss: 0.0056 - val_tp: 40.0000 - val_fp: 7.0000 - val_tn: 45479.0000 - val_fn: 43.0000 - val_accuracy: 0.9989 - val_precision: 0.8511 - val_recall: 0.4819 - val_auc: 0.9422\n", - "Epoch 3/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.0075 - tp: 148.0000 - fp: 57.0000 - tn: 181915.0000 - fn: 156.0000 - accuracy: 0.9988 - precision: 0.7220 - recall: 0.4868 - auc: 0.9206 - val_loss: 0.0048 - 
val_tp: 52.0000 - val_fp: 7.0000 - val_tn: 45479.0000 - val_fn: 31.0000 - val_accuracy: 0.9992 - val_precision: 0.8814 - val_recall: 0.6265 - val_auc: 0.9382\n", - "Epoch 4/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.0065 - tp: 157.0000 - fp: 48.0000 - tn: 181924.0000 - fn: 147.0000 - accuracy: 0.9989 - precision: 0.7659 - recall: 0.5164 - auc: 0.9210 - val_loss: 0.0045 - val_tp: 52.0000 - val_fp: 7.0000 - val_tn: 45479.0000 - val_fn: 31.0000 - val_accuracy: 0.9992 - val_precision: 0.8814 - val_recall: 0.6265 - val_auc: 0.9387\n", - "Epoch 5/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.0058 - tp: 172.0000 - fp: 43.0000 - tn: 181929.0000 - fn: 132.0000 - accuracy: 0.9990 - precision: 0.8000 - recall: 0.5658 - auc: 0.9246 - val_loss: 0.0042 - val_tp: 51.0000 - val_fp: 7.0000 - val_tn: 45479.0000 - val_fn: 32.0000 - val_accuracy: 0.9991 - val_precision: 0.8793 - val_recall: 0.6145 - val_auc: 0.9390\n", - "Epoch 6/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.0054 - tp: 169.0000 - fp: 28.0000 - tn: 181944.0000 - fn: 135.0000 - accuracy: 0.9991 - precision: 0.8579 - recall: 0.5559 - auc: 0.9210 - val_loss: 0.0039 - val_tp: 56.0000 - val_fp: 7.0000 - val_tn: 45479.0000 - val_fn: 27.0000 - val_accuracy: 0.9993 - val_precision: 0.8889 - val_recall: 0.6747 - val_auc: 0.9391\n", - "Epoch 7/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.0054 - tp: 167.0000 - fp: 33.0000 - tn: 181939.0000 - fn: 137.0000 - accuracy: 0.9991 - precision: 0.8350 - recall: 0.5493 - auc: 0.9224 - val_loss: 0.0038 - val_tp: 60.0000 - val_fp: 7.0000 - val_tn: 45479.0000 - val_fn: 23.0000 - val_accuracy: 0.9993 - val_precision: 0.8955 - val_recall: 0.7229 - val_auc: 0.9392\n", - "Epoch 8/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.0050 - tp: 182.0000 - fp: 28.0000 - tn: 181944.0000 - fn: 122.0000 - accuracy: 
0.9992 - precision: 0.8667 - recall: 0.5987 - auc: 0.9215 - val_loss: 0.0038 - val_tp: 62.0000 - val_fp: 7.0000 - val_tn: 45479.0000 - val_fn: 21.0000 - val_accuracy: 0.9994 - val_precision: 0.8986 - val_recall: 0.7470 - val_auc: 0.9332\n", - "Epoch 9/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.0047 - tp: 186.0000 - fp: 36.0000 - tn: 181936.0000 - fn: 118.0000 - accuracy: 0.9992 - precision: 0.8378 - recall: 0.6118 - auc: 0.9238 - val_loss: 0.0036 - val_tp: 63.0000 - val_fp: 7.0000 - val_tn: 45479.0000 - val_fn: 20.0000 - val_accuracy: 0.9994 - val_precision: 0.9000 - val_recall: 0.7590 - val_auc: 0.9332\n", - "Epoch 10/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.0048 - tp: 176.0000 - fp: 33.0000 - tn: 181939.0000 - fn: 128.0000 - accuracy: 0.9991 - precision: 0.8421 - recall: 0.5789 - auc: 0.9208 - val_loss: 0.0036 - val_tp: 63.0000 - val_fp: 7.0000 - val_tn: 45479.0000 - val_fn: 20.0000 - val_accuracy: 0.9994 - val_precision: 0.9000 - val_recall: 0.7590 - val_auc: 0.9332\n", - "Epoch 11/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.0045 - tp: 180.0000 - fp: 32.0000 - tn: 181940.0000 - fn: 124.0000 - accuracy: 0.9991 - precision: 0.8491 - recall: 0.5921 - auc: 0.9341 - val_loss: 0.0035 - val_tp: 64.0000 - val_fp: 7.0000 - val_tn: 45479.0000 - val_fn: 19.0000 - val_accuracy: 0.9994 - val_precision: 0.9014 - val_recall: 0.7711 - val_auc: 0.9331\n", - "Epoch 12/100\n", - "169984/182276 [==========================>...] 
- ETA: 0s - loss: 0.0045 - tp: 175.0000 - fp: 30.0000 - tn: 169674.0000 - fn: 105.0000 - accuracy: 0.9992 - precision: 0.8537 - recall: 0.6250 - auc: 0.9306Restoring model weights from the end of the best epoch.\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.0045 - tp: 188.0000 - fp: 31.0000 - tn: 181941.0000 - fn: 116.0000 - accuracy: 0.9992 - precision: 0.8584 - recall: 0.6184 - auc: 0.9326 - val_loss: 0.0034 - val_tp: 63.0000 - val_fp: 6.0000 - val_tn: 45480.0000 - val_fn: 20.0000 - val_accuracy: 0.9994 - val_precision: 0.9130 - val_recall: 0.7590 - val_auc: 0.9332\n", - "Epoch 00012: early stopping\n" - ] - } - ], - "source": [ - "model = make_model()\n", - "model.load_weights(initial_weights)\n", - "baseline_history = model.fit(\n", - " train_features,\n", - " train_labels,\n", - " batch_size=BATCH_SIZE,\n", - " epochs=EPOCHS,\n", - " callbacks=[early_stopping],\n", - " validation_data=(val_features, val_labels),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "iSaDBYU9xtP6" - }, - "source": [ - "### Check training history\n", - "In this section, you will produce plots of your model's accuracy and loss on the training and validation set. These are useful to check for overfitting, which you can learn more about in this [tutorial](https://www.tensorflow.org/tutorials/keras/overfit_and_underfit).\n", - "\n", - "Additionally, you can produce these plots for any of the metrics you created above. False negatives are included as an example." 
- ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "WTSkhT1jyGu6" - }, - "outputs": [], - "source": [ - "def plot_metrics(history):\n", - " metrics = [\"loss\", \"auc\", \"precision\", \"recall\"]\n", - " for n, metric in enumerate(metrics):\n", - " name = metric.replace(\"_\", \" \").capitalize()\n", - " plt.subplot(2, 2, n + 1)\n", - " plt.plot(\n", - " history.epoch,\n", - " history.history[metric],\n", - " color=colors[0],\n", - " label=\"Train\",\n", - " )\n", - " plt.plot(\n", - " history.epoch,\n", - " history.history[\"val_\" + metric],\n", - " color=colors[0],\n", - " linestyle=\"--\",\n", - " label=\"Val\",\n", - " )\n", - " plt.xlabel(\"Epoch\")\n", - " plt.ylabel(name)\n", - " if metric == \"loss\":\n", - " plt.ylim([0, plt.ylim()[1]])\n", - " elif metric == \"auc\":\n", - " plt.ylim([0.8, 1])\n", - " else:\n", - " plt.ylim([0, 1])\n", - "\n", - " plt.legend()" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "u6LReDsqlZlk" - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plot_metrics(baseline_history)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "UCa4iWo6WDKR" - }, - "source": [ - "Note: That the validation curve generally performs better than the training curve. This is mainly caused by the fact that the dropout layer is not active when evaluating the model." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "aJC1booryouo" - }, - "source": [ - "### Evaluate metrics\n", - "\n", - "You can use a [confusion matrix](https://developers.google.com/machine-learning/glossary/#confusion_matrix) to summarize the actual vs. predicted labels where the X axis is the predicted label and the Y axis is the actual label." - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "aNS796IJKrev" - }, - "outputs": [], - "source": [ - "# TODO 1\n", - "train_predictions_baseline = #TODO: Your code goes here.\n", - "test_predictions_baseline = #TODO: Your code goes here." 
- ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "MVWBGfADwbWI" - }, - "outputs": [], - "source": [ - "def plot_cm(labels, predictions, p=0.5):\n", - " cm = confusion_matrix(labels, predictions > p)\n", - " plt.figure(figsize=(5, 5))\n", - " sns.heatmap(cm, annot=True, fmt=\"d\")\n", - " plt.title(f\"Confusion matrix @{p:.2f}\")\n", - " plt.ylabel(\"Actual label\")\n", - " plt.xlabel(\"Predicted label\")\n", - "\n", - " print(\"Legitimate Transactions Detected (True Negatives): \", cm[0][0])\n", - " print(\n", - " \"Legitimate Transactions Incorrectly Detected (False Positives): \",\n", - " cm[0][1],\n", - " )\n", - " print(\"Fraudulent Transactions Missed (False Negatives): \", cm[1][0])\n", - " print(\"Fraudulent Transactions Detected (True Positives): \", cm[1][1])\n", - " print(\"Total Fraudulent Transactions: \", np.sum(cm[1]))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "nOTjD5Z5Wp1U" - }, - "source": [ - "Evaluate your model on the test dataset and display the results for the metrics you created above." - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "poh_hZngt2_9" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "loss : 0.005941324691873794\n", - "tp : 55.0\n", - "fp : 12.0\n", - "tn : 56845.0\n", - "fn : 50.0\n", - "accuracy : 0.99891156\n", - "precision : 0.8208955\n", - "recall : 0.52380955\n", - "auc : 0.9390888\n", - "\n", - "Legitimate Transactions Detected (True Negatives): 56845\n", - "Legitimate Transactions Incorrectly Detected (False Positives): 12\n", - "Fraudulent Transactions Missed (False Negatives): 50\n", - "Fraudulent Transactions Detected (True Positives): 55\n", - "Total Fraudulent Transactions: 105\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "baseline_results = model.evaluate(\n", - " test_features, test_labels, batch_size=BATCH_SIZE, verbose=0\n", - ")\n", - "for name, value in zip(model.metrics_names, baseline_results):\n", - " print(name, \": \", value)\n", - "print()\n", - "\n", - "plot_cm(test_labels, test_predictions_baseline)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "PyZtSr1v6L4t" - }, - "source": [ - "If the model had predicted everything perfectly, this would be a [diagonal matrix](https://en.wikipedia.org/wiki/Diagonal_matrix) where values off the main diagonal, indicating incorrect predictions, would be zero. In this case the matrix shows that you have relatively few false positives, meaning that there were relatively few legitimate transactions that were incorrectly flagged. However, you would likely want to have even fewer false negatives despite the cost of increasing the number of false positives. This trade off may be preferable because false negatives would allow fraudulent transactions to go through, whereas false positives may cause an email to be sent to a customer to ask them to verify their card activity." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "P-QpQsip_F2Q" - }, - "source": [ - "### Plot the ROC\n", - "\n", - "Now plot the [ROC](https://developers.google.com/machine-learning/glossary#ROC). This plot is useful because it shows, at a glance, the range of performance the model can reach just by tuning the output threshold." 
def plot_roc(name, labels, predictions, **kwargs):
    """Add one ROC curve, in percent units, to the current matplotlib axes.

    Args:
        name: Legend label for the curve.
        labels: Ground-truth labels forwarded to ``sklearn.metrics.roc_curve``.
        predictions: Predicted scores forwarded to ``sklearn.metrics.roc_curve``.
        **kwargs: Extra keyword arguments passed through to ``plt.plot``
            (for example ``color`` or ``linestyle``).

    Side effects:
        Draws on the current axes, sets the axis labels, restricts the view to
        false positives in [-0.5, 20] and true positives in [80, 100.5],
        enables the grid and forces an equal aspect ratio.
    """
    false_pos_rate, true_pos_rate, _thresholds = sklearn.metrics.roc_curve(
        labels, predictions
    )

    # Rates come back as fractions; the axes are labelled in percent.
    fp_percent = 100 * false_pos_rate
    tp_percent = 100 * true_pos_rate
    plt.plot(fp_percent, tp_percent, label=name, linewidth=2, **kwargs)

    plt.xlabel("False positives [%]")
    plt.ylabel("True positives [%]")

    # Zoom in on the top-left corner of the ROC plot.
    plt.xlim([-0.5, 20])
    plt.ylim([80, 100.5])
    plt.grid(True)
    plt.gca().set_aspect("equal")
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plot_roc(\n", - " \"Train Baseline\", train_labels, train_predictions_baseline, color=colors[0]\n", - ")\n", - "plot_roc(\n", - " \"Test Baseline\",\n", - " test_labels,\n", - " test_predictions_baseline,\n", - " color=colors[0],\n", - " linestyle=\"--\",\n", - ")\n", - "plt.legend(loc=\"lower right\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "gpdsFyp64DhY" - }, - "source": [ - "It looks like the precision is relatively high, but the recall and the area under the ROC curve (AUC) aren't as high as you might like. Classifiers often face challenges when trying to maximize both precision and recall, which is especially true when working with imbalanced datasets. It is important to consider the costs of different types of errors in the context of the problem you care about. In this example, a false negative (a fraudulent transaction is missed) may have a financial cost, while a false positive (a transaction is incorrectly flagged as fraudulent) may decrease user happiness." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "cveQoiMyGQCo" - }, - "source": [ - "## Class weights" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ePGp6GUE1WfH" - }, - "source": [ - "### Calculate class weights\n", - "\n", - "The goal is to identify fraudulent transactions, but you don't have very many of those positive samples to work with, so you would want to have the classifier heavily weight the few examples that are available. You can do this by passing Keras weights for each class through a parameter. These will cause the model to \"pay more attention\" to examples from an under-represented class."
- ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "qjGWErngGny7" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Weight for class 0: 0.50\n", - "Weight for class 1: 289.44\n" - ] - } - ], - "source": [ - "# Scaling by total/2 helps keep the loss to a similar magnitude.\n", - "# The sum of the weights of all examples stays the same.\n", - "# TODO 1\n", - "weight_for_0 = #TODO: Your code goes here.\n", - "weight_for_1 = #TODO: Your code goes here.\n", - "\n", - "class_weight = #TODO: Your code goes here.\n", - "\n", - "print('Weight for class 0: {:.2f}'.format(weight_for_0))\n", - "print('Weight for class 1: {:.2f}'.format(weight_for_1))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Mk1OOE2ZSHzy" - }, - "source": [ - "### Train a model with class weights\n", - "\n", - "Now try re-training and evaluating the model with class weights to see how that affects the predictions.\n", - "\n", - "Note: Using `class_weight` changes the range of the loss. This may affect the stability of the training depending on the optimizer. Optimizers whose step size is dependent on the magnitude of the gradient, like `optimizers.SGD`, may fail. The optimizer used here, `optimizers.Adam`, is unaffected by the scaling change. Also note that because of the weighting, the total losses are not comparable between the two models."
- ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "UJ589fn8ST3x" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:sample_weight modes were coerced from\n", - " ...\n", - " to \n", - " ['...']\n", - "WARNING:tensorflow:sample_weight modes were coerced from\n", - " ...\n", - " to \n", - " ['...']\n", - "Train on 182276 samples, validate on 45569 samples\n", - "Epoch 1/100\n", - "182276/182276 [==============================] - 3s 19us/sample - loss: 1.0524 - tp: 138.0000 - fp: 2726.0000 - tn: 179246.0000 - fn: 166.0000 - accuracy: 0.9841 - precision: 0.0482 - recall: 0.4539 - auc: 0.8321 - val_loss: 0.4515 - val_tp: 59.0000 - val_fp: 432.0000 - val_tn: 45054.0000 - val_fn: 24.0000 - val_accuracy: 0.9900 - val_precision: 0.1202 - val_recall: 0.7108 - val_auc: 0.9492\n", - "Epoch 2/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.5537 - tp: 216.0000 - fp: 3783.0000 - tn: 178189.0000 - fn: 88.0000 - accuracy: 0.9788 - precision: 0.0540 - recall: 0.7105 - auc: 0.9033 - val_loss: 0.3285 - val_tp: 69.0000 - val_fp: 514.0000 - val_tn: 44972.0000 - val_fn: 14.0000 - val_accuracy: 0.9884 - val_precision: 0.1184 - val_recall: 0.8313 - val_auc: 0.9605\n", - "Epoch 3/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.4178 - tp: 238.0000 - fp: 4540.0000 - tn: 177432.0000 - fn: 66.0000 - accuracy: 0.9747 - precision: 0.0498 - recall: 0.7829 - auc: 0.9237 - val_loss: 0.2840 - val_tp: 69.0000 - val_fp: 570.0000 - val_tn: 44916.0000 - val_fn: 14.0000 - val_accuracy: 0.9872 - val_precision: 0.1080 - val_recall: 0.8313 - val_auc: 0.9669\n", - "Epoch 4/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.3848 - tp: 247.0000 - fp: 5309.0000 - tn: 176663.0000 - fn: 57.0000 - accuracy: 0.9706 - precision: 0.0445 - recall: 0.8125 - auc: 0.9292 - val_loss: 0.2539 - 
val_tp: 71.0000 - val_fp: 622.0000 - val_tn: 44864.0000 - val_fn: 12.0000 - val_accuracy: 0.9861 - val_precision: 0.1025 - val_recall: 0.8554 - val_auc: 0.9709\n", - "Epoch 5/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.3596 - tp: 254.0000 - fp: 6018.0000 - tn: 175954.0000 - fn: 50.0000 - accuracy: 0.9667 - precision: 0.0405 - recall: 0.8355 - auc: 0.9323 - val_loss: 0.2363 - val_tp: 72.0000 - val_fp: 713.0000 - val_tn: 44773.0000 - val_fn: 11.0000 - val_accuracy: 0.9841 - val_precision: 0.0917 - val_recall: 0.8675 - val_auc: 0.9725\n", - "Epoch 6/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.3115 - tp: 255.0000 - fp: 6366.0000 - tn: 175606.0000 - fn: 49.0000 - accuracy: 0.9648 - precision: 0.0385 - recall: 0.8388 - auc: 0.9477 - val_loss: 0.2243 - val_tp: 72.0000 - val_fp: 768.0000 - val_tn: 44718.0000 - val_fn: 11.0000 - val_accuracy: 0.9829 - val_precision: 0.0857 - val_recall: 0.8675 - val_auc: 0.9728\n", - "Epoch 7/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.3179 - tp: 258.0000 - fp: 6804.0000 - tn: 175168.0000 - fn: 46.0000 - accuracy: 0.9624 - precision: 0.0365 - recall: 0.8487 - auc: 0.9435 - val_loss: 0.2165 - val_tp: 72.0000 - val_fp: 812.0000 - val_tn: 44674.0000 - val_fn: 11.0000 - val_accuracy: 0.9819 - val_precision: 0.0814 - val_recall: 0.8675 - val_auc: 0.9739\n", - "Epoch 8/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2880 - tp: 260.0000 - fp: 6669.0000 - tn: 175303.0000 - fn: 44.0000 - accuracy: 0.9632 - precision: 0.0375 - recall: 0.8553 - auc: 0.9530 - val_loss: 0.2122 - val_tp: 72.0000 - val_fp: 783.0000 - val_tn: 44703.0000 - val_fn: 11.0000 - val_accuracy: 0.9826 - val_precision: 0.0842 - val_recall: 0.8675 - val_auc: 0.9769\n", - "Epoch 9/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2676 - tp: 262.0000 - fp: 6904.0000 - tn: 175068.0000 - fn: 42.0000 
- accuracy: 0.9619 - precision: 0.0366 - recall: 0.8618 - auc: 0.9594 - val_loss: 0.2056 - val_tp: 72.0000 - val_fp: 855.0000 - val_tn: 44631.0000 - val_fn: 11.0000 - val_accuracy: 0.9810 - val_precision: 0.0777 - val_recall: 0.8675 - val_auc: 0.9750\n", - "Epoch 10/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2498 - tp: 266.0000 - fp: 6833.0000 - tn: 175139.0000 - fn: 38.0000 - accuracy: 0.9623 - precision: 0.0375 - recall: 0.8750 - auc: 0.9593 - val_loss: 0.2001 - val_tp: 73.0000 - val_fp: 840.0000 - val_tn: 44646.0000 - val_fn: 10.0000 - val_accuracy: 0.9813 - val_precision: 0.0800 - val_recall: 0.8795 - val_auc: 0.9761\n", - "Epoch 11/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2681 - tp: 262.0000 - fp: 6845.0000 - tn: 175127.0000 - fn: 42.0000 - accuracy: 0.9622 - precision: 0.0369 - recall: 0.8618 - auc: 0.9559 - val_loss: 0.1964 - val_tp: 73.0000 - val_fp: 865.0000 - val_tn: 44621.0000 - val_fn: 10.0000 - val_accuracy: 0.9808 - val_precision: 0.0778 - val_recall: 0.8795 - val_auc: 0.9768\n", - "Epoch 12/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2406 - tp: 268.0000 - fp: 7070.0000 - tn: 174902.0000 - fn: 36.0000 - accuracy: 0.9610 - precision: 0.0365 - recall: 0.8816 - auc: 0.9646 - val_loss: 0.1940 - val_tp: 73.0000 - val_fp: 848.0000 - val_tn: 44638.0000 - val_fn: 10.0000 - val_accuracy: 0.9812 - val_precision: 0.0793 - val_recall: 0.8795 - val_auc: 0.9771\n", - "Epoch 13/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2285 - tp: 269.0000 - fp: 6976.0000 - tn: 174996.0000 - fn: 35.0000 - accuracy: 0.9615 - precision: 0.0371 - recall: 0.8849 - auc: 0.9680 - val_loss: 0.1930 - val_tp: 73.0000 - val_fp: 857.0000 - val_tn: 44629.0000 - val_fn: 10.0000 - val_accuracy: 0.9810 - val_precision: 0.0785 - val_recall: 0.8795 - val_auc: 0.9772\n", - "Epoch 14/100\n", - "182276/182276 
[==============================] - 1s 4us/sample - loss: 0.2322 - tp: 268.0000 - fp: 6718.0000 - tn: 175254.0000 - fn: 36.0000 - accuracy: 0.9629 - precision: 0.0384 - recall: 0.8816 - auc: 0.9644 - val_loss: 0.1915 - val_tp: 73.0000 - val_fp: 808.0000 - val_tn: 44678.0000 - val_fn: 10.0000 - val_accuracy: 0.9820 - val_precision: 0.0829 - val_recall: 0.8795 - val_auc: 0.9781\n", - "Epoch 15/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2631 - tp: 267.0000 - fp: 6578.0000 - tn: 175394.0000 - fn: 37.0000 - accuracy: 0.9637 - precision: 0.0390 - recall: 0.8783 - auc: 0.9551 - val_loss: 0.1900 - val_tp: 73.0000 - val_fp: 803.0000 - val_tn: 44683.0000 - val_fn: 10.0000 - val_accuracy: 0.9822 - val_precision: 0.0833 - val_recall: 0.8795 - val_auc: 0.9781\n", - "Epoch 16/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2314 - tp: 266.0000 - fp: 6644.0000 - tn: 175328.0000 - fn: 38.0000 - accuracy: 0.9633 - precision: 0.0385 - recall: 0.8750 - auc: 0.9672 - val_loss: 0.1882 - val_tp: 73.0000 - val_fp: 806.0000 - val_tn: 44680.0000 - val_fn: 10.0000 - val_accuracy: 0.9821 - val_precision: 0.0830 - val_recall: 0.8795 - val_auc: 0.9784\n", - "Epoch 17/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2152 - tp: 271.0000 - fp: 6663.0000 - tn: 175309.0000 - fn: 33.0000 - accuracy: 0.9633 - precision: 0.0391 - recall: 0.8914 - auc: 0.9687 - val_loss: 0.1895 - val_tp: 73.0000 - val_fp: 754.0000 - val_tn: 44732.0000 - val_fn: 10.0000 - val_accuracy: 0.9832 - val_precision: 0.0883 - val_recall: 0.8795 - val_auc: 0.9785\n", - "Epoch 18/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2420 - tp: 264.0000 - fp: 6535.0000 - tn: 175437.0000 - fn: 40.0000 - accuracy: 0.9639 - precision: 0.0388 - recall: 0.8684 - auc: 0.9610 - val_loss: 0.1895 - val_tp: 73.0000 - val_fp: 749.0000 - val_tn: 44737.0000 - val_fn: 10.0000 - val_accuracy: 0.9833 - 
val_precision: 0.0888 - val_recall: 0.8795 - val_auc: 0.9786\n", - "Epoch 19/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2279 - tp: 268.0000 - fp: 6443.0000 - tn: 175529.0000 - fn: 36.0000 - accuracy: 0.9645 - precision: 0.0399 - recall: 0.8816 - auc: 0.9672 - val_loss: 0.1895 - val_tp: 73.0000 - val_fp: 763.0000 - val_tn: 44723.0000 - val_fn: 10.0000 - val_accuracy: 0.9830 - val_precision: 0.0873 - val_recall: 0.8795 - val_auc: 0.9788\n", - "Epoch 20/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2247 - tp: 267.0000 - fp: 6596.0000 - tn: 175376.0000 - fn: 37.0000 - accuracy: 0.9636 - precision: 0.0389 - recall: 0.8783 - auc: 0.9684 - val_loss: 0.1896 - val_tp: 73.0000 - val_fp: 760.0000 - val_tn: 44726.0000 - val_fn: 10.0000 - val_accuracy: 0.9831 - val_precision: 0.0876 - val_recall: 0.8795 - val_auc: 0.9797\n", - "Epoch 21/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2296 - tp: 269.0000 - fp: 6562.0000 - tn: 175410.0000 - fn: 35.0000 - accuracy: 0.9638 - precision: 0.0394 - recall: 0.8849 - auc: 0.9656 - val_loss: 0.1889 - val_tp: 73.0000 - val_fp: 750.0000 - val_tn: 44736.0000 - val_fn: 10.0000 - val_accuracy: 0.9833 - val_precision: 0.0887 - val_recall: 0.8795 - val_auc: 0.9797\n", - "Epoch 22/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.1982 - tp: 271.0000 - fp: 6583.0000 - tn: 175389.0000 - fn: 33.0000 - accuracy: 0.9637 - precision: 0.0395 - recall: 0.8914 - auc: 0.9756 - val_loss: 0.1879 - val_tp: 73.0000 - val_fp: 764.0000 - val_tn: 44722.0000 - val_fn: 10.0000 - val_accuracy: 0.9830 - val_precision: 0.0872 - val_recall: 0.8795 - val_auc: 0.9777\n", - "Epoch 23/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2154 - tp: 273.0000 - fp: 6552.0000 - tn: 175420.0000 - fn: 31.0000 - accuracy: 0.9639 - precision: 0.0400 - recall: 0.8980 - auc: 0.9682 - val_loss: 0.1882 - 
val_tp: 73.0000 - val_fp: 762.0000 - val_tn: 44724.0000 - val_fn: 10.0000 - val_accuracy: 0.9831 - val_precision: 0.0874 - val_recall: 0.8795 - val_auc: 0.9779\n", - "Epoch 24/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.1861 - tp: 272.0000 - fp: 6248.0000 - tn: 175724.0000 - fn: 32.0000 - accuracy: 0.9655 - precision: 0.0417 - recall: 0.8947 - auc: 0.9779 - val_loss: 0.1885 - val_tp: 73.0000 - val_fp: 772.0000 - val_tn: 44714.0000 - val_fn: 10.0000 - val_accuracy: 0.9828 - val_precision: 0.0864 - val_recall: 0.8795 - val_auc: 0.9785\n", - "Epoch 25/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.1953 - tp: 270.0000 - fp: 6501.0000 - tn: 175471.0000 - fn: 34.0000 - accuracy: 0.9641 - precision: 0.0399 - recall: 0.8882 - auc: 0.9751 - val_loss: 0.1877 - val_tp: 73.0000 - val_fp: 768.0000 - val_tn: 44718.0000 - val_fn: 10.0000 - val_accuracy: 0.9829 - val_precision: 0.0868 - val_recall: 0.8795 - val_auc: 0.9786\n", - "Epoch 26/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.1704 - tp: 277.0000 - fp: 6215.0000 - tn: 175757.0000 - fn: 27.0000 - accuracy: 0.9658 - precision: 0.0427 - recall: 0.9112 - auc: 0.9808 - val_loss: 0.1903 - val_tp: 73.0000 - val_fp: 698.0000 - val_tn: 44788.0000 - val_fn: 10.0000 - val_accuracy: 0.9845 - val_precision: 0.0947 - val_recall: 0.8795 - val_auc: 0.9788\n", - "Epoch 27/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.1946 - tp: 271.0000 - fp: 6036.0000 - tn: 175936.0000 - fn: 33.0000 - accuracy: 0.9667 - precision: 0.0430 - recall: 0.8914 - auc: 0.9748 - val_loss: 0.1908 - val_tp: 73.0000 - val_fp: 692.0000 - val_tn: 44794.0000 - val_fn: 10.0000 - val_accuracy: 0.9846 - val_precision: 0.0954 - val_recall: 0.8795 - val_auc: 0.9786\n", - "Epoch 28/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2115 - tp: 271.0000 - fp: 5873.0000 - tn: 176099.0000 - fn: 
33.0000 - accuracy: 0.9676 - precision: 0.0441 - recall: 0.8914 - auc: 0.9688 - val_loss: 0.1914 - val_tp: 73.0000 - val_fp: 691.0000 - val_tn: 44795.0000 - val_fn: 10.0000 - val_accuracy: 0.9846 - val_precision: 0.0955 - val_recall: 0.8795 - val_auc: 0.9785\n", - "Epoch 29/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2237 - tp: 266.0000 - fp: 6047.0000 - tn: 175925.0000 - fn: 38.0000 - accuracy: 0.9666 - precision: 0.0421 - recall: 0.8750 - auc: 0.9672 - val_loss: 0.1909 - val_tp: 73.0000 - val_fp: 698.0000 - val_tn: 44788.0000 - val_fn: 10.0000 - val_accuracy: 0.9845 - val_precision: 0.0947 - val_recall: 0.8795 - val_auc: 0.9784\n", - "Epoch 30/100\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.2232 - tp: 272.0000 - fp: 5990.0000 - tn: 175982.0000 - fn: 32.0000 - accuracy: 0.9670 - precision: 0.0434 - recall: 0.8947 - auc: 0.9668 - val_loss: 0.1919 - val_tp: 73.0000 - val_fp: 642.0000 - val_tn: 44844.0000 - val_fn: 10.0000 - val_accuracy: 0.9857 - val_precision: 0.1021 - val_recall: 0.8795 - val_auc: 0.9785\n", - "Epoch 31/100\n", - "178176/182276 [============================>.] 
- ETA: 0s - loss: 0.2022 - tp: 273.0000 - fp: 5659.0000 - tn: 172216.0000 - fn: 28.0000 - accuracy: 0.9681 - precision: 0.0460 - recall: 0.9070 - auc: 0.9705Restoring model weights from the end of the best epoch.\n", - "182276/182276 [==============================] - 1s 4us/sample - loss: 0.1989 - tp: 276.0000 - fp: 5796.0000 - tn: 176176.0000 - fn: 28.0000 - accuracy: 0.9680 - precision: 0.0455 - recall: 0.9079 - auc: 0.9708 - val_loss: 0.1920 - val_tp: 73.0000 - val_fp: 626.0000 - val_tn: 44860.0000 - val_fn: 10.0000 - val_accuracy: 0.9860 - val_precision: 0.1044 - val_recall: 0.8795 - val_auc: 0.9788\n", - "Epoch 00031: early stopping\n" - ] - } - ], - "source": [ - "weighted_model = make_model()\n", - "weighted_model.load_weights(initial_weights)\n", - "\n", - "weighted_history = weighted_model.fit(\n", - " train_features,\n", - " train_labels,\n", - " batch_size=BATCH_SIZE,\n", - " epochs=EPOCHS,\n", - " callbacks=[early_stopping],\n", - " validation_data=(val_features, val_labels),\n", - " # The class weights go here\n", - " class_weight=class_weight,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "R0ynYRO0G3Lx" - }, - "source": [ - "### Check training history" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "BBe9FMO5ucTC" - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plot_metrics(weighted_history)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "REy6WClTZIwQ" - }, - "source": [ - "### Evaluate metrics" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "nifqscPGw-5w" - }, - "outputs": [], - "source": [ - "# TODO 1\n", - "train_predictions_weighted = #TODO: Your code goes here.\n", - "test_predictions_weighted = #TODO: Your code goes here." - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "owKL2vdMBJr6" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "loss : 0.06950428275801711\n", - "tp : 94.0\n", - "fp : 905.0\n", - "tn : 55952.0\n", - "fn : 11.0\n", - "accuracy : 0.9839191\n", - "precision : 0.0940941\n", - "recall : 0.8952381\n", - "auc : 0.9844724\n", - "\n", - "Legitimate Transactions Detected (True Negatives): 55952\n", - "Legitimate Transactions Incorrectly Detected (False Positives): 905\n", - "Fraudulent Transactions Missed (False Negatives): 11\n", - "Fraudulent Transactions Detected (True Positives): 94\n", - "Total Fraudulent Transactions: 105\n" - ] - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "weighted_results = weighted_model.evaluate(\n", - " test_features, test_labels, batch_size=BATCH_SIZE, verbose=0\n", - ")\n", - "for name, value in zip(weighted_model.metrics_names, weighted_results):\n", - " print(name, \": \", value)\n", - "print()\n", - "\n", - "plot_cm(test_labels, test_predictions_weighted)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "PTh1rtDn8r4-" - }, - "source": [ - "Here you can see that with class weights the accuracy and precision are lower because there are more false positives, but conversely the recall and AUC are higher because the model also found more true positives. Despite having lower accuracy, this model has higher recall (and identifies more fraudulent transactions). Of course, there is a cost to both types of error (you wouldn't want to bug users by flagging too many legitimate transactions as fraudulent, either). Carefully consider the trade offs between these different types of errors for your application." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "hXDAwyr0HYdX" - }, - "source": [ - "### Plot the ROC" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "3hzScIVZS1Xm" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmoAAAJQCAYAAAA+M0i0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3XmcVNWd///3p5t9a1mbXdwR2e3gimniOooanBgjk8QwGnQSNU5mMtHMbyYkmSiTr0kM6hhRg8aNLGpENAZjrIgmgM2i4IIIIjag7A3N1t30+f1xq+kGqqqruusuVfV6Ph71uNu553zq0A0fzr33XHPOCQAAANFTFHYAAAAASIxEDQAAIKJI1AAAACKKRA0AACCiSNQAAAAiikQNAAAgokjUAAAAIopEDQAAIKJI1AAAACKqTdgBBKFXr15uyJAhGZ2ze/dude7c2Z+Acgj94KEfGtEXHvrBQz80oi889INn8eLFW5xzvVtbT0EkakOGDFFFRUVG58RiMZWXl/sTUA6hHzz0QyP6wkM/eOiHRvSFh37wmNlH2aiHS58AAAARRaIGAAAQUSRqAAAAEUWiBgAAEFEkagAAABFFogYAABBRJGoAAAARRaIGAAAQUb4lamb2KzPbZGYrmuzrYWYvmdmq+LJ7fL+Z2Qwz+8DM3jKzsUnqPNXMlsfLzTAz8yt+AACAsPk5ovawpIsO23erpJedcydIejm+LUn/IOmE+GeqpPuS1HmfpK83KXt4/QAAAHnDt0TNOfeqpG2H7b5c0iPx9Uckfb7J/l87zwJJR5lZv6Ynxre7OecWOOecpF83OR8AACDvBP2uz1Ln3Mb4+ieSSuPrAyR93KRcZXzfxib7BsT3H14mITObKm90TqWlpYrFYhkFWl1dnfE5+Yh+8NAPjegLD/3goR8a0Rce+iG7Qnspu3POmZnzsf6ZkmZKUllZmcv0BbG8VNZDP3joh0b0hYd+8NAPjegLD/2QXUE/9flpwyXN+HJTfP96SYOalBsY39fU+vj+VGUAAADyRtCJ2hxJ18TXr5H0bJP9X40//Xm6pKoml0glSfHtnWZ2evxpz682OR8AACDv+Dk9x5OS/i7pJDOrNLNrJU2XdL6ZrZJ0Xnxbkl6QtEbSB5IekPSNJvUsa1LtNyQ9GC+3WtIf/YofAAAgbL7do+acuzrJoXMTlHWSvpmkntFN1iskDc9KgAAAABHHmwkAAAAiikQNAAAgokjUAAAAIopEDQAAIKJI1AAAACKKRA0AACCiSNQAAAAiikQNAAAgokjUAAAAIopEDQAAIKJI1AAAACKKRA0AACCiSNQAAAAiikQNAAAgokjUAAAAIopEDQAAIKJI1AAAACKKRA0AACCiSNQAAAAiqk3YAQAAAPhm62pp3YKwo2gxEjUAAJC/Hp0k7fgo7ChajEQNAADkrz3bvOXwL0jF7QJs+JdZqYVEDQAA5L+JP5c6dAuwwewkajxMAAAAEFEkagAAABFFogYAABBRJGoAAAARRaIG
AAAQUTz1CQBAvlv7mrT4YcnV+97UyZs2SVt+7Xs7aavdE3YErUKiBgBAvotNl9bOD6SpUknaFEhT6WvXVWrTIewoWoREDQCAfHeg1lt+5jpp8Bm+NvXOO+9o2LBhvraRsdLhUpsgJ7vNHhI1AAAKxfAvSEf7m6ht2hrTsBHlvrZRSHiYAAAAIKJI1AAAACKKRA0AACCiSNQAAAAiikQNAAAgonjqEwCARKoqpUcnSdXpTQp2Vl2dtCCi/6zu3xV2BGihiP5EAQAQsso3pC3vp128rSTV+RZN63XsLvU8PuwokCESNQAAUjnpYunye5st9trrr+vss84KIKAWatclZyd9LWQkagAApFLcVurUo9lidW27plUOyAQPEwAAAEQUiRoAAEBEkagBAABEFIkaAABARPEwAQAgmmr3SpveCa/9bWvCaxuII1EDAETTo5OkdX8POwrJuPiE8JCoAQCiaduH3rJ0uDdFRhiK2kpjvhJO24BI1AAAUfdPv5e69Qs7CiAUjOcCAABEFIkaAABARJGoAQAARBSJGgAAQESRqAEAAEQUT30CQD6rWi8t/51UX+tL9YM/+lB69Q1f6lZNtT/1AjmERA0A8tlfp0tLfu1b9cdK0oe+VS/JpLYd/GwAiDQSNQDIZ/t3ecuhE6XeJ2W9+o8++khHH3101us9qO9IqWN3/+oHIi6URM3MviXp65JM0gPOubvM7DeSGv4WOUrSDufc6ATnrpW0S9IBSXXOubJgogaAHDb8Cmn4P2a92g9jMR1dXp71egF4Ak/UzGy4vCRtnKQaSS+a2Vzn3FVNyvxUUlWKaiY457b4GykAAEC4wnjq82RJC51ze5xzdZL+KumKhoNmZpK+KOnJEGIDAACIjDAStRWSxptZTzPrJOliSYOaHB8v6VPn3Kok5ztJ88xssZlN9TlWAACA0JhzLvhGza6V9A1JuyW9LWm/c+6W+LH7JH3gnPtpknMHOOfWm1kfSS9Jusk592qCclMlTZWk0tLSU2fPnp1RjNXV1erSpUtG5+Qj+sFDPzSiLzy50g/D3v6J+mx+XW8P+3dt7jM+6/XnSj8Egb7w0A+eCRMmLM7GffShPEzgnHtI0kOSZGa3S6qMr7eRdxn01BTnro8vN5nZM/LudTsiUXPOzZQ0U5LKyspceYY3u8ZiMWV6Tj6iHzz0QyP6wpMz/bD5YWmzdMqwYdLw8qxXnzP9EAD6wkM/ZFdYT332iSdag+UlZqfHD50n6T3nXGWS8zpLKnLO7YqvXyDph4EEDQC54tX/J33wF299y8pwYwHQKmHNo/aUmfWUVCvpm865HfH9X9JhDxGYWX9JDzrnLpZUKukZ73kDtZH0hHPuxeDCBoAc8Mrtkqs/dF/JoMRlAURaWJc+E94o4Zz7WoJ9G+Q9cCDn3BpJo3wNDgByXUOS9rUXvGXn3lLvE8OLB0CL8WYCAMhXQ84KOwIArRTG9BwAAABIA4kaAABARHHpEwDgj8evlFbNk6bGpP5jvH1zbpaWPJK4fL9R0vVNZluaVpK87ol3SWVTvPWKWdLcW5KXndbkjYT3nyNtfDNxubHXSJfN8NY3LJVmlievc2rsiO9ULkmxw8rl+HdKqJnvVC419kOefKdDZPKdsoARNQCAP1bNCzsCIOeF8maCoJWVlbmKioqMzmHCPg/94KEfGtEXnhb3w4E6qb426/Ec4sd9vWXTEQqfpOyHhlGJAOKIAn43PPSDx8xy980EAFCQtq/1LtPs3R52JAByBIkaAATl07fjSZpJbdr729YJF/hbP4BAkKgBQNBO+gfp6iebLweg4PEwAQAAQEQxogYA8MfUWNgRADmPRA0A4I+G+asAtBiXPgEAACKKRA0A4I85N3sfAC3GpU8ASMY56eOF0u4tRxzqtXmF9G51ZvWtz2zi7ZzX8Lqehtf9AMgYiRoAJLMmJj36+YSHhkvS2y2st4i/egGkh78tACCZ6k3esmv/I26M37Jli3r16pV5ncVt
pDNuzEJwAAoBiRoANGfI2dI/PnDIrhW8zxBAAHiYAAAAIKJI1AAAACKKS58AAH/0GxV2BEDOI1EDAPjj+lfDjgDIeVz6BAAAiChG1ADkju0fSQt/KdXtC6a9rR8E0w4AJEGiBiB3LLxfWvB/wbfbsXvwbeaDaSXxZVW4cQA5jEQNQO5oGEk75QppyFnBtFncXhp6STBtAcBhSNQA5J6jz5Q+c13YUQCA73iYAAAAIKIYUQOi6PErpVXzvPV+ow6d5qDhvp9EJt4llU3x1itmSXNvSV626X1D958jbXwzcbmx10iXzfDWNyxVeexyKZakzqmxxndizrlZWvJI4nIt/U6b3/OWL/y79zlcC7+TZpYnb39qLOF3KpcO7YeI/Tm15DsdIY3vVC55/ZDJdwKQNkbUgChqSNKAXHfCBWFHAOQ0c86FHYPvysrKXEVFRUbnxHjhsiT6oUHg/RDhp+VC/ZmY+22p4iHp4julcV8PJ4Y4fjc89EMj+sJDP3jMbLFzrqy19TCiBgAAEFHcowYgHH/+gbTiqczO2bPNn1gAIKJI1IAoGntN2BH4r+IhaV8LLu1akdR7aPbjAYAIIlEDoqjh6b1CcP2rUvtu6Zdv303q3NO/eAAgQkjUAITrqMG8ogkAkuBhAiCKNiz1PgCAgsaIGhBFDZOVRnB6DgBAcBhRAwAAiCgSNQAAgIji0icAfzkn7fhIqj9w6P76+nDiAYAcQqIGwF9/+ZE0/6cpClhgoQBAriFRA+CvTe96yy6lUrvOhx4bdLrUoST4mAAgR5CoAQjGxJ9LQy8JOwoAyCkkakAUTY2FHQEAIAJI1IAo6j8m7AgAABHA9BwAAAARRaIGRNGcm70PAKCgkagBUbTkEe8DACho3KMGILnta6VVL3mT1sYNqFwlLXw/szoAAC1CogYguWdvlNbOP2TXCZL0QQvqatMhGxEBQEEhUQOQ3N4d3vKUSVKnXpKk9evXa8CAAZnV07VUGjI+y8EBQP4jUQPQvLO/LfUbKUlaFYtpQHl5uPEAQIHgYQIAAICIYkQN+eXxK6VV8w7dN/Ya6bIZ3vqGpdLM8uTnT401TjY75+aDT16WS1KsSbl+o6TrX23cnpbifZUT75LKpnjrFbOkubckLzutqrF+AEDBI1FDfjk8SctVTZNAAEDBIlFDfmoYmTpc/zHJjx3ushkHR+JisZjKU92XlW6dZVMaR9cAAGgGiRryC5cMAQB5JJREzcy+JenrkkzSA865u8xsWnzf5nix7znnXkhw7kWSfiGpWNKDzrnpwUSNnFColwzfnC29/Uz262WyWgAIVeCJmpkNl5eQjZNUI+lFM5sbP/xz59ydKc4tlnSvpPMlVUp6w8zmOOfe8TlsINr+/ANp1wZ/6rYiqXNvf+oGAKQUxojayZIWOuf2SJKZ/VXSFWmeO07SB865NfFzZ0u6XBKJGgpbfZ23/PwvpY5HZbfu7kOkbv2yWycAIC3mmrzDL5AGzU6W9KykMyTtlfSypApJWyV9TdLO+Pa/Oee2H3buFyRd5Jy7Lr79FUmnOeduTNDOVElTJam0tPTU2bNnZxRndXW1unTpktE5+SjX+qE8drkkKVb+bFbrjXo/nPn6NWpXu0N/O+Nh1bTv7mtbUe+LoNAPHvqhEX3hoR88EyZMWOycK2ttPYGPqDnn3jWz/5U0T9JuScskHZB0n6QfSXLx5U8l/XMr2pkpaaYklZWVuZRP7CXQ7FN+BSLn+iHmLbIdc+T74Y12Uq105plneq9r8lHk+yIg9IOHfmhEX3joh+wK5c0EzrmHnHOnOufOkbRd0vvOuU+dcwecc/WSHpB3mfNw6yUNarI9ML4PAAAg74SSqJlZn/hysLz7054ws6Y3wUyStCLBqW9IOsHMjjGzdpK+JGmO3/ECAACEIax51J4ys56SaiV90zm3w8zuNrPR8i59rpV0vSSZWX9503Bc7JyrM7MbJf1J3vQcv3LOvR3OVwAAAPBXKImac258
gn1fSVJ2g6SLm2y/IOmI+dUAAADyDW8mAHLFgVrpofOlTxLcFVBfG3w8AADfkaghv0y8K+wI/LNzg7RhafLjpSOkTj2DiwcA4DsSNeSXQnjheckg6aYlR+4vbiuZBR8PAMA3JGpAzjGpTbuwgwAABCCU6TkA31TM8j4AAOQBRtSQX+be4i0L4RIoACDvMaIGAAAQUSRqAAAAEcWlTyAXbF2demoOAEBeIlEDom7raunusY3bRQyEA0ChIFEDom7nem/ZvkTqN1Ia9aVw4wEABIZEDcgV/UZKX5sbdhQAgACRqCF8j18prZqX+NjUmNR/jLc+52ZpySOJy/UbJV3/qjStyo8IAQAIBTe7IHzJkjQAAAocI2oI39SYt2wYOUvmshneBwCAAkGihvA1l6ABAFCguPQJAAAQUYyoIXxzbvaWhXpZc+lj0pZVyY9XfRxcLACASCFRQ/ganuQsxERt+1rp2W+mV7Z9N19DAQBED4kaEKaaPd6yc2/p9G8kL1dULA27PJiYAACRQaIGREGnXtL4b4cdBQAgYniYAAAAIKJI1AAAACKKRA0AACCiuEcN4es3KuwIAACIJBI1hO/6V8OOAACASCJRA4KwdbU3se/+qkP31+4LJx4AQE4gUQOCsPIF6aPXkh/veVxwsQAAcgaJGsI3rSS+rEpdLpc55y1HXiWdcfibCEzqc3LgIQEAoo9EDQhS5948PAEASBvTcwAAAEQUiRoAAEBEkagBAABEFIkaAABARPEwAeCnA7XS3h1STXXYkQAAchCJGsI38a6wI/BHXY10z6nSjnVhRwIAyFEkaghf2ZSwI/DHni3xJM2kTj2ldp2kEy8KOyoAQA4hUUN2PH6ltGpe43a/UYe+w7NhUttEJt6Vv8maJHXtK/3be2FHAQDIQTxMgOxomqQBAICsYEQN2ZXsNVD5/HooAAB8wogaAABARDGihuwYe03YEQAAkHdI1JAdl80IOwIAAPIOlz4BP1Rvlt6dG3YUAIAcx4gasmPDUm/Zf0y4cUTFnBul91/01ovbhRsLACBnkaghO2aWe0ue7vTs3uItjz9PKvvncGMBAOQsEjXAT5+9VRr0mbCjAADkKO5RAwAAiCgSNQAAgIgiUQMAAIgoEjUAAICIIlEDAACIKJ76RHZMjYUdQTj2VUmv3CHt2Xro/m1rwokHANBqm3ft14yXV2nXvtqwQyFRQ5YU6kS378+TFt6X/HjnnsHFAgDIirlvbdCjCz4KOwxJJGpA6xyo8ZaDz5BOnXLose5HSz2ODT4mAECr1B6olySVn9Rbl4/u36I6rvjf7MRCoobsmHOztyzUl7N3P0YadVXYUQAAsuiEPl00aczAUGPgYQJkx5JHvA8AAMiaUBI1M/uWma0ws7fN7Jb4vv9nZu+Z2Vtm9oyZHZXk3LVmttzMlplZRbCRAwAABCfwRM3Mhkv6uqRxkkZJmmhmx0t6SdJw59xISe9Lui1FNROcc6Odc2W+BwwAABCSMEbUTpa00Dm3xzlXJ+mvkq5wzs2Lb0vSAknhXhQGAAAImTnngm3Q7GRJz0o6Q9JeSS9LqnDO3dSkzHOSfuOceyzB+R9K2i7JSbrfOTczSTtTJU2VpNLS0lNnz56dUZzV1dXq0qVLRufko3T7oTx2uSQpVv6s3yGFIlk/9N34soaunKFPSj+n907+VgiRBY/fDQ/94KEfGtEXnnzohz9+WKvfrKzRRUPa6EtD27eojgkTJizOxpW/wJ/6dM69a2b/K2mepN2Slkk60HDczP5TUp2kx5NUcbZzbr2Z9ZH0kpm955x7NUE7MyXNlKSysjJXXl6eUZyxWEyZnpOP0u6HmLcIvc+ck2b/k/TxgqxWW1Nbq3Zt2x55oG6/JKlv377qG/Z3Dwi/Gx76wUM/NKIvPA39ULW3VpMfWKCNVfvCDilje2u8tGTQoEEqLx8WaiyhTM/hnHtI0kOSZGa3S6qMr39N0kRJ57okQ33OufXx5SYze0bevW5H
JGrIjn4b/iRNuzx5gWlV8YKjggmoOfuqpJXPZ73adpKUaoLqAWOz3iYA5LK311fp7Q07ww6jxYpMGjEw4XONgQolUTOzPvFEa7CkKySdbmYXSfoPSZ91zu1Jcl5nSUXOuV3x9Qsk/TCwwAtFxSxvWTYldbmmro9Yrtyuq/StZVmr7vXXX9dZZ52V+GBRG6lj+L/MABBFZUd31/1fOTXsMDLWrk2RunZIcCUlYGFNePuUmfWUN0bxTefcDjO7R1J7eZczJWmBc+4GM+sv6UHn3MWSSiU9Ez/eRtITzrkXw/kKeWzuLd6ybIo29r9QJ02+I9x4WsKKpM69slZdbbuSrNYHAIWibXGRenZp2X1eCO/S5/gE+45PUnaDpIvj62vkTekBAACQ93gzAQAAQESRqAEAAEQUiRoAAEBEhfUwAdC8LR9INdWZnZNpeQDIUfX1Tis/3aUD9cFOXN+ctVUHtGJ9ldZs2R12KHmBRA3RtPQx6dlvtvx878lgAMhbP37hXT302odhh5HY3187uFrEtbtWIVHDkRomsQ3TtjXeskup98nUKZ/PbjwAEDEfbfVGrAb16KhuEZjvq0HTV0gVF5n+6bSjQ44ot5GoIdrGfV065zthRwEAkfXfE0/R+cNa8B9an3ivkDpiFi60EAOSAAAAEUWihiPdf473AQAAoeLSJ4608c2wIwAAAGJEDQAAILKSjqiZ2dNpnL/NOXddFuMBAABAXKpLnyMk3ZDiuEn6RXbDQV7Zs016c7ZUtzfzc9ctzH48ABCi2gP1enpJpbZU12Slvg+ZULYgpErUvu+ceznVyWb24yzHg3zy93ul+Xe2ro52XbITCwCE7NX3N+u7Ty3Per2d2xVnvU5ER9JEzTn3xOH7zKydpDbOuT3JygAH7d/lLY8tl/qPyfz89l2lUVdnMyIACE31/rqD6/9SflxW6uzTtb3GHdMjK3UhmtJ+6tPMpkiaLKnYzF53zv2Xf2EhVGOvyW59J/6DdHqqq+gAUDguG9Vf371oaNhhIEekepjgYufcC012XeicOz9+7E1JJGr56rIZYUcAAACUenqOz5jZM2Y2PL79tpndb2b3SXovgNgAAAAKWqp71H5gZv0l/cjMaiX9t6Qekjo555YEFSBCsGGpt2zJfWUAACBrmrtHbbukb0g6RdKvJP1N0k/9Dgohm1nuLadVhRoGAACFLumlTzP7gaS5kuZJOss5N1HeJc8XzGxyQPEBAAAUrFQjapc750abmUlaLOlu59zTZvacpJuDCQ+Rs/JFacG9Un1982W3rfY/HgCIq9pTq+/9Ybm27NofSvs7duzVfSv/nvT4lupw4kJuS5WovWtm/yepk6TXGnY652rF5c/c8viV0qp5R+4fe03jE54bljZe8kxlwb3Sh69m1n7JwMzKAwjdlFmL9MrKzQe3rx43SHdcMVKStLyySpfe81pj4RefP+Tc5248WyMGlkiSbnv6LT256OOEbQwf0E1zbxp/cHvIrc8nLCdJt08aocmnDZYkPbFwnb73TPYnjs2K7duaLTLnzQ2a8+aGjL7T2umXHFyfePd8rVi/M2G5lH9Oh/Hrz6l/fD2fvlNL/pyyJdXDBFeb2RhJtc65FVlvGcFJlKQ154QLEu9vGEm78A6p74jm6+l4lFQ6vPlyACKlaZKWa04/toe+de6JkqTvPbM86auWPje0j74+/lhJ0prN1frPPyT/p+7Hnx+uY3t7b0p5YP4a/eW9TQnLHdOrs26f1Ph349UPLGjRdwAamHMu8QGzkc65t1KenEaZKCgrK3MVFRUZnROLxVReXu5PQEG7/xxveX2GI2FK0A+zLpE+ek26Zq50zPik5+WbvPp5aCX6wpPv/dAwwtDcCEGU+uG5NzfopieX6pKR/XTv5LGBtx+lvggT/eAxs8XOubLW1pPq0uejZna2vJevJ/OIJOZwiLoWJGgAACB8qRK1npLeVupELfHYLwAgpw0f0C3sEAAo9T1q3AEOAAWq
6Y3WAMKT6hVSyBfTSrwPAADIKc29mQD5pP6AVF+X0SlWXyvVNZn7x6UxfxoAxNXU1csp8UNr2VaXzvyOQI4hUSskd54o7dmS0SmflSSeRQAKTrpPfaby4Pw1+vEL7yrJ5AIA0tBsomZmp0t6yzm3x8yulveU593OucSzyCG6GpK04nZpn1JfX6+iosOukJcMlEpPyWJgAPLRgjVb5ZxUXGQqtlTPpWVP22JT+Ym9A2kLCEI6I2ozJY0ys5GSvitplqRHJZX7GBf8ctznpK88k3bxV5kPB0Ar/fLLp+r8YaVhhwHkpHQeJqhz3qy4l0u6xzn3C0k8tw0AAOCzdEbUdpvZdyR9RdJnzaxIUlt/wwIAAEA6I2pXyZv09nrn3EZJAyX9zNeokF0T75JOuz7sKAAAQIaaTdSccxskPdFk1yZJv/UtImRf2ZTkL1kHAACRlc5Tn/8s6UZJJZKOkzRY0v9JOs/f0AAAYbl90oiwQwCg9O5Ru1nSOEkLJck5976Z8fhOLqmYJW1cFnYUAHLEe5/sVO+u7SVJL73zacqyKzbVqTZJmc279ifcDyB96SRq+5xzNRafA8fMiv0NCVk395bG9bqa8OIAEHkbduzVP/xifmaT1C6pSHm4TXEwc6gB+SidRO11M/sPSR3MbIKkb0qa629Y8M3xnws7AgARtqV6v5yT2rcp0jG9Omtg946py2/Zql69eiY93rtrB51+TPLjAFJLJ1H7D0lTJb0n6VuS/iTpfj+Dgo+OPjvsCADkgP119Xrvk1168ZZzUpaLxWIqL/9MQFEBhSedRO0SSQ865+7zOxgAAAA0SmcetSslfWBms8zsIu5RAwAACEY686h9RdKJkp6TNEXSGjP7pd+BAQAAFLp0Ln3KObffzJ6VtFdSsaQvSrrBz8AAAAAKXbMjamZ2vpk9KGm1pH+S9GtJff0ODFk0rUoaOC7sKAAAQIbSGVGbKuk3km5yzu31OR60xJybpSWPJD7Wb5Q07PPS9rWBhgTkkymzFumVlZsP2Xd0tyL9tbxxe8itzyc9//ZJIzT5tMGSpCcWrtP3nlmetOza6ZccXJ9493ytWL8zYbmrxw3SHVeMlCQtr6zSpfe8lrTO5248WyMGlkiSbnv6LT256OOE5Xp0bqfTj+2RtB4AwWs2UXPOXRlEIPBJzW7p5R80bnfsHl4sQI46PEnLV9t21+iF5Z9Iksaf0EuPXntayBEBSJqomdlfnXOfNbPtkprOUW2SnHOO/3ZFxWUzvE8ia2LSry+XehwnXXSH1PvEQEMD8knT0a5YLJb0WCqTTxt8cHStOXNvGp9WuREDS9Ju/44rRh4ciZOkfbUHNPS/XlSbItN/XzpMkmRmmnBS77TqA+CvVCNqE+LLXkEEghbasNRb9h+TulzJAOnEC/2PB8hDV48bFHYIvisuMn31jCFhhwHgMEkTNedcfXz1Iefc15oeM7OHJX1NCN/Mcm85rSrUMIB81nQECgCClM6Et4f8DRWf8Jb3hQAAAPgsaaJmZt+N35820sy2xT/bJW2W9EJgEQJAyJZXVml5JaPWAIKXakTtJ5J6S/p5fNlbUi8ktirrAAAgAElEQVTnXA/n3HeCCA4AouDSe15LOf0FAPgl1cMExzvnVpnZo5JOadhpZpIk59xbPscGAABQ0FIlardKulbSvQmOOUnn+BIRWm5/tfTkl6SqysZ9tcxRDCCxR/62Vr96/cOwwwCQQqqnPq+NL9ObyCcDZvYtSV+XNyfbA865u8ysh7w3IAyRtFbSF51z2xOce42k/y+++T/OuSRT8hegjcuktfMTH+tzSuL9AArW7Dc+1kdb90iShvbtGnI0ABJp9s0EZnaFpJecc7vM7FZJYyX92Dn3ZksaNLPh8pK0cZJqJL1oZnPlvarqZefc9Hg7t0r67mHn9pD0fUll8kb1FpvZnEQJXcGYGmtcd/F5ifuPlf7xwcb9RcXSUUcHGRWAHPJ//zRW5w8rDTsMAAmkMz3H
tHiSdqakiyU9Lun+VrR5sqSFzrk9zrk6SX+VdIWkyyU1jI49IunzCc69UF7SuC2enL0k6aJWxJL7+o9pMtltPFFr20nqeVzjp/sQKX5vIQAc7uiendS2OJ1/DgAELZ3fzAPx5URJ9zvnnpXUvhVtrpA03sx6mlknecnfIEmlzrmN8TKfSEr037sBkpq+Tbgyvg8AACDvNHvpU9JGM7tX0j9IOtXM2im9BC8h59y7Zva/kuZJ2i1pmRqTwYYyzsxcovPTZWZT5V1OVWlp6RHv5WtOdXV1xueE4cSV3rMe75/0TR21/S2NlrS9aofezFLsudIPfqMfGhViX0w7o4OkQ9/vmQ/9UF3tPWxUUVGhzd2KW1hH7vdDttAXHvohu9JJ1L4ob9TrbufcdjPrL+/+sRZzzj0k6SFJMrPb5Y2MfWpm/ZxzG82sn6RNCU5dL6m8yfZASbEkbcyUNFOSysrKXHl5eaJiScViMWV6Tihil0uS+l//O2mNSW9K3Y/qnrXYc6YffEY/NKIvPPnQD13enC/t2qmysjKd0r+kRXXkQz9kC33hoR+yq9mRMedctaS3JZWb2Q2Sujvn/tiaRs2sT3w5WN79aU9ImiPpmniRayQ9m+DUP0m6wMy6m1l3SRfE9wEAAOSddJ76vFHSNyT9Ib7rt2Z2r3Pu/1rR7lNm1lNSraRvOud2mNn0eN3XSvpI3kiezKxM0g3Oueucc9vM7EeS3ojX80Pn3LZWxJFnWnW1GEAStz3tze8d1Zez79pXq227azI+r6buQPOFAIQqnUufUyWNi4+sNVyq/JukFidqieZmc85tlXRugv0Vkq5rsv0rSb9qadsAkKknF3nPMEUxUdtSvV/n/OQV7alpedJl4qlwIKrSSdRM3nxnDWrj+xA1DfOoMRUHUDAqt+/VnpoDaldcpL4lHTI+f0ivzjq+TxcfIgOQDekkao9KWmhmT8lL0D6vxvnOAAARcHK/rnr2xrPDDgNAljWbqDnnfmJmMUlny7sJ6gbn3Bupz0Jg+o1qstFwjxojagAA5IN0RtQkaZ+k/ZLq40tExfWvhh0BAADwSbPTc5jZf0p6UlI/efOWPWFmt/kdGFqBe9QAAMgL6YyofVXSGOfcHkkysx9LWirpDj8DQws4pucA/DB8QLewQwBQoNJ6hdRh5drE9yEbpqWYDXziXVLZFG+9YpY095YU9VRlNy4gDVNmLdIrKzcf3F47/ZKD6xPvnq8V63cmPO/qcYMOTnWxvLJKl97zWtI2nrvxbI0Y6P2e3Pb0Wwenyjjc8AHdNPemxpl/htz6fNI6b580QpNPGyxJemLhOn3vmeVJy66dfskh9QJAkNJJ1LZJetvM/iTvbvULJL1hZj+TJOfct32MD+k44QLpkxXS+y/Gd3DpE8FomqTBP/tqD+iPKzaqel/dEccqt+8NISIAQUknUXs+/mmwwKdYClO6I2FlUxpH1xL56VBpV3ygs23H1scFZKDpSFqDdEehRgwsSXh+IndcMTLtSWfTrXPyaYMPjq5F1e8WV+q//rAiZZn2bVv2UnUA0ZbO9BwPBREIWmnvdm9Zdm3qhA5Azqna0zjn+JdPPzKpLDLTpDEDggwJQEDSnZ4Dfrj/HG+ZzSk2LvwxI2pAnvrmhOP0nQuHhh0GgACRqIVp45thRwC0yu2TRoQdAgDktbQTNTNr75xjslsAB0X93i4AyHXpTHg7zsyWS1oV3x5lZnf7HhkAAECBazZRkzRD0kRJWyXJOfempAl+BgUgNzyxcJ2eWLgu7DAAIG+lc+mzyDn3kR36WqIDPsUDIIc0TBTLJVAA8Ec6idrHZjZOkjOzYkk3SXrf37CQtu0fSX/5H6mO2wdRWDbt3KfpL76nnXuPnAQ2CFu37tNjH1UE0taHW6oDaQdA9KSTqP2LvMufgyV9KunP8X1orbHXtL6O5b+Tlv/WW+/YXSpq2/o6gRzwwvKNenrJ+nCD2PRpoM316doh0PYAhC+dCW83SfpS
ALEUnstmtL6O+vhV6FMmSedNk4qZcQWFoa7eSZLOO7mPvlg2KPD2V6xYoeHDhwfWXuf2bXTaMT0Caw9ANDT7r7qZPSDvHZ+HcM5N9SUitEzPE6TuQ8KOAgjc4B6ddcEpfQNvt93m91QeQrsACks6wy9/brLeQdIkSR/7E06B2bDUW/YfE24cAAAgktK59Pmbpttm9qik13yLqJDMLPeW6b6YHQAAFJSW3NB0jKTSbAcCIPesnX5J2CEAQF5L5x617Wq8R61I0jZJt/oZFAAAAJpJ1Myb5XaUpIZn4Oudc0c8WAAAAIDsS5moOeecmb3gnAvuGXRI78+Tnv66VLu3+bL14Uz2CUjSxLvnS5Lm3jS+xXVUbt+jq+5foM3VmU3afKCe/zMCyH/p3KO2zMzGOOeW+h4NPGti0r4d6Zdv00EadJpv4QDJrFi/s9V1vFVZpfU70vhPSQLtiov0mSHdWx0DAERV0kTNzNo45+okjZH0hpmtlrRbkskbbBsbUIyF67xp0mlpvASiqFgq5o0EyG0XDCvVjKszm6qmuMjUtrjIp4gAIHypRtQWSRor6bKAYik8U2Opjxe1ldryyhgUhuIiU4e2xWGHAQCRkipRM0lyzq0OKJbCw0S3AAAghVSJWm8z+3ayg865n/kQDwAAAOJSJWrFkrooPrIGH8y52Vtm4+XsAAAg76RK1DY6534YWCS57PErpVXzjtzf9NVQ958jbXwz8fkhJmpTZi3SKys3H9y+fdIITT5tsCTpiYXr9L0Xd0svPp/w3Kaz0k+8e37SJwCvHjdId1wxUpK0vLJKl96T/A1kz914tkYMLJEk3fb0W3pyUeLXyg4f0O2QKSGG3Jo4xoTf6ZnlScum/E5N+iFvvlMTGX2n46s0YmCJrh43KGkZAEDrNXuPGtKQKElL1wkXeMuq9dL2D731nZWtjylNTZM0IFMNiV069tYc0FuVO3T47Gfvf7oru0EBQB6xZC8aMLMezrltAcfji7KyMldRUZHRObFYTOXl5ekVnlYSX7bw5er7qqQ7T5Tq9h26/6Lp0ulpTM/RCg2jNsne2ZhRP+Qx+qFRS/vi8NHbw10ysp/unZw7s/7wM+GhHxrRFx76wWNmi51zZa2tJ+mIWr4kaYGYeFfrzt+9pTFJO/osb9mhRBrKC6+RPzbs8H7Ghw/opk7tDv2rp22x6cunHR1GWAAQaem8mQDNKZuSnXq6HyNNeSE7dQERdeeVozS0b7ewwwCAnECiVuCSXfIEAADh490r2VAxy/sAAABkESNq2TD3Fm+ZrUugAAAAYkSt4E28e74m3j0/7DAAAEACjKgVuGSTnwIAgPCRqPmterO0+GGpdk/yMnu3BxYOCsfmXfs1e9E67a09kNV6162r0aJ972UeT/X+rMYBAIWARM1vi2ZKr/4kvbLtu/obCwrKI39bq3te+cCfytesbvGpXdrz1w4ApIu/Mf3WMJJ2/HnS4DOSlzOTTrgwmJhQEPbUeCNpPTq307VnH5O1etesWaNjjz22Rece26uzBnbvlLVYACDfkagF5dhy6cybwo4CBegb5cfpuvEtS6wSiVmlysuPz1p9AIDkSNSyoaXv+AQAAEiBRK3AXT1uUNghAACAJEjUCtwdV4wMOwQAAJAEE95mw/3neB8AAIAsYkQtGza+GXYELba80ru/bsTAkpAjAQAAhyNR88Mrd0jv/9Fbr1ofbizNuPSe1yRJa6dfEnIkyJb5qzbrZy+9r3VbU0yyDADICSRqfnjt59KBw2Zh75G96RGAVH5XUaml63Yc3D66Z+cQowEAtAaJmh9cvbe89s9ScRupfTep53HhxoSCUe+cJOk7F56kiSP7kagBQA4jUfNTv1FSm3ZhR4ECNahHJ5I0AMhxPPUJAAAQUYyoZcPYa8KOAAAA5KFQRtTM7F/N7G0zW2FmT5pZBzObb2bL4p8NZvaHJOceaFJuTtCxH+LxK6UNS6XLZnifHHTu0D5hhwAAAJIIfETN
zAZIulnSMOfcXjP7raQvOefGNynzlKRnk1Sx1zk3OoBQm7dqnmRF0uTfhB1Ji91y3okHbz4HAADREtalzzaSOppZraROkjY0HDCzbpI+J2lKSLFl5v0XG9drdku1+yRFP/G57em3JHmvkJo1ZVzI0UCS9tUe0J6aA62up6auPgvRAACiIPBEzTm33szulLRO0l5J85xz85oU+bykl51zO5NU0cHMKiTVSZrunEt4iTRwa2LSY1+Q6mvDjiQtTy76WBLv+oyKtVt265IZ87U7C4kaACB/mAv4speZdZf0lKSrJO2Q9DtJv3fOPRY//kdJDzrnnkpy/oB4snespL9IOtc5tzpBuamSpkpSaWnpqbNnz84ozurqanXp0iVlmfLY5ZKkWPmzGrTuaR235hEdKGqn+qL22tZjtN4d9u8ZtRmkr724W5L08EWpp29Ipx8Kgd/9sOTTOs1Y6k2S3KVt6+vr1s70b2Ud1LNj9m9D5WfCQz946IdG9IWHfvBMmDBhsXOurLX1hHHp8zxJHzrnNkuSmT0t6UxJj5lZL0njJE1KdrJzbn18ucbMYpLGSDoiUXPOzZQ0U5LKyspceXl5RkHGYjE1e07MW5SXl0uvLZPWSMWn36DiC36kUkmlGbUYsBefl6Rmv2Na/VAA/O6Hmrc/kZYu1nknl+rBa1r9e+0rfiY89IOHfmhEX3joh+wK46nPdZJON7NOZmaSzpX0bvzYFyTNdc7tS3SimXU3s/bx9V6SzpL0TgAxpyH696UBAIDcEnii5pxbKOn3kpZIWh6PYWb88JckPdm0vJmVmdmD8c2TJVWY2ZuSXpF3j1pEErU4s7AjAAAAeSKUpz6dc9+X9P0E+8sT7KuQdF18/W+SRvgdX9qmxhrXmeICAABkGW8maI3+Y5psNCRquTGiNnxAt7BDAAAAzSBRy7YcufQ596bxzRcCAACh4qXsrTHnZu8jcekTLVa5fY8Wfrgt7DAAABHEiFprLHnEW142Q7l26RPR8c8Pv6H3P62WJLVrw88PAKARI2oFasitz2vIrc+HHQYkbdtdI0n67Im99S+fPT7kaAAAUcKIWrYcHFBjRAQtc+eVo9S7a/uwwwAARAgjalnDpU8AAJBdJGoAAAARRaKWLQ1PfXLpEwAAZAn3qLVGv1FNNpieAwAAZBeJWmtc/2qCnYyoAQCA7CBRy4bXZ0jvPuet58ilz9snReeVqalsrNqre1/5QHv2Hwg1jk8+3a85ny7zpe6d++p8qRcAkPtI1Fpr5wbppf9q3O7UK7xYMjD5tMFhh5CW31dU6rEF68IOw7NhvW9Vt29TpM7ti32rHwCQm0jUWmNaSeN6xx7eGwpOuCC8ePJQ7YF6SdKFp5TqgmF9Q4vjvffe1dChJ/tW/0l9u6pTO34dAQCH4l+GbGnfRTr50rCjSNsTC71RqlwZWRvWr0T/eOrA0NqP7fpA5SG2DwAoTCRqBep7zyyXlDuJGgAAhYhErTmPXymddLFUNsXbrpglzb0l3JhaYMqsRXpl5eawwwAAABlgwtvmrJonxaYnPz5kfHCxtEKiJG3CSb1DiAQAAKSLEbV0VH/SuF42pXF0TZK2r5V+MeqIU6Jm7fRLwg4BAABkiBE1AACAiGJEraXq66XHvyBtWBp2JDnjnr+s0sN/+0iZvG5rd8gT3QIAECYStZbatUFa/XLj9oBTw4slDRPvni9JmntTePfUPbN0vbZU78/4vOIi0/AB3XyICACAaCNRa62ittK335E6R/vG/BXrd4YdwkG/vf4MHdOrc9rlO7QtUtcObX2MCACAaCJRa60ufbwP0tajc1v17to+7DAAAIg8ErXmTKsKOwIAAFCgeOoTAAAgokjUAAAAIopErTn3n+N9AAAAAsY9as3Z+Gbj+rYPpf27vPXqTeHE00JXjxsUeJvOOa3Zslv7ar250PbX1QceAwAAuYxELV3vPCv99qsJDljgobTEHVeMDLzNR/62VtOeeyfBkdzoMwAA
wkailq6tq71lp15S136N+0dPDieeHLB2656D6yf38yasPb5Pl4zmUAMAoJCRqGVqzJel838QdhQZW17pTTMyYmBJ4G1//9JhmnLWMYG3CwBAriNRKxCX3vOaJGnt9EtCjgQAAKSLpz4BAAAiihG15oy9JuwIAABAgSJRa85lM7zl/J+FGwcAACg4XPoEAACIKEbUmrNhqbRnh7TgvrAjySlvrN2mtyp3hB0GAAA5jUStOTPLD91uxxxg6Zgy6w1V76+TJHVqVxxyNAAA5CYStUx0HyKV/XPYUbTIczeeHWh7DUnaf1x0ki4Z2T/QtgEAyBckapn47K1S515hR9EiYUx0K0nfKD8+lHYBAMgHJGqpPH7lodsW7jsqp8xapFdWbj5kX9MJbCfePV8r1u9MeO7V4waF8r5PAADQcjz1mcqqed6ya9/4jnATtcOTNAAAkN8YUUvH0WdLK34f+ohag2SvgZp70/iAIwEAAH5iRC2VqTHvI+dtG90FAACCw4haKv3HeEtXH24ccbdPGhF2CAAAIEAkaulwDSNq4V76nHza4FDbl6S7/vy+Fq7ZFnYYAAAUBBK1VObcHF+JJ2ohP0wQtn21B3TXn1elXb5fSQcfowEAIP+RqKWy5BFvefJl3jLke9SeWLhOUngjaw0Di5L0xHWnNVv+pL5dfYwGAID8R6KWjoZ71EK+9Pm9Z5ZLCv8SaIe2RTrz+Nyc+BcAgFzCY4wZKexLnwAAIFgkaumIyMMEAACgsJCopYV51AAAQPDIPNJxcB41RtQAAEBweJgglX6jvGVEHiYIU329U219NCb+BQCgUJCopXL9q1JdjfQ/veM7CjNR+3jbHl1+7+vatrsm7FAAACgoJGrN2bWhcb3/6PDiUPKXsfvtnY07DyZpbYpMFwzrG0ocAAAUGhK1dB01WOpa2AnK+cNK9cBXy8IOAwCAghHKwwRm9q9m9raZrTCzJ82sg5k9bGYfmtmy+Cfh8JWZXWNmq+Kfa3wNdFqJ9ItRvjYBAACQTOAjamY2QNLNkoY55/aa2W8lfSl++DvOud+nOLeHpO9LKpM3Z8ZiM5vjnNvud9xRMPHu+ZKkuTeNDzkSAAAQhLAufbaR1NHMaiV1krShmfINLpT0knNumySZ2UuSLpL0pC9RRsyK9TvDDgEAAAQo8ETNObfezO6UtE7SXknznHPzzGyypB+b2X9LelnSrc65/YedPkDSx022K+P7jmBmUyVNlaTS0lLFYrGM4qyurj5ke+++fVqYYR1+yfS7tEZ1dbVWfrpCkrRly5ZA246S6urqgv3uh6MvPPSDh35oRF946IfsCuPSZ3dJl0s6RtIOSb8zsy9Luk3SJ5LaSZop6buSftjSdpxzM+P1qKyszJWXl2d0/uE/ZB07dFCmdWTdi89LUqBxxGIxDT9mqLR0sXr16qXy8sJ8mCAWi4X/5x8R9IWHfvDQD43oCw/9kF1hXPo8T9KHzrnNkmRmT0s60zn3WPz4fjObJenfE5y7XlJ5k+2BkmL+hVpY6g7Ua9Habdpbc+DgvuWb6lS7f0eIUQEAULjCSNTWSTrdzDrJu/R5rqQKM+vnnNtoZibp85JWJDj3T5Juj4/KSdIF8kbi/FeX/5O9/vrvH+mHc99JcGS1JG8ONQAAEJww7lFbaGa/l7REUp2kpfIuUf7RzHrLm/5/maQbJMnMyiTd4Jy7zjm3zcx+JOmNeHU/bHiwwBcT75LemSOt+YvUrb9vzUTFp7v2SZKO7d1ZQ3p2liRt3bpVPXv2VJsi07VnHxNmeAAAFJxQnvp0zn1f3jQbTX0uSdkKSdc12f6VpF/5F10TZVOkbWu8RG3Y5YE0mcrV4wYF0s6Vpw7Sv5QfJ6nhXoPPBNIuAAA4FG8miKgpsxbplZWbD24/d+PZuuOKkSFGBAAAghbKmwlyRsUs6ZNEt8r5r2mSBgAAChMjaqnMvSXsCEJ7ETsAAAgfI2oAAAARRaIGAAAQUVz6jKjnbjzbt7q3
7a7RQ6+tUfW+ukP2V3xUEO+2BwAgZ5CopavjUYE2N2JgiW91P72kUve+sjrp8ZKObX1rGwAApI9ELR39x0gjrwo7iqzZV+u9Imr8Cb107tA+hxzr2qGtLhnZL4ywAADAYUjU0jHqaqltx0CbvO3ptyTJ17nTRg4s0dfO4m0DAABEFQ8TpDJuamhNP7noYz256OPQ2gcAAOEjUQMAAIgoEjUAAICIIlFLZcXTYUcAAAAKGIlaKnu2hB0BAAAoYDz1mYd++Nw7emXlpqTHt++pCTAaAADQUiRq6di/K/Amhw/o1uJzH/7bh6p3zZc7sbRri9sAAAD+I1FLR+nwwJuce9P4Fp/bkKP9+dufVZElLtOpXRv1LenQ4jYAAID/SNTy2LG9OqsoWaYGAAAij4cJAAAAIopELZVuA0Nresitz2vIrc+H1j4AAAgfiVoqpaeEHQEAAChgJGoAAAARRaKWyr6qsCPI2Jbq/XJpTM0BAACij0QtlY8XeEtXH24caXr+rY0q+58/hx0GAADIEqbnSEeOJGorP9kpSeraoY0uG9WfqTkAAMhxJGpJjHjrh40bHY9KWXbKrEV6ZeXmhMeGD+h2yOS1qZ7kvH3SCE0+bbAkqU/X9tq0a38GETe67uxj9a3zTmjRuQAAIDq49JlEz22Lm2ylHpnyY4b/W847URNO6p31egEAQO5gRC0dljpRu+OKkbrjipFpVbV2+iVplZt82uCDo2sAAKAwMaKWFu71AgAAwSNRS6Li1J9KfeIT3jYzora8skrLK3NvKg8AABBtXPpMorrr8VK7TvGt1Inapfe8Jin9y5oAAADpIFFLpWHm2GZG1IK0YM1WfbCpOuGxt9YzqgcAQD4hUUvixJX3SlVr41vRSNS2Vu/X5AcWqL6ZNw+0b8sVbQAA8gGJWhL9N85r3LBoJD679tWp3kld2rfRZaP7JyzTpX0b/ePYgQFHBgAA/ECilo5oDKgd1LNLO90+aUTYYQAAAJ9FY6go8iKWqQEAgIJAopaOCD1MAAAACgeXPtOSOlF77sazA4oDAAAUEhK1dDQzojZiYElAgQAAgELCpc8kdnU5TmrT8LJ1Ln0CAIDgMaKWxOKyn6k8drm30cyI2m1PvyVJab+YPZEPt+zWXX9+X3trDiQtsyfFMQAAkH9I1JKw+rrGjc69U5Z9ctHHklqXqP2u4mM9u2xDWmV7d2nf4nYAAEDuIFFLwlx940bXvr63Vxd/3cAVYwfogmHJ2zOTyo7u7ns8AAAgfCRqSZwz/0pvpTjY0auTSrvqouH+J4YAACD6eJigORF5fRQAACg8ZCHNYbJbAAAQEhK1ZpGoAQCAcHCPWnPSGFEbPqBbAIEAAIBCQ6LWrOYTtbk3jQ8gDgAAUGhI1Jrj0z1qd7+8Svf9dbXqnTctR+0B50s7AAAgd3GPWhKrjrs2vuZPojbvnU+1p+aA9tXWa19tvQ7UO3VoW6RRg47ypT0AAJB7GFFL4tO+E3TC6ofSytOG3Pq8JGnt9Esybud3N5yh4f29l7oXF5natSF3BgAAHhK1pBouRfr71Gf7NkXq2K7Y1zYAAEBuYvgmib4b/+KtMI8aAAAICYlaEsevmRVfI1EDAADhIFFrDiNqAAAgJCRqzeFdnwAAICShZCFm9q9m9raZrTCzJ82sg5k9bmYr4/t+ZWZtk5x7wMyWxT9zAog26zXu2ler5eursl4vAADIL4E/9WlmAyTdLGmYc26vmf1W0pckPS7py/FiT0i6TtJ9CarY65wbHUiwklRf12yR2yeNyKjKyQ8sPLhexKVVAACQRFjTc7SR1NHMaiV1krTBOTev4aCZLZI0MKTYDrV3W7NFJp82OKMq1+/YK0macFJvDe3btUVhAQDyR21trSorK7Vv376wQ2m1kpISvfvuu2GHEZgOHTpo4MCBats24YXAVgs8UXPOrTezOyWtk7RX0rzDkrS2kr4i6VtJquhgZhWS6iRNd879wdeAB47zreo7rxyl
NsXcAwcAha6yslJdu3bVkCFDZDl+pWXXrl3q2rUwBiGcc9q6dasqKyt1zDHH+NKGORfsOybNrLukpyRdJWmHpN9J+r1z7rH48Qck7XbO3ZLk/AHxZO9YSX+RdK5zbnWCclMlTZWk0tLSU2fPnp1RnG0/WaKz3vuBqrqdpKVjf5KybOzjWklS+aD0sumbXt6tXbXSjM91Urd20f6FrK6uVpcuXcIOI3T0QyP6wkM/eOiHRq3pi5KSEh133HE5n6RJ0oEDB1RcXDgTuTvntHr1alVVHXrv+YQJExY758paW38Ylz7Pk/Shc26zJJnZ05LOlPSYmX1fUm9J1yc72Tm3Pr5cY2YxSWMkHZGoOedmSpopSWVlZa68vDyjIJc86w3blnQrUXPnfi3+CqlpXzk/rbrbzn9Jqq3RWWeeqZ5d2mcUV9BisViz378Q0A+N6AsP/eChHxq1pi/effdddevWLbsBhaSQRtQadOjQQWPGjPGl7jCuu62TdLqZdTLvvw7nSnrXzK6TdICYyRIAACAASURBVKGkq51z9YlONLPuZtY+vt5L0lmS3gkobgAA8s7WrVs1evRojR49Wn379tWAAQMObtfU1KRVx5QpU7Ry5cq023zwwQfVu3dvjR49Wqeccoq++MUvau/evS39Ckeoq6vTUUcdJUn6+OOPddVVV2Wt7qAFnqg55xZK+r2kJZKWx2OYKemXkkol/T0+9cZ/S5KZlZnZg/HTT5ZUYWZvSnpF3j1qviRqQ9+7249qAQCIlJ49e2rZsmVatmyZbrjhBv3rv/7/7d17fE1X2sDx3yNIhFRc3mJUqbiGxBGZyEWLmQx1GUOr1aIX1VHTqVur82prymiZGC11e8fbFsW4zVtNq62MDlNG5a1INe4iigavS2lJxDWy3j/2zpHIyVVOTiTP9/M5n+yz99prr72yc87K2muvZ5zzffXq1QHr9l5Wlss+FAAWL15M69ati3XcIUOGkJSUxN69ewH48MMPS34SBWjSpAmrV692S95lwSMj2Y0xk4wxbYwx7Y0xTxhjrhpjqhpjAowxDvs1xU6baIx51l6ON8YEGWM62D8XuquMvpdPuCtrpZRSqtw7dOgQgYGBDBkyhHbt2nHy5ElGjBhBaGgo7dq1Y8qUKc60Xbp0ISkpiczMTJo0acKECRPo0KEDERERnDlzpsDjZGZmcunSJerUqQPAJ598QufOnenYsSM9evRw7v+vf/2LDh064HA4CAkJISMjA4CYmBjCwsIIDg7OVaac5+FwWLN6vf/++wwcOJCePXvSsmVLXnnlFWe6uLg4IiIiCAkJYdCgQc78Pc1T03NUWhcuXefHjKJ1JSullKp8mtnjnkvb0Zg+xd7nwIEDLF26lNBQa0x8TEwMdevWJTMzk+7duzNw4EACAwNz7XPhwgW6du1KTEwML774IosWLWLChAl58l6+fDmbNm3i//7v/2jXrh29e/cG4IEHHqBfv36ICAsWLODtt99m+vTpzJgxg3fffZfOnTtz8eJFfHx8WLduHampqWzbtg1jDL179yY+Pp6wsPxnbNi5cyfffPMN1apVo1WrVowaNYqqVasSExPDxo0b8fX1ZerUqcyePZtXX3212HVW2nRuiMLcuFqq2a1OTHUue1erPE/FKKWUuvMEBAQ4G2kAK1euJCQkhJCQEPbv38++fXlHH9WoUYNevXoB0KlTJ44ePeoy7+xbn6dOnaJVq1bMnDkTgNTUVHr06EFQUBAzZ8503hqNiopizJgxzJ07l7S0NLy8vPjiiy+Ii4ujY8eOhISEcOjQIQ4ePFjgOUVHR3PXXXdRo0YN2rRpQ2pqKvHx8ezbt4/IyEgcDgfLly/Pt9xlTXvUChM5ulSzy7h6A4AO99SmlrdWv1JKqdxK0vPlLjVr1nQup6SkMHv2bBISEvD392fo0KEuJ+jNHtcG4OXlRWZmwRF+qlSpQt++fXnvvfcYP348v//973n11Vfp3bs3GzZsICYmBoCJ
EyfSr18/Pv/8c8LDw9m4cSPGGCZOnMjw4cNz5VnQMb29b862kF0+YwwPPvggy5YtK7hCPEB71ApTr0WhSY7G9Cn2H1a31neXtERKKaVUmUtLS8PPz4+77rqLkydPsn79+lLL+6uvviIgIACwbp02btwYYwxLlixxpvnuu+8IDg7mlVdeISQkhOTkZHr27MnChQud48mOHz/O2bNni338yMhINm/ezOHDhwHIyMggJSWlFM7s9mmXTmEqwOSDSiml1O0KCQkhMDCQNm3a0LRpU6Kiom4rv+wxajdu3KBp06Z88MEHAEyePJkBAwZQt25dunXrxsmTJwF466232LJlC1WqVCE4OJgePXpQvXp1Dhw4QHh4OAB+fn6sWLHCOTVHUTVo0ICFCxcyaNAg55Qk06ZNo2XLlrd1jqWhzCMTeEJoaKhJTEws1j7X32xMtcyL8NwWaBRcamWZ9c+DzN6YwphftmTcr1qVWr7uopNZWrQebtK6sGg9WLQebrrdCW/btm1bugXykMo44a2r35+IlEpkAr31mY+r3vWsBSm8ivrO3ULfuVvcXCKllFJKVTZ66zMfkt3TWIRbn3tOpLm5NEoppZSqjLRHLR9VsrKn5dAxakoppZTyDO1Ry0eNK6ethXx61F75aBcrE44VKa9th88xY30y125kcepC3keZlVJKKaVc0YZaoYrWo9a99X/ku23NjuMkfv9TrnVN6/neVqmUUkopVfFpQ60w+fSo/fmhYP78UNGeBs2yh7uN/kULftm2ATW9q9Li7lqlVUKllFJKVVA6Rq1QeRtqu49fYPfxC8XO6Z66vnRo4q+NNKWUUuXGuXPncDgcOBwOGjZsSOPGjZ3vs+cUK4pFixZx6tQpl9uGDh3Kfffdh8PhoE2bNrz55pulVXwANmzYQP/+/QGIjY1lxowZpZq/J2mPWmFc9Kj9et5XQPkK86GUUkqVRL169UhKSgKsyWZr1arF+PHji53PokWLCAkJcUYYuNWsWbPo378/ly9fpk2bNjz11FM0adLktsruyoABA0o9T0/SHrVC6VOfSimlKqclS5YQFhaGw+Hg+eefJysri8zMTJ544gmCgoJo3749c+bMYfXq1SQlJTFo0CCioqIK7Im7fPkyIoKvrzVWe9KkSfz85z+nffv2jBw5kuyJ+GfNmkVgYCDBwcEMHToUgIsXL/L0008TFhZGx44d+fTTT/Pk//777zN27FjA6skbM2YMkZGRNG/enNjYWGe6mJgYwsLCCA4OZsqUKaVWZ6VNe9QKoyGklFJKlbFmEz7Pd9u0AUEM7nwvACu2pfJq7O58097OnZ89e/YQGxtLfHw8VatWZcSIEaxatYqAgADOnj3L7t3Wcc+fP4+/vz9z585l3rx5BAQE5ArMnm3cuHFMnjyZlJQUXnrpJerVsyaWHzNmDH/6058wxjB48GD+8Y9/0KtXL/7yl7/w/fffU716dc6fPw/AlClTePDBB/nggw/46aef6Ny5M7/61a8KPI8zZ86wdetWdu/ezaOPPsqAAQNYt24dqampbNu2DWMMvXv3Jj4+nsjIyBLXl7toj1o+rlSv5+kiKKWUUh6zYcMGtm/fTmhoKA6Hg82bN/Pdd9/RokULkpOTGT16NOvXr6d27dpFym/WrFkkJSVx6tQp1q1bR0JCAgAbN24kLCyMDh06sHnzZvbu3QtAu3btGDp0KMuXL6datWoAfPHFF0ydOhWHw0H37t25cuUKqampBR63f//+iAjBwcGcOHHCmU9cXBwdO3YkJCSEQ4cOcfDgwZJWlVtpj1p+xMv+WXiP2rXMLC5dyyxwu1JKKVVURe0JG9z5XmfvWmkzxvDMM8/wxhtv5Nm2a9cu4uLimD9/PmvWrOHdd98tcr5+fn507dqVr776ivbt2/PCCy+wY8cOGjduzMSJE7lyxZpvdP369WzevJm1a9cybdo0du3ahTGGjz/+OM84uIIaa97e3rnOKfvnxIkTGT58eJHL7Sna
UHPFGHyunrHfFNxQu3DpOr94exPnMor+ZIxSSilV3kVHRzNw4EDGjBlD/fr1OXfuHBkZGdSoUQMfHx8eeeQRWrZsybPPPgtYDbD09PRC871+/ToJCQmMHz+ey5cvU6VKFerXr096ejpr1qxhyJAh3Lhxg+PHj/OLX/yCLl260KRJEy5dukTPnj2ZO3cu77zzDgDffvstHTt2LPa59ezZkzfffJPHHnuMmjVrcvz4cXx8fKhfv36x83I3bai5knXj5rJ/wf+pfP9jBucyrlFFwM+nWr7p6tasTlizuqVVQqWUUsqtgoKCmDRpEtHR0WRlZVGtWjUWLFiAl5cXw4cPxxiDiDB9+nQAhg0bxrPPPou3tzeJiYl5xqllj1G7evUqPXv2pF+/fogITz31FIGBgTRq1IjOnTsDkJmZyeDBg0lPTycrK4vx48fj5+fHpEmTGDt2LEFBQWRlZdGiRQs++eSTYp9b7969OXDgAOHh4YDVyFyxYkW5bKhJdjdgRRYaGmoSExOLvsONTHjDHqM2Oe98adlzqAXdU5tdx8/Tb95WghrX5tNRXUqjuOXKpk2b6Natm6eL4XFaDzdpXVi0HixaDzfdTl3s37+ftm3blm6BPCQ9PR0/Pz9PF6NMufr9icg3xpjQ281be9RKIOieog2cVEoppZS6HfrUp1JKKaVUOaUNtRJ45aNdvPLRLk8XQymllFIVnDbUSmBlwjFWJhzzdDGUUkopVcFpQ00ppZRSqpzShtptSjjyo6eLoJRSSqkKShtqBcmOTpCP/SfTePPz/QBU89KYoEoppe48586dw+Fw4HA4aNiwIY0bN3a+Lyi4ek7Dhg0jOTm5SGmzsrKoW7cuaWlpABw7dgwR4euvvwasqAH16tVzxvd0JTY2lhkzZhR4nA0bNtC/f3+X22bOnOmMgFBUBeXnTjo9x204d/HmBfxSj9YeLIlSSilVMvXq1SMpKQmAyZMnU6tWLcaPH58rjTEGYwxVqrju31m8eHGRj1elShXCwsL4+uuv6dGjB/Hx8XTs2JH4+HjCw8PZt28fjRo1wt/fP988BgwYUOTjuTJz5kyeeeYZfHx8biufsqA9aqUgqkU9olqUv9mMlVJKqZI6dOgQgYGBDBkyhHbt2nHy5ElGjBhBaGgo7dq1Y8qUKc60Xbp0ISkpiczMTJo0acKECRPo0KEDERERnDlzJk/ekZGRxMfHAxAfH8+4ceNyvY+KigLg9OnTPPTQQ4SGhjobdwDvv/8+Y8eOBSAlJYXOnTsTFBTEa6+9lquBl56ezkMPPUTr1q158sknASs4/JkzZ7j//vuJjo4GIC4ujoiICEJCQhg0aBAZGRkAfP7557Ru3ZqQkJASRUAoDdqjVhBzw+Xq9o3vKuOCKKWUqjQmu2lSdReRdgpz4MABli5dSmioNcF+TEwMdevWJTMzk+7duzNw4EACAwNz7XPhwgW6du1KTEwML774IosWLWLChAm50kRFRTlDTyUmJhITE8Ps2bMBq6GWHeFh9OjR/OEPfyA8PJyjR4/St29f9uzZkyuvUaNGMX78eB555BHmzZuXa9uOHTvYu3cvDRo0IDw8nK+//ppx48bx9ttvs2XLFvz9/Tlz5gwxMTFs3LgRX19fpk6dyuzZsxk7dizPPfccmzdvpnnz5gwcOLDY9VcatEetBD4bdT+fjbrf08VQSiml3CogIMDZSANYuXIlISEhhISEsH//fvbt25dnnxo1atCrVy8AOnXqxNGjR/Ok6dy5M4mJiVy8eBFjDDVq1ODee+/l6NGjuXrUNmzYwMiRI3E4HPTv35+ffvqJy5cv58pr27ZtPPzwwwAMHjw417bw8HB+9rOf4eXlhcPhcFmW+Ph49u3bR2RkJA6Hg+XLl3P06FH27dtHq1atCAgIQEQYMmRIsequtGiPmlJKKVWelKDny11q1qzpXE5JSWH27NkkJCTg7+/P0KFDXQ7IzxmM3cvLi8zMzDxpatWqRbNmzVi8eLGz
IRgeHs6nn37KhQsXaNGiBWCNjUtISMgT4L2ovL29Cy2LMYYHH3yQZcuW5VpfrBjhbqQ9akoppZQqVFpaGn5+ftx1112cPHmS9evX31Z+kZGRvPPOO0RERAAQERGR6z1AdHQ08+fPd77Pfughp7CwMGJjYwFYtWpVkY7t5+dHenq6sxybN2/m8OHDAGRkZJCSkkJgYCApKSkcOXIEYwwrV64s2YneJm2olUCzCZ/TbMLnni6GUkopVWZCQkIIDAykTZs2PPnkk87bkyUVFRXF4cOHnQ2z0NBQjh07RmRkpDPN/Pnz2bp1K8HBwQQGBvLee+/lyWfOnDlMnz6d4OBgjhw5Qu3ahY/xGzFiBNHR0URHR9OgQQMWLlzIoEGD6NChA5GRkRw8eBBfX18WLFhAr169CA0NpVGjRrd1viUlxhiPHLgshYaGmmJ1YS7tD4e/tJYnX2DY4gS+TP4hT7L7W9ZnS8pZolrUY/mz4aVU2vJl06ZNzkGdlZnWw01aFxatB4vWw023Uxf79++nbdu2pVsgD0lPT8fPz6/MjpeRkYGvry8iwt/+9jdiY2NZs2ZNmR0fXP/+ROQbY0xoPrsUmY5Rc6V2k1xvXTXSALaknAWgjm/J7p0rpZRS6vZs376dsWPHkpWVRZ06dYo1p9udQBtqrjSLgm+XQiMHANMGBAEwuPO9ALSaGMe1zCymDQjCp1oVurW+22NFVUoppSqzbt26uRy7VlFoQ60g/2FFG8huoDnZd4sfCmmMT7WCw0wppZRSSpWUPkzgypF/F7jZ2C010fCeSimllHIjbai5krQ819sV21JZsS01TzJBW2pKKaWUch+99VkEr8buBm7eAs1+UFZ71JRSSinlTtqjVgIVf0ITpZRSlcW5c+dwOBw4HA4aNmxI48aNne+vXbtW5HwWLVrEqVOn8qz/5ptvcoWhWrZsGbVq1eLGDSue9rfffktISEiBeb/22mt8+eWXBaaZOHEi77zzTp71P/74IwsWLCjKKRQpv7KmDbUSyJ57TjvUlFJK3enq1atHUlISSUlJjBw5knHjxjnfFyd0U34NtQ4dOvDdd99x6dIlwIqt2apVK3bu3Ol8n3OSW1emTp1K9+7di3FWN5W0oVZeaEOtBLJ71ETvfSqllKrAlixZQlhYGA6Hg+eff56srCwyMzN54oknCAoKon379syZM4fVq1eTlJTEoEGDiIqKytUTV7VqVTp27EhCQgJg9aD97ne/Iz4+HiBXEPbt27fTtWtXOnXqRK9evTh9+jQAQ4cO5eOPPwZg7dq1tG7dmk6dOjFq1Cj69+/vPNbu3bvp2rUrzZs3d4aemjBhAsnJyTgcDiZMmABATEwMYWFhBAcHM2XKFOf+U6ZMoVWrVnTp0oWUlBR3VWux6Bi1gpx1/UtyjlErw6IopZSqRCYXEAap7zsQOsxaTlwMn40tIJ+SB3jfs2cPsbGxxMfHU7VqVUaMGMGqVasICAjg7Nmz7N5tjd8+f/48/v7+zJ07l3nz5hEQEJCnJy4qKor4+Hg6deqEt7c3DzzwAH/605944YUXiI+PZ9q0aVy9epUxY8awdu1a6tevz/Lly/njH//Iu+++68zn0qVLPP/882zdupV7772XRx99NNdxDh48yMaNGzl//jxt27Zl5MiRxMTEcOjQIedca+vWrSM1NZVt27ZhjKF3797Oc1yzZg07d+7k2rVrOByOXHFHPUUbagX5WccCN2uHmlJKqYpqw4YNbN++3Tm+7PLlyzRp0oSePXuSnJzM6NGj6dOnDz169Cg0r8jISObPn8/Pf/5zwsLCaNWqFcnJyZw8eZLr16/TtGlTkpKS2Lt3L9HR0QDcuHGDe+65J1c++/bto3Xr1jRt2hSAxx9/nKVLlzq39+3bl+rVq3P33XdTt25dfvghb2ShL774gri4ODp2tL7jL168yMGDBzl79iwPP/wwNWrUoEaNGvz6178uWcWVMm2oudJrBsS9XGhLTG99KqWUcoui
9oSFDrvZu1bKjDE888wzvPHGG3m27dq1i7i4OObPn8+aNWty9Xq5EhERwZNPPsnWrVuJiIhARGjYsCFr1qxxjk8zxhAcHMyWLVtKXGZvb2/nspeXF5mZmS7Pa+LEiQwfPjzX+rfeeqvEx3UnHaNWIKshdjSmD0dj+gA3HyRQSimlKrLo6Gj+/ve/c/asFdf63LlzpKam8sMPP2CM4ZFHHmHKlCns2LEDAD8/P9LT013m5e/vT4MGDVi2bJnzdmJ4eDizZs1yjk8LDAzkxIkTzrFs165dY+/evbnyCQwMJDk5mWPHjmGMYfXq1YWex63l6tmzJwsXLiQjIwOA48ePc/bsWR544AFiY2O5cuUKaWlpfPbZZ8WpLrfRHjWX8m+MaTtNKaVUZRAUFMSkSZOIjo4mKyuLatWqsWDBAry8vBg+fDjGGESE6dOnAzBs2DCeffZZvL29SUxMdDlObf369TRq1Aiwetlef/11Z4+at7c3H374IaNHjyYtLY0bN27w0ksv0a5dO2cevr6+zJs3j+joaGrVqkVoaChXrlwp8DwaNGhAp06dCAoKok+fPsTExHDgwAHCw8MBqyG3YsUKwsLCGDBgAMHBwTRo0ICwsLBSq8vbIZWhhyg0NNQkJiYWfYeZgZB2AsJGQO8ZuTbdyDIEvLoOETjy5z6lXNLyZ9OmTXTr1s3TxfA4rYebtC4sWg8WrYebbqcu9u/fT9u2bUu3QB6Snp6On5+f2/K/ePEitWrVwhjDc889R1BQEKNGjXLb8YrC1e9PRL4xxoTms0uR6a1PV9JO2AvWrc++c7fQd651z1znUFNKKaU8569//SsOh4PAwEAuX77Mb3/7W08Xya301mcR7DmRBliNtCNnrXva+iCBUkopVfZefvllXn75ZU8Xo8xoQ60gtzTG/vvfh4mJOwBAFW2nKaWUUsrNtKFWoNytsSM/WL1pDe/y4eFOjT1RIKWUUhVU9uB8dWdx91h/HaNWAmOjW/JyzzaeLoZSSqkKwsfHh3PnzukUUHcYYwznzp3Dx8fHbcfQHrWC6H82SimlysA999zD8ePHXc6kf6e5cuWKWxsu5Y2Pj0+eCAqlySMNNREZBzyLNWHZbmAY0AhYBdQDvgGeMMZcc7HvK8Bw4AYw2hizvtQL2DgUTiSiz3YqpZQqC9WqVeO+++7zdDFKxaZNm5zhmdTtK/NbnyLSGBgNhBpj2gNewGPAdGCWMaYF8BNWY+zWfQPttO2AB4H/EhGvUi9ku/653j4e1oTHw5qU+mGUUkoppQriqTFqVYEaIlIV8AVOAr8APrS3LwH6u9jvN8AqY8xVY8wR4BBQ+lMHZ48RsG99/vmhYP78UHCpH0YppZRSqiBl3lAzxpwA3gJSsRpoF7BudZ43xmRHTz0OuHqssjFwLMf7/NLdHueEtzBscQK7jxcxOK5SSimlVCkq8zFqIlIHq2fsPuA88D9YtzFL+zgjgBH224siklzMLOrDtLMwjQ9u2fD4dHj8tkt4x6gPnPV0IcoBrYebtC4sWg8WrYebtC4sWg+W1qWRiSceJogGjhhjfgAQkY+AKMBfRKravWr3ACdc7HsCyDlYLL90GGPeBd4taSFFJLE0YnTd6bQeLFoPN2ldWLQeLFoPN2ldWLQeLCJSjCDj+fPEGLVUIFxEfMWa2e+XwD7gS2CgneYp4BMX+64FHhMRbxG5D2gJJJRBmZVSSimlypwnxqhtw3poYAfW1BxVsHq+/hN4UUQOYU3RsRBARPqJyBR7373A37Eadv8Afm+MuVHW56CUUkopVRY8Mo+aMWYSMOmW1Ydx8QSnMWYtVk9a9vupwFS3FtBS4tumFYzWg0Xr4SatC4vWg0Xr4SatC4vWg6VU6kE0XIVSSimlVPmksT6VUkoppcqpSt9QE5EHRSRZRA6JyAQX271FZLW9fZuINCv7UrqXiDQRkS9FZJ+I7BWRMS7SdBORCyKSZL9e90RZ3U1EjorIbvsc8zyxI5Y59vWw
S0RCPFFOdxOR1jl+10kikiYiY29JUyGvCRFZJCJnRGRPjnV1ReSfIpJi/6yTz75P2WlSROSpsit16cunHmaIyAH72o8VEf989i3w7+hOk09dTBaREzmu/9757Fvgd8ydJJ96WJ2jDo6KSFI++1aYayK/70y3fU4YYyrtCyt81XdAc6A6sBMIvCXN88ACe/kxYLWny+2GemgEhNjLfsBBF/XQDfjM02Utg7o4CtQvYHtvIA4rEGw4sM3TZS6DOvECTgFNK8M1ATwAhAB7cqz7CzDBXp4ATHexX12ssbZ1gTr2ch1Pn08p10MPoKq9PN1VPdjbCvw7utNe+dTFZGB8IfsV+h1zJ71c1cMt298GXq/o10R+35nu+pyo7D1qYcAhY8xhYwWAX4U1GW9Ov8EKaQXW06q/tKcVqTCMMSeNMTvs5XRgP+6I+FAx/AZYaixfY83/18jThXKzXwLfGWO+93RByoIx5t/Aj7eszvk5kF+Iu57AP40xPxpjfgL+iRsm8y4rrurBGPOFuRlB5musuSwrvHyuiaIoynfMHaOgerC/Fx8FVpZpoTyggO9Mt3xOVPaGWlFCUjnT2B9QF7CmD6mQ7Fu7HYFtLjZHiMhOEYkTkXZlWrCyY4AvROQbsaJb3KpswpiVL4+R/4dvZbgmABoYY07ay6eABi7SVLZr4xms3mVXCvs7qihesG8DL8rnNldluibuB04bY1Ly2V4hr4lbvjPd8jlR2RtqKgcRqQWsAcYaY9Ju2bwD69ZXB2Au8HFZl6+MdDHGhAC9gN+LyAOeLpAniUh1oB9WqLdbVZZrIhdj3b+o1I/Li8hrQCawPJ8kleHv6K9AAODAilv9tmeL43GPU3BvWoW7Jgr6zizNz4nK3lArSkgqZxoRqQrUBs6VSenKkIhUw7rglhtjPrp1uzEmzRhz0V5eB1QTkfplXEy3M8acsH+eAWLJO7dfkcOYVRC9gB3GmNO3bqgs14TtdPYtbvvnGRdpKsW1ISJPA32BIfaXUR5F+Du64xljThtjbhhjsoD3cH2OleWaqAo8BKzOL01Fuyby+c50y+dEZW+obQdaish9ds/BY+SYXNe2FiukFVghrv6V34fTncoeW7AQ2G+MmZlPmobZY/NEJAzr2qlQDVYRqSkiftnLWAOn99ySbC3wpFjCgQs5uroronz/S64M10QOOT8H8gtxtx7oISJ17NtgPex1FYaIPAj8AehnjLmUT5qi/B3d8W4ZmzoA1+dYlO+YiiAaOGCMOe5qY0W7Jgr4znTP54Snn57w9AvrKb6DWE/mvGavm4L1QQTgg3Xb5xBWXNHmni6zG+qgC1YX7S4gyX71BkYCI+00LwB7sZ5a+hqI9HS53VAPze3z22mfa/b1kLMeBJhvXy+7gVBPl9uN9VETq+FVO8e6Cn9NYDVMTwLXscaPDMcal7oRSAE2AHXttKHA+zn2fcb+rDgEJzv9ywAABORJREFUDPP0ubihHg5hja/J/pzIfiL+Z8A6e9nl39Gd/MqnLpbZnwG7sL6gG91aF/b7PN8xd+rLVT3Y6z/I/lzIkbbCXhMFfGe65XNCIxMopZRSSpVTlf3Wp1JKKaVUuaUNNaWUUkqpckobakoppZRS5ZQ21JRSSimlyiltqCmllFJKlVPaUFNKuZ2I3BCRpByvZgWkbSYi5WKOJREJFZE59nI3EYnMsW2kiDzppuM+LSI/iMj79vsoO1RRooi0tNf5i8gXIlIlx35fishFEQl1R7mUUmWvqqcLoJSqFC4bYxyeLkRxGWMSgUT7bTfgIhBvb1vg5sOvNsa8YC+/hDVPUzOsuexeAiYC04w1M352ebuLyCY3l0spVYa0R00p5RF2z9kWEdlhvyJdpGknIgl2L9yuHL1JQ3Os/28R8XKx71ER+YuI7LbTtshx3H/Z+W0UkXvt9Y+IyB47yPy/7XXdROQzuwdwJDDOPub9IjJZRMaLSBsRSbjlvHbby51EZLNYgajX5wgvM1pE9tll
WFWE6roO+Nqv6yISADQxxmwqeo0rpe5E2qOmlCoLNUQkyV4+YowZgBUH71fGmCt2A2wl1gzeOY0EZhtjltsheLxEpC0wCIgyxlwXkf8ChgBLXRz3gjEmyL5F+Q5WjMq5wBJjzBIReQaYA/QHXgd6GmNOiIh/zkyMMUdFZAFw0RjzFoCI/NLedkBEqovIfcaYI3bZVosVC3Au8BtjzA8iMgiYijUr+QTgPmPM1VuPlY8/2+d3GXgCeAurR00pVcFpQ00pVRZc3fqsBswTEQdwA2jlYr//BV4TkXuAj4wxKXYDqROw3Q41WgPXwY/hZpzSlcAsezkCK4A0WGGA/mIvbwU+EJG/Ax9RPH/HaqDF2D8HAa2B9sA/7XJ6YYXfASv0zHIR+Rj4uLDMjTFJQDiAiDxg5yMishqrt+0lY8zpYpZZKXUH0IaaUspTxgGngQ5YwzCu3JrAGLNCRLYBfYB1IvIcVrzVJcaYV4pwDJPPct6ExowUkc72sb4RkU5FOw0AVgP/IyIfWVmZFBEJAvYaYyJcpO8DPAD8GqshGmSMySzsIHYw6IlYwb3nYgVIbwaMBl4rRnmVUncIHaOmlPKU2sBJezD8E1g9TrmISHPgsDFmDvAJEIwV9HigiNxtp6krIk3zOcagHD//116Ox2rogHXLdIudT4AxZpsx5nXgB6DJLXmlA36uDmKM+Q6rV/CPWI02gGTgP0Qkws6/mj3mrgrW+LIvgf+066FWPuW/1ZNYga5/xBqvlmW/fIu4v1LqDqM9akopT/kvYI09fuwfQIaLNI8CT4jIdeAU1lOOP4rIRCB7aorrwO+B713sX0dEdgFXgcftdaOAxSLyMlaDbJi9foY9Vk6wGoM7ga458voU+FBEfmPncavVwAzgPgBjzDURGQjMEZHaWJ+37wAHgb/Z6wSYY4w5X1BFAYiIL/A00MNeNRNYB1wDBhe2v1LqziTGFHg3QCml7kgichQINcac9XRZiktEnsYq+wuFpXWx7yZgvD21iFLqDqe3PpVSqvy5DPTKnvC2qETkS6A5Vi+jUqoC0B41pZRSSqlySnvUlFJKKaXKKW2oKaWUUkqVU9pQU0oppZQqp7ShppRSSilVTmlDTSmllFKqnNKGmlJKKaVUOfX/acT6xh9/zPMAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plot_roc(\n", - " \"Train Baseline\", train_labels, train_predictions_baseline, color=colors[0]\n", - ")\n", - "plot_roc(\n", - " \"Test Baseline\",\n", - " test_labels,\n", - " test_predictions_baseline,\n", - " color=colors[0],\n", - " linestyle=\"--\",\n", - ")\n", - "\n", - "plot_roc(\n", - " \"Train Weighted\", train_labels, train_predictions_weighted, color=colors[1]\n", - ")\n", - "plot_roc(\n", - " \"Test Weighted\",\n", - " test_labels,\n", - " test_predictions_weighted,\n", - " color=colors[1],\n", - " linestyle=\"--\",\n", - ")\n", - "\n", - "\n", - "plt.legend(loc=\"lower right\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "5ysRtr6xHnXP" - }, - "source": [ - "## Oversampling" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "18VUHNc-UF5w" - }, - "source": [ - "### Oversample the minority class\n", - "\n", - "A related approach would be to resample the dataset by oversampling the minority class." - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "sHirNp6u7OWp" - }, - "outputs": [], - "source": [ - "# TODO 1\n", - "pos_features = #TODO: Your code goes here.\n", - "neg_features = train_features[~bool_train_labels]\n", - "\n", - "pos_labels = #TODO: Your code goes here.\n", - "neg_labels = #TODO: Your code goes here." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "WgBVbX7P7QrL" - }, - "source": [ - "#### Using NumPy\n", - "\n", - "You can balance the dataset manually by choosing the right number of random \n", - "indices from the positive examples:" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "BUzGjSkwqT88" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(181972, 29)" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ids = np.arange(len(pos_features))\n", - "choices = np.random.choice(ids, len(neg_features))\n", - "\n", - "res_pos_features = pos_features[choices]\n", - "res_pos_labels = pos_labels[choices]\n", - "\n", - "res_pos_features.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "7ie_FFet6cep" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(363944, 29)" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "resampled_features = np.concatenate([res_pos_features, neg_features], axis=0)\n", - "resampled_labels = np.concatenate([res_pos_labels, neg_labels], axis=0)\n", - "\n", - "order = np.arange(len(resampled_labels))\n", - "np.random.shuffle(order)\n", - "resampled_features = resampled_features[order]\n", - "resampled_labels = resampled_labels[order]\n", - "\n", - "resampled_features.shape" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "IYfJe2Kc-FAz" - }, - "source": [ - "#### Using `tf.data`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "usyixaST8v5P" - }, - "source": [ - "If you're using `tf.data` the easiest way to produce balanced examples is to start with a `positive` and a `negative` dataset, and merge them. 
See [the tf.data guide](../../guide/data.ipynb) for more examples." - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "yF4OZ-rI6xb6" - }, - "outputs": [], - "source": [ - "BUFFER_SIZE = 100000\n", - "\n", - "\n", - "def make_ds(features, labels):\n", - " ds = tf.data.Dataset.from_tensor_slices((features, labels)) # .cache()\n", - " ds = ds.shuffle(BUFFER_SIZE).repeat()\n", - " return ds\n", - "\n", - "\n", - "pos_ds = make_ds(pos_features, pos_labels)\n", - "neg_ds = make_ds(neg_features, neg_labels)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "RNQUx-OA-oJc" - }, - "source": [ - "Each dataset provides `(feature, label)` pairs:" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "llXc9rNH7Fbz" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Features:\n", - " [-2.46955933 3.42534191 -4.42937043 3.70651659 -3.17895499 -1.30458304\n", - " -5. 2.86676917 -4.9308611 -5. 3.58555137 -5.\n", - " 1.51535494 -5. 0.01049775 -5. -5. 
-5.\n", - " 2.02380731 0.36595419 1.61836304 -1.16743779 0.31324117 -0.35515978\n", - " -0.62579636 -0.55952005 0.51255883 1.15454727 0.87478003]\n", - "\n", - "Label: 1\n" - ] - } - ], - "source": [ - "for features, label in pos_ds.take(1):\n", - " print(\"Features:\\n\", features.numpy())\n", - " print()\n", - " print(\"Label: \", label.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "sLEfjZO0-vbN" - }, - "source": [ - "Merge the two together using `experimental.sample_from_datasets`:" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "e7w9UQPT9wzE" - }, - "outputs": [], - "source": [ - "resampled_ds = tf.data.experimental.sample_from_datasets(\n", - " [pos_ds, neg_ds], weights=[0.5, 0.5]\n", - ")\n", - "resampled_ds = resampled_ds.batch(BATCH_SIZE).prefetch(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "EWXARdTdAuQK" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.48974609375\n" - ] - } - ], - "source": [ - "for features, label in resampled_ds.take(1):\n", - " print(label.numpy().mean())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "irgqf3YxAyN0" - }, - "source": [ - "To use this dataset, you'll need the number of steps per epoch.\n", - "\n", - "The definition of \"epoch\" in this case is less clear. 
Say it's the number of batches required to see each negative example once:" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "xH-7K46AAxpq" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "278.0" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "resampled_steps_per_epoch = np.ceil(2.0 * neg / BATCH_SIZE)\n", - "resampled_steps_per_epoch" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "XZ1BvEpcBVHP" - }, - "source": [ - "### Train on the oversampled data\n", - "\n", - "Now try training the model with the resampled data set instead of using class weights to see how these methods compare.\n", - "\n", - "Note: Because the data was balanced by replicating the positive examples, the total dataset size is larger, and each epoch runs for more training steps. " - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "soRQ89JYqd6b" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train for 278.0 steps, validate for 23 steps\n", - "Epoch 1/100\n", - "278/278 [==============================] - 13s 48ms/step - loss: 0.4624 - tp: 267186.0000 - fp: 124224.0000 - tn: 160439.0000 - fn: 17495.0000 - accuracy: 0.7511 - precision: 0.6826 - recall: 0.9385 - auc: 0.9268 - val_loss: 0.3299 - val_tp: 79.0000 - val_fp: 2825.0000 - val_tn: 42661.0000 - val_fn: 4.0000 - val_accuracy: 0.9379 - val_precision: 0.0272 - val_recall: 0.9518 - val_auc: 0.9799\n", - "Epoch 2/100\n", - "278/278 [==============================] - 11s 39ms/step - loss: 0.2362 - tp: 264077.0000 - fp: 26654.0000 - tn: 257570.0000 - fn: 21043.0000 - accuracy: 0.9162 - precision: 0.9083 - recall: 0.9262 - auc: 0.9708 - val_loss: 0.1926 - val_tp: 75.0000 - val_fp: 1187.0000 - val_tn: 44299.0000 - val_fn: 8.0000 - val_accuracy: 0.9738 - 
val_precision: 0.0594 - val_recall: 0.9036 - val_auc: 0.9779\n", - "Epoch 3/100\n", - "278/278 [==============================] - 11s 40ms/step - loss: 0.1887 - tp: 263490.0000 - fp: 12935.0000 - tn: 271381.0000 - fn: 21538.0000 - accuracy: 0.9395 - precision: 0.9532 - recall: 0.9244 - auc: 0.9804 - val_loss: 0.1373 - val_tp: 75.0000 - val_fp: 1064.0000 - val_tn: 44422.0000 - val_fn: 8.0000 - val_accuracy: 0.9765 - val_precision: 0.0658 - val_recall: 0.9036 - val_auc: 0.9778\n", - "Epoch 4/100\n", - "278/278 [==============================] - 11s 41ms/step - loss: 0.1605 - tp: 263933.0000 - fp: 10513.0000 - tn: 274505.0000 - fn: 20393.0000 - accuracy: 0.9457 - precision: 0.9617 - recall: 0.9283 - auc: 0.9866 - val_loss: 0.1078 - val_tp: 75.0000 - val_fp: 1070.0000 - val_tn: 44416.0000 - val_fn: 8.0000 - val_accuracy: 0.9763 - val_precision: 0.0655 - val_recall: 0.9036 - val_auc: 0.9783\n", - "Epoch 5/100\n", - "278/278 [==============================] - 11s 39ms/step - loss: 0.1423 - tp: 265715.0000 - fp: 9592.0000 - tn: 275145.0000 - fn: 18892.0000 - accuracy: 0.9500 - precision: 0.9652 - recall: 0.9336 - auc: 0.9901 - val_loss: 0.0928 - val_tp: 75.0000 - val_fp: 1051.0000 - val_tn: 44435.0000 - val_fn: 8.0000 - val_accuracy: 0.9768 - val_precision: 0.0666 - val_recall: 0.9036 - val_auc: 0.9762\n", - "Epoch 6/100\n", - "278/278 [==============================] - 11s 40ms/step - loss: 0.1297 - tp: 267181.0000 - fp: 8944.0000 - tn: 275445.0000 - fn: 17774.0000 - accuracy: 0.9531 - precision: 0.9676 - recall: 0.9376 - auc: 0.9920 - val_loss: 0.0847 - val_tp: 75.0000 - val_fp: 1077.0000 - val_tn: 44409.0000 - val_fn: 8.0000 - val_accuracy: 0.9762 - val_precision: 0.0651 - val_recall: 0.9036 - val_auc: 0.9748\n", - "Epoch 7/100\n", - "278/278 [==============================] - 11s 39ms/step - loss: 0.1203 - tp: 267440.0000 - fp: 8606.0000 - tn: 276459.0000 - fn: 16839.0000 - accuracy: 0.9553 - precision: 0.9688 - recall: 0.9408 - auc: 0.9933 - val_loss: 0.0775 - 
val_tp: 75.0000 - val_fp: 1003.0000 - val_tn: 44483.0000 - val_fn: 8.0000 - val_accuracy: 0.9778 - val_precision: 0.0696 - val_recall: 0.9036 - val_auc: 0.9742\n", - "Epoch 8/100\n", - "278/278 [==============================] - 11s 40ms/step - loss: 0.1132 - tp: 268799.0000 - fp: 8165.0000 - tn: 276260.0000 - fn: 16120.0000 - accuracy: 0.9573 - precision: 0.9705 - recall: 0.9434 - auc: 0.9941 - val_loss: 0.0716 - val_tp: 75.0000 - val_fp: 927.0000 - val_tn: 44559.0000 - val_fn: 8.0000 - val_accuracy: 0.9795 - val_precision: 0.0749 - val_recall: 0.9036 - val_auc: 0.9713\n", - "Epoch 9/100\n", - "278/278 [==============================] - 11s 40ms/step - loss: 0.1074 - tp: 269627.0000 - fp: 7971.0000 - tn: 276559.0000 - fn: 15187.0000 - accuracy: 0.9593 - precision: 0.9713 - recall: 0.9467 - auc: 0.9947 - val_loss: 0.0670 - val_tp: 75.0000 - val_fp: 880.0000 - val_tn: 44606.0000 - val_fn: 8.0000 - val_accuracy: 0.9805 - val_precision: 0.0785 - val_recall: 0.9036 - val_auc: 0.9713\n", - "Epoch 10/100\n", - "278/278 [==============================] - 11s 39ms/step - loss: 0.1017 - tp: 270359.0000 - fp: 7590.0000 - tn: 277311.0000 - fn: 14084.0000 - accuracy: 0.9619 - precision: 0.9727 - recall: 0.9505 - auc: 0.9952 - val_loss: 0.0629 - val_tp: 75.0000 - val_fp: 848.0000 - val_tn: 44638.0000 - val_fn: 8.0000 - val_accuracy: 0.9812 - val_precision: 0.0813 - val_recall: 0.9036 - val_auc: 0.9717\n", - "Epoch 11/100\n", - "276/278 [============================>.] 
- ETA: 0s - loss: 0.0977 - tp: 269672.0000 - fp: 7408.0000 - tn: 274621.0000 - fn: 13547.0000 - accuracy: 0.9629 - precision: 0.9733 - recall: 0.9522 - auc: 0.9955Restoring model weights from the end of the best epoch.\n", - "278/278 [==============================] - 11s 39ms/step - loss: 0.0978 - tp: 271609.0000 - fp: 7474.0000 - tn: 276625.0000 - fn: 13636.0000 - accuracy: 0.9629 - precision: 0.9732 - recall: 0.9522 - auc: 0.9955 - val_loss: 0.0615 - val_tp: 75.0000 - val_fp: 841.0000 - val_tn: 44645.0000 - val_fn: 8.0000 - val_accuracy: 0.9814 - val_precision: 0.0819 - val_recall: 0.9036 - val_auc: 0.9637\n", - "Epoch 00011: early stopping\n" - ] - } - ], - "source": [ - "resampled_model = make_model()\n", - "resampled_model.load_weights(initial_weights)\n", - "\n", - "# Reset the bias to zero, since this dataset is balanced.\n", - "output_layer = resampled_model.layers[-1]\n", - "output_layer.bias.assign([0])\n", - "\n", - "val_ds = tf.data.Dataset.from_tensor_slices((val_features, val_labels)).cache()\n", - "val_ds = val_ds.batch(BATCH_SIZE).prefetch(2)\n", - "\n", - "resampled_history = resampled_model.fit(\n", - " resampled_ds,\n", - " epochs=EPOCHS,\n", - " steps_per_epoch=resampled_steps_per_epoch,\n", - " callbacks=[early_stopping],\n", - " validation_data=val_ds,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "avALvzUp3T_c" - }, - "source": [ - "If the training process were considering the whole dataset on each gradient update, this oversampling would be basically identical to the class weighting.\n", - "\n", - "But when training the model batch-wise, as you did here, the oversampled data provides a smoother gradient signal: Instead of each positive example being shown in one batch with a large weight, they're shown in many different batches each time with a small weight. \n", - "\n", - "This smoother gradient signal makes it easier to train the model." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "klHZ0HV76VC5" - }, - "source": [ - "### Check training history\n", - "\n", - "Note that the distributions of metrics will be different here, because the training data has a totally different distribution from the validation and test data. " - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "YoUGfr1vuivl" - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plot_metrics(resampled_history)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "1PuH3A2vnwrh" - }, - "source": [ - "### Re-train\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "KFLxRL8eoDE5" - }, - "source": [ - "Because training is easier on the balanced data, the above training procedure may overfit quickly. \n", - "\n", - "So break up the epochs to give the `callbacks.EarlyStopping` finer control over when to stop training." - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "e_yn9I26qAHU" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train for 20 steps, validate for 23 steps\n", - "Epoch 1/1000\n", - "20/20 [==============================] - 4s 181ms/step - loss: 0.8800 - tp: 18783.0000 - fp: 16378.0000 - tn: 4036.0000 - fn: 1763.0000 - accuracy: 0.5571 - precision: 0.5342 - recall: 0.9142 - auc: 0.7752 - val_loss: 1.3661 - val_tp: 83.0000 - val_fp: 40065.0000 - val_tn: 5421.0000 - val_fn: 0.0000e+00 - val_accuracy: 0.1208 - val_precision: 0.0021 - val_recall: 1.0000 - val_auc: 0.9425\n", - "Epoch 2/1000\n", - "20/20 [==============================] - 1s 35ms/step - loss: 0.7378 - tp: 19613.0000 - fp: 15282.0000 - tn: 5187.0000 - fn: 878.0000 - accuracy: 0.6055 - precision: 0.5621 - recall: 0.9572 - auc: 0.8680 - val_loss: 1.1629 - val_tp: 83.0000 - val_fp: 36851.0000 - val_tn: 8635.0000 - val_fn: 0.0000e+00 - val_accuracy: 0.1913 - val_precision: 0.0022 - val_recall: 1.0000 - val_auc: 0.9580\n", - "Epoch 3/1000\n", - "20/20 [==============================] - 1s 39ms/step - loss: 0.6431 - tp: 19522.0000 - fp: 13990.0000 - tn: 6558.0000 - fn: 890.0000 - accuracy: 0.6367 - precision: 0.5825 - recall: 0.9564 - auc: 0.8950 - val_loss: 0.9853 - val_tp: 
82.0000 - val_fp: 32268.0000 - val_tn: 13218.0000 - val_fn: 1.0000 - val_accuracy: 0.2919 - val_precision: 0.0025 - val_recall: 0.9880 - val_auc: 0.9660\n", - "Epoch 4/1000\n", - "20/20 [==============================] - 1s 39ms/step - loss: 0.5563 - tp: 19488.0000 - fp: 12475.0000 - tn: 8032.0000 - fn: 965.0000 - accuracy: 0.6719 - precision: 0.6097 - recall: 0.9528 - auc: 0.9135 - val_loss: 0.8430 - val_tp: 82.0000 - val_fp: 26633.0000 - val_tn: 18853.0000 - val_fn: 1.0000 - val_accuracy: 0.4155 - val_precision: 0.0031 - val_recall: 0.9880 - val_auc: 0.9713\n", - "Epoch 5/1000\n", - "20/20 [==============================] - 1s 37ms/step - loss: 0.4984 - tp: 19489.0000 - fp: 11049.0000 - tn: 9377.0000 - fn: 1045.0000 - accuracy: 0.7047 - precision: 0.6382 - recall: 0.9491 - auc: 0.9242 - val_loss: 0.7307 - val_tp: 82.0000 - val_fp: 20850.0000 - val_tn: 24636.0000 - val_fn: 1.0000 - val_accuracy: 0.5424 - val_precision: 0.0039 - val_recall: 0.9880 - val_auc: 0.9753\n", - "Epoch 6/1000\n", - "20/20 [==============================] - 1s 39ms/step - loss: 0.4463 - tp: 19305.0000 - fp: 9622.0000 - tn: 10895.0000 - fn: 1138.0000 - accuracy: 0.7373 - precision: 0.6674 - recall: 0.9443 - auc: 0.9336 - val_loss: 0.6405 - val_tp: 82.0000 - val_fp: 15843.0000 - val_tn: 29643.0000 - val_fn: 1.0000 - val_accuracy: 0.6523 - val_precision: 0.0051 - val_recall: 0.9880 - val_auc: 0.9773\n", - "Epoch 7/1000\n", - "20/20 [==============================] - 1s 40ms/step - loss: 0.4121 - tp: 19365.0000 - fp: 8524.0000 - tn: 11931.0000 - fn: 1140.0000 - accuracy: 0.7641 - precision: 0.6944 - recall: 0.9444 - auc: 0.9411 - val_loss: 0.5691 - val_tp: 82.0000 - val_fp: 11981.0000 - val_tn: 33505.0000 - val_fn: 1.0000 - val_accuracy: 0.7371 - val_precision: 0.0068 - val_recall: 0.9880 - val_auc: 0.9787\n", - "Epoch 8/1000\n", - "20/20 [==============================] - 1s 39ms/step - loss: 0.3784 - tp: 19242.0000 - fp: 7375.0000 - tn: 13072.0000 - fn: 1271.0000 - accuracy: 0.7889 - 
precision: 0.7229 - recall: 0.9380 - auc: 0.9461 - val_loss: 0.5120 - val_tp: 80.0000 - val_fp: 9309.0000 - val_tn: 36177.0000 - val_fn: 3.0000 - val_accuracy: 0.7957 - val_precision: 0.0085 - val_recall: 0.9639 - val_auc: 0.9794\n", - "Epoch 9/1000\n", - "20/20 [==============================] - 1s 45ms/step - loss: 0.3551 - tp: 19106.0000 - fp: 6529.0000 - tn: 13989.0000 - fn: 1336.0000 - accuracy: 0.8080 - precision: 0.7453 - recall: 0.9346 - auc: 0.9495 - val_loss: 0.4657 - val_tp: 80.0000 - val_fp: 7354.0000 - val_tn: 38132.0000 - val_fn: 3.0000 - val_accuracy: 0.8386 - val_precision: 0.0108 - val_recall: 0.9639 - val_auc: 0.9799\n", - "Epoch 10/1000\n", - "20/20 [==============================] - 1s 38ms/step - loss: 0.3350 - tp: 19149.0000 - fp: 5794.0000 - tn: 14698.0000 - fn: 1319.0000 - accuracy: 0.8263 - precision: 0.7677 - recall: 0.9356 - auc: 0.9535 - val_loss: 0.4275 - val_tp: 80.0000 - val_fp: 5832.0000 - val_tn: 39654.0000 - val_fn: 3.0000 - val_accuracy: 0.8720 - val_precision: 0.0135 - val_recall: 0.9639 - val_auc: 0.9802\n", - "Epoch 11/1000\n", - "20/20 [==============================] - 1s 40ms/step - loss: 0.3168 - tp: 19224.0000 - fp: 5013.0000 - tn: 15322.0000 - fn: 1401.0000 - accuracy: 0.8434 - precision: 0.7932 - recall: 0.9321 - auc: 0.9552 - val_loss: 0.3969 - val_tp: 80.0000 - val_fp: 4730.0000 - val_tn: 40756.0000 - val_fn: 3.0000 - val_accuracy: 0.8961 - val_precision: 0.0166 - val_recall: 0.9639 - val_auc: 0.9805\n", - "Epoch 12/1000\n", - "20/20 [==============================] - 1s 40ms/step - loss: 0.3077 - tp: 19028.0000 - fp: 4564.0000 - tn: 16058.0000 - fn: 1310.0000 - accuracy: 0.8566 - precision: 0.8065 - recall: 0.9356 - auc: 0.9593 - val_loss: 0.3695 - val_tp: 80.0000 - val_fp: 3819.0000 - val_tn: 41667.0000 - val_fn: 3.0000 - val_accuracy: 0.9161 - val_precision: 0.0205 - val_recall: 0.9639 - val_auc: 0.9804\n", - "Epoch 13/1000\n", - "20/20 [==============================] - 1s 40ms/step - loss: 0.2936 - tp: 19047.0000 
- fp: 4028.0000 - tn: 16444.0000 - fn: 1441.0000 - accuracy: 0.8665 - precision: 0.8254 - recall: 0.9297 - auc: 0.9597 - val_loss: 0.3461 - val_tp: 79.0000 - val_fp: 3149.0000 - val_tn: 42337.0000 - val_fn: 4.0000 - val_accuracy: 0.9308 - val_precision: 0.0245 - val_recall: 0.9518 - val_auc: 0.9802\n", - "Epoch 14/1000\n", - "20/20 [==============================] - 1s 38ms/step - loss: 0.2829 - tp: 19087.0000 - fp: 3596.0000 - tn: 16855.0000 - fn: 1422.0000 - accuracy: 0.8775 - precision: 0.8415 - recall: 0.9307 - auc: 0.9619 - val_loss: 0.3266 - val_tp: 79.0000 - val_fp: 2691.0000 - val_tn: 42795.0000 - val_fn: 4.0000 - val_accuracy: 0.9409 - val_precision: 0.0285 - val_recall: 0.9518 - val_auc: 0.9803\n", - "Epoch 15/1000\n", - "20/20 [==============================] - 1s 39ms/step - loss: 0.2748 - tp: 19020.0000 - fp: 3174.0000 - tn: 17283.0000 - fn: 1483.0000 - accuracy: 0.8863 - precision: 0.8570 - recall: 0.9277 - auc: 0.9627 - val_loss: 0.3095 - val_tp: 79.0000 - val_fp: 2360.0000 - val_tn: 43126.0000 - val_fn: 4.0000 - val_accuracy: 0.9481 - val_precision: 0.0324 - val_recall: 0.9518 - val_auc: 0.9797\n", - "Epoch 16/1000\n", - "20/20 [==============================] - 1s 40ms/step - loss: 0.2666 - tp: 18890.0000 - fp: 2889.0000 - tn: 17757.0000 - fn: 1424.0000 - accuracy: 0.8947 - precision: 0.8673 - recall: 0.9299 - auc: 0.9653 - val_loss: 0.2945 - val_tp: 78.0000 - val_fp: 2101.0000 - val_tn: 43385.0000 - val_fn: 5.0000 - val_accuracy: 0.9538 - val_precision: 0.0358 - val_recall: 0.9398 - val_auc: 0.9796\n", - "Epoch 17/1000\n", - "20/20 [==============================] - 1s 38ms/step - loss: 0.2583 - tp: 18959.0000 - fp: 2517.0000 - tn: 17973.0000 - fn: 1511.0000 - accuracy: 0.9017 - precision: 0.8828 - recall: 0.9262 - auc: 0.9657 - val_loss: 0.2817 - val_tp: 78.0000 - val_fp: 1929.0000 - val_tn: 43557.0000 - val_fn: 5.0000 - val_accuracy: 0.9576 - val_precision: 0.0389 - val_recall: 0.9398 - val_auc: 0.9794\n", - "Epoch 18/1000\n", - "20/20 
[==============================] - 1s 46ms/step - loss: 0.2511 - tp: 19104.0000 - fp: 2344.0000 - tn: 18043.0000 - fn: 1469.0000 - accuracy: 0.9069 - precision: 0.8907 - recall: 0.9286 - auc: 0.9678 - val_loss: 0.2704 - val_tp: 78.0000 - val_fp: 1787.0000 - val_tn: 43699.0000 - val_fn: 5.0000 - val_accuracy: 0.9607 - val_precision: 0.0418 - val_recall: 0.9398 - val_auc: 0.9793\n", - "Epoch 19/1000\n", - "20/20 [==============================] - 1s 40ms/step - loss: 0.2445 - tp: 19183.0000 - fp: 2087.0000 - tn: 18215.0000 - fn: 1475.0000 - accuracy: 0.9130 - precision: 0.9019 - recall: 0.9286 - auc: 0.9693 - val_loss: 0.2598 - val_tp: 78.0000 - val_fp: 1665.0000 - val_tn: 43821.0000 - val_fn: 5.0000 - val_accuracy: 0.9634 - val_precision: 0.0448 - val_recall: 0.9398 - val_auc: 0.9791\n", - "Epoch 20/1000\n", - "20/20 [==============================] - 1s 39ms/step - loss: 0.2373 - tp: 18995.0000 - fp: 1906.0000 - tn: 18602.0000 - fn: 1457.0000 - accuracy: 0.9179 - precision: 0.9088 - recall: 0.9288 - auc: 0.9712 - val_loss: 0.2500 - val_tp: 78.0000 - val_fp: 1587.0000 - val_tn: 43899.0000 - val_fn: 5.0000 - val_accuracy: 0.9651 - val_precision: 0.0468 - val_recall: 0.9398 - val_auc: 0.9788\n", - "Epoch 21/1000\n", - "19/20 [===========================>..] 
- ETA: 0s - loss: 0.2378 - tp: 18121.0000 - fp: 1821.0000 - tn: 17599.0000 - fn: 1371.0000 - accuracy: 0.9180 - precision: 0.9087 - recall: 0.9297 - auc: 0.9714Restoring model weights from the end of the best epoch.\n", - "20/20 [==============================] - 1s 40ms/step - loss: 0.2376 - tp: 19083.0000 - fp: 1918.0000 - tn: 18513.0000 - fn: 1446.0000 - accuracy: 0.9179 - precision: 0.9087 - recall: 0.9296 - auc: 0.9714 - val_loss: 0.2401 - val_tp: 78.0000 - val_fp: 1485.0000 - val_tn: 44001.0000 - val_fn: 5.0000 - val_accuracy: 0.9673 - val_precision: 0.0499 - val_recall: 0.9398 - val_auc: 0.9785\n", - "Epoch 00021: early stopping\n" - ] - } - ], - "source": [ - "resampled_model = make_model()\n", - "resampled_model.load_weights(initial_weights)\n", - "\n", - "# Reset the bias to zero, since this dataset is balanced.\n", - "output_layer = resampled_model.layers[-1]\n", - "output_layer.bias.assign([0])\n", - "\n", - "resampled_history = resampled_model.fit(\n", - " resampled_ds,\n", - " # These are not real epochs\n", - " steps_per_epoch=20,\n", - " epochs=10 * EPOCHS,\n", - " callbacks=[early_stopping],\n", - " validation_data=(val_ds),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "UuJYKv0gpBK1" - }, - "source": [ - "### Re-check training history" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "FMycrpJwn39w" - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plot_metrics(resampled_history)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "bUuE5HOWZiwP" - }, - "source": [ - "### Evaluate metrics" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "C0fmHSgXxFdW" - }, - "outputs": [], - "source": [ - "# TODO 1\n", - "train_predictions_resampled = #TODO: Your code goes here.\n", - "test_predictions_resampled = #TODO: Your code goes here." - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "FO0mMOYUDWFk" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "loss : 0.3960801533448772\n", - "tp : 99.0\n", - "fp : 5892.0\n", - "tn : 50965.0\n", - "fn : 6.0\n", - "accuracy : 0.8964573\n", - "precision : 0.016524788\n", - "recall : 0.94285715\n", - "auc : 0.9804354\n", - "\n", - "Legitimate Transactions Detected (True Negatives): 50965\n", - "Legitimate Transactions Incorrectly Detected (False Positives): 5892\n", - "Fraudulent Transactions Missed (False Negatives): 6\n", - "Fraudulent Transactions Detected (True Positives): 99\n", - "Total Fraudulent Transactions: 105\n" - ] - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAU4AAAFNCAYAAABvx4bHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xm8XdP9//HXO4kQGchADEkbQ0LRVlBUtT9FQ2hFW0NRUt+QVquG8i2lhgpFtV+kVRVzaJEaUw0pMbdipohMgkoagkRITEnu5/fHXjdOrjvt45x77sl5P/PYj7v32mvvvfa9uZ+7hr3XUURgZmat16HSBTAzqzYOnGZmOTlwmpnl5MBpZpaTA6eZWU4OnGZmOTlwmpnl5MDZDknqIulvkhZK+uunOM9Bkv5RyrJViqSvSppW6XKYgQPnpyLpQEmPS1okaa6kOyTtWIJT7wP0BXpHxL7FniQi/hwRQ0pQnrKSFJI2bi5PRDwYEZt8yusMSX+QXpP0hqSHJP2PpA4N8vWSdIukxZJekXRgM+c8XdKS9H+gftmwYP+Wkp6Q9F76uuWnuQdrHxw4iyTpZ8AFwK/JgtxngD8Cw0pw+s8C0yNiaQnOVfUkdSrBOX5D9rO6DNgUWAc4EtgZuF3SqgXZLwI+Ivu5HgRcLGnzZk5/Q0R0K1hmpWt2Bm4DrgV6AlcDt6V0q2YR4SXnAqwBLAL2bSbPqmSB9b9puQBYNe3bCZgNHAfMA+YCh6Z9vyL7pV2SrjECOB24tuDcA4AAOqXtHwCzgHeBl4CDCtIfKjhuB+AxYGH6ukPBvvuAUcA/03n+AfRp4t7qy//zgvLvDewBTAfmAycV5N8WeBh4O+X9A9A57Xsg3cvidL/7F5z/BOA14Jr6tHTMRukaW6Xt9YA3gJ2aKO8h6X5WbWL/ecCpab1r+v4PKth/DXBOE8eu8LNpsG8IMAdQQdp/gN0r/X/Yy6dbKl6AalyA3YGl9YGriTxnAJOBtYG1gH8Bo9K+ndLxZwCrpIDzHtAz7W8YKJsMnOkX/R1gk7RvXWDztL48cAK9gAXAwem4A9J277T/PuBFYBDQJW03FSzqy39qKv/hKXD9BegObA68D2yQ8m8NbJ+uOwB4ATim4HwBbNzI+c8l+wPUpTBwpjyHA1OA1YGJwG+b+VnMAPqn9XPJgvGTwPnp+9EFeDHtHwy81+D444G/NXHu08n+EM0HngeOKNh3LHBHg/y3A8dV+v+wl0+3uKlenN7Am9F8U/og4IyImBcRb5DVJA8u2L8k7V8SERPIalvF9uHVAVtI6hIRcyPi+Uby7AnMiIhrImJpRFwHTAW+VZDnyoiYHhHvA+OA5vrjlgBnRcQS4HqgD3BhRLybrj8F+CJARDwREZPTdV8GLgH+Xyvu6bSI+DCVZwURcSkwE3iE7I/FyY2dJPWd/jciXpU0FBgKfIHsj98uQMd0/vmS+gDdyP4QFVpI9gehMeOAz5H9cTwcOFXSAWlft3Rsa89lVcKBszhvAX1a6HtbD3ilYPuVlLb8HA0C73tkv2i5RMRisubtj4C5kv4uadNWlKe+TOsXbL+WozxvRcSytF4f2F4v2P9+/fGSBkm6PQ3KvEPW19inmXMDvBERH7SQ51JgC+D3EfFhE3nWJmsuA3weuDP9MZsH3JnK14GsD3I+2R+wHg3O0YOs++ITImJKRPw3IpZFxL+AC8kG98h7LqseDpzFeRj4kKxfryn/JRvkqfeZlFaMxWRN0nrrFO6MiIkR8Q2ymtdUsoDSUnnqyzSnkbyldjFZuQZGRA/gJEAtHNPsfIeSupH1G18OnC6pVxNZ3yT7vgA8C+wmaW1Ja5PVOrsCZwMTIqKOrI+2k6SBBef4IlkzvDWCj+/teeALkgrv9Qs5zmXtlANnESJiIVn/3kWS9pa0uqRVJA1No7cA1wG/lLRWagKeSja6Woynga9J+oykNYBf1O+Q1FfSMEldyYL5IrJ
mbkMTgEHpEapOkvYHNiPrcyu37mTN30WpNnxEg/2vAxt+4qjmXQg8HhGHAX8H/tRYpoiYDvSXtG5E3EFWy3wGGE82MHUEWQ3w+JR/MXAzcIakrpK+QvakxDWNnT9973sqsy1wFNlIOmT9xMuAoyStKunIlH5Pznu19qbSnazVvJD1Yz5OViN8jewXeIe0bzVgNNko8ty0vlratxMFAx0p7WVg17R+Og1GaskekXmbrF/vcD4eHFoXuJ+s7+xtsl/WzdIxP2DFUfUdgSdS3ieAHQv23QccVrC9wrENyrJC+VM5AhhQkPYQ8P20/jWyGuci4EGyQbHCcv0ofY/eBvZr4vuzPI0skM0BeqXtbun7clAT5R2ZfjafGMxrIq0XcGv6uf4HOLBg31eBRQXb15F13SxK93hUg3MNTt/r98kGpAZX+v+tl0+/KP1wzVZqkv5A1uQ+layrpQPZ40JnAntGRMP+X7MmOXBazZD0beAnpNF+skfEzo1sUMes1Rw4zcxy8uCQmVlODpxmZjl96skTymXJm7Pch1ClBm/e5GRCVgWee31yS8/YNqrY39lV+mxY1PUqyTVOM2v3JL0s6VlJT0t6PKX1knSXpBnpa8+ULkmjJc2U9G9JWxWcZ3jKP0PS8IL0rdP5Z6Zjmw3mDpxmVhp1y4pbWu/rEbFlRGyTtk8EJkXEQGBS2oZsPoKBaRlJ9uYa6e2y04DtyGbsOq0+2KY8hxcct3tzBXHgNLPSiLriluINI5vjlPR174L0sZGZDKwpaV1gN+CuiJgfEQuAu4Dd074ekU1EE8BYmn+d2oHTzEqkrq64pXUC+EeaRX9kSusbEXPT+mtkE09DNnHNqwXHzk5pzaXPbiS9Se12cMjMqksUWXtMgXBkQdKYiBjTINuOETEnTc5yl6SpK147QlKbDSg7cJpZabS+9riCFCQbBsqGeeakr/Mk3ULWR/l6mrxlbmpuz0vZ5wD9Cw7vl9LmkM15UJh+X0rv10j+JrmpbmalUaY+zjRLVff6dbI5Bp4jm+GqfmR8OB/PSjUeOCSNrm8PLExN+onAkDSbVc90nolp3zuStk+j6YcUnKtRrnGaWWnkGyHPoy9wS3pCqBPwl4i4U9JjwDhJI8gm5d4v5Z9A9nE0M8km5D4UICLmSxpF9nlbkH0Cw/y0/mPgKrKPUbkjLU1qt++q+wH46uUH4KtbsQ/Af/Ty40X9znYesE3VPQDvGqeZlUaRfZzVyIHTzEqi2FH1auTAaWal4RqnmVlOrnGameVUvlH1dseB08xKwzVOM7Oc3MdpZpZTDdU4/cqlmVlOrnGaWWm4qW5mlk+ER9XNzPKpoT5OB04zKw031c3McnKN08wsJ785ZGaWk2ucZmY5uY/TzCwn1zjNzHJyjdPMLCcHTjOzfPzmkJlZXq5xmpnl5MEhM7OcXOM0M8uphmqcnsjYzCwn1zjNrDTcVDczy6mGmuoOnGZWGq5xmpnl5MBpZpaTm+pmZjm5xmlmlpNrnGZmObnGaWaWk2ucZmY5ucZpZpaTA6eZWU4RlS5Bm3HgNLPScI3TzCwnB04zs5w8qm5mllMN1Tg9kbGZWU6ucZpZaXhU3cwsJzfVzcxyqqsrbmkFSR0lPSXp9rS9gaRHJM2UdIOkzil91bQ9M+0fUHCOX6T0aZJ2K0jfPaXNlHRia8rjwGlmpRF1xS2tczTwQsH2ucD5EbExsAAYkdJHAAtS+vkpH5I2A74HbA7sDvwxBeOOwEXAUGAz4ICUt1kOnGZWElEXRS0tkdQP2BO4LG0L2Bm4MWW5Gtg7rQ9L26T9u6T8w4DrI+LDiHgJmAlsm5aZETErIj4Crk95m+XAaWalUWRTXdJISY8XLCMbnPkC4OdAffW0N/B2RCxN27OB9dP6+sCrAGn/wpR/eXqDY5pKb5YHh8ysNIp8AD4ixgBjGtsn6ZvAvIh4QtJOxReutBw4zaw0WtHsLsJXgL0k7QGsBvQALgTWlNQp1Sr7AXNS/jl
Af2C2pE7AGsBbBen1Co9pKr1JbqqbWWmUYVQ9In4REf0iYgDZ4M49EXEQcC+wT8o2HLgtrY9P26T990REpPTvpVH3DYCBwKPAY8DANErfOV1jfEu36hqnmZVG2z7HeQJwvaQzgaeAy1P65cA1kmYC88kCIRHxvKRxwBRgKfCTiFgGIOlIYCLQEbgiIp5v6eIOnJ/SkO8Op+vqq9OhQwc6duzIuCtGs/CddznulLP572uvs946ffndqF+wRo/uLHznXU45+3xenTOXVTt3ZtRJxzJwwwEAvPPuIk475wJmznoFJEaddCxbbvE5Lrr8Wm4afyc911wDgKN/OJyv7bBtBe945TXxsVtYvHgxdcvqWLZ0GfvvdiibbD6QU887gVVX7cyypcsYdeJ5PPfUFHqs0Z1RF5xM/wH9+PDDDznlmLOYOXUW66y3Nr/+w2n07tOLiODGa2/l2kvHVfrW2kaZ3xyKiPuA+9L6LLIR8YZ5PgD2beL4s4CzGkmfAEzIUxYHzhK44vfnLA9sAJddM47tt9mSww7ej8uuGcfl147jZz8ewaVjb2DTgRsx+uxTmfXKq5z1u4u4fPQ5AJxzwZ/4ynbbcP5Zv2TJkiW8/8GHy8938P57c+iB+3ziulZ6//Odn/D2/IXLt4879Ugu/u3lPHTPw3x1ly9z3ClHcuh3fszhRw9n6nMzOPrQE9lg489y8jnHc9g+P2Xp0mWcd9poXnh2Gqt3XZ1xd13Fv+5/lFnTX67cTbUVvzn06UnaVNIJkkan5QRJnyvX9dqTex98mGFDdwVg2NBdueeBhwF48eX/sN1WXwRgw8/2Z87c13lz/gLeXbSYJ555ju9+K3uZYZVVVqFH926VKbytICLo1r0rAN16dGPe628AsNGgDXjkoccBeGnmK6zff116r9WLN+e9xQvPTgPgvcXvMWvGy/RdZ+3KFL6t1UVxSxUqS41T0gnAAWQPkz6akvsB10m6PiLOKcd1K0ESI489GUnsO2wo+w7bg7cWvM1afXoB0Kd3T95a8DYAm2y8IXff/0+23nILnp0yjbmvz+P1eW/SsUMHeq65Br886/+YNnMWm20ykBOP+RGrd1kNgOtu+hvj75zE5psO5H+PPJw1enSv2P2uzIJgzA2jiQj+es0t3HjNbZx7ygVccv0FHH/aT1EH8f1vZo8YTpsyg1333IknH3mGLQZvxrr91qHvumvx1hvzl59vvf7r8rktBvHvJ5+r1C21rRqaj7NcNc4RwJci4pyIuDYt55D1SYxo4diqMvbi3/LXK//Axb8bxXU3387jTz+7wn5JZC8uwGEH78u7ixbz3eE/4c83jmfTgRvRsUMHli5bxgvTZ7L/t/fkxqsuokuX1bj8mqxfbP9v78kd467gpqsuYq3evTjvD5e2+T3WikO+9UP2+8ZwjjjwWA44dB+23n5L9v/Bdzj31AvZdath/ObUCznj/JMBuGz0WLr36M6Nk8Zy0Ih9mfrsdJYt+zhwdFm9C+dffjbnnnIBixe9V6lbals1VOMsV+CsA9ZrJH1dPn76/xMK3yC4bOx1ZSpaafVdqw8AvXuuyS5f24Fnp0yjd881eePNrObxxpvz6ZX6P7t17cqZJ/+Mm66+iLNPOZ4Fby+k3/rrsM7afei7Vh++sPmmAAzZaUemTJ8JQJ9ePenYsSMdOnRgn72G8tyU6RW4y9ow77WsGT7/zQVMmnA/nx+8GXvttwd3//1eACaOn8TnB2evMS9e9B6nHHMm++xyCL848lf07N2T2a9kj/916tSRC644m7/fNJG7J9xXkXuphKirK2qpRuUKnMcAkyTdIWlMWu4EJpG9rN+oiBgTEdtExDaHHXJAmYpWOu+9/wGLF7+3fP1fjz7JwA0HsNOO23PbHXcDcNsdd/P1r34ZyEbOlyxZAsBNf7uTrbf8PN26dqVP716ss/ZavPTKbAAmP/E0Gw34DMDyAAww6f5/sfGGn22z+6slXVZfjdW7rr58fYedtmX
G1Fm88dqbfGmHrQDY7qvb8Mqs7O287j260WmVrKfru98fxhOTn1peszzj/JOZNeNlxl5SHX/8Lb+y9HFGxJ2SBpE1zevf+5wDPFb/7NTK4K35Czj6pFEALFu6jD2G7MSO22/DFp8bxHGn/Jqbb5/Ieuusze9GnQTArFde5eQzf4eAjTb4LGf84pjl5zrp2CM44Ve/YcnSJfRfb11GnXQsAL/74+VMmzELBOuv05fTfn5Um99nLei9Vi8uvPJcADp27MiEW/7BP++dzGnHnc2JZx5Lp04d+fDDj/jV8WcDsOGgAZw1+lQighenvcSpx2ZPuQze9ovstd8eTJ8ykxsnjQXgwl9fzIOTHq7MjbWlKm12F0PRTmdtXvLmrPZZMGvR4M0PrHQR7FN47vXJKua4xWd+v6jf2a6/vLao61WSn+M0s9KooRqnA6eZlUaVDvQUw4HTzErDNU4zs5xq6AF4B04zKw3XOM3M8qnWh9mL4cBpZqXhGqeZWU4OnGZmOXlwyMwsJ9c4zczyCQdOM7OcHDjNzHLy40hmZjm5xmlmllMNBc6yfcqlmdnKyjVOMyuJ9jopejk4cJpZadRQU92B08xKw4HTzCwfPwBvZpaXA6eZWU618/y7A6eZlYab6mZmeTlwmpnl5Ka6mVk+bqqbmeXlGqeZWT6ucZqZ5eUap5lZPjX0WW0OnGZWIg6cZmb51FKN0xMZm5nl5BqnmZVGDdU4HTjNrCRqqanuwGlmJVFLgbPJPk5JvZpb2rKQZtb+RV1xS0skrSbpUUnPSHpe0q9S+gaSHpE0U9INkjqn9FXT9sy0f0DBuX6R0qdJ2q0gffeUNlPSiS2Vqbka5xNAAGrsewRs2PItm1nNiMZCRUl8COwcEYskrQI8JOkO4GfA+RFxvaQ/ASOAi9PXBRGxsaTvAecC+0vaDPgesDmwHnC3pEHpGhcB3wBmA49JGh8RU5oqUJOBMyI2+LR3a2a1o1xN9cg+PnNR2lwlLQHsDByY0q8GTicLnMPSOsCNwB8kKaVfHxEfAi9Jmglsm/LNjIhZAJKuT3mbDJwtPo6kzPclnZK2PyNp25aOM7PaEnUqamkNSR0lPQ3MA+4CXgTejoilKctsYP20vj7wKkDavxDoXZje4Jim0pvUmuc4/wh8mY8j+7tk1Vozs+WK7eOUNFLS4wXLyE+cO2JZRGwJ9COrJW7a5jdYoDWj6ttFxFaSngKIiAX1nbBmZvWiyD7OiBgDjGll3rcl3UtWmVtTUqdUq+wHzEnZ5gD9gdmSOgFrAG8VpNcrPKap9Ea1psa5RFJHsj4FJK1FTT3qamatUcZR9bUkrZnWu5AN4rwA3Avsk7INB25L6+PTNmn/PamfdDzwvTTqvgEwEHgUeAwYmEbpO5MNII1vrkytqXGOBm4B+ko6KxXkl604zsxqSGv7K4uwLnB1qsB1AMZFxO2SpgDXSzoTeAq4POW/HLgmDf7MJwuERMTzksaRDfosBX4SEcsAJB0JTAQ6AldExPPNFUhZIG6epE2BXdLmPRHxQo6bLsqSN2fVzqyoK5nBmx/YciZrt557fXJREfA/2+xS1O/sZx6fVLaIWy6tfXNodbJIHECX8hXHzKpVGWuc7U5rHkc6lewZqV5AH+BKSW6qm9kKyvk4UnvTmhrnQcAXI+IDAEnnAE8DZ5azYGZWXVrR67fSaE3g/C+wGvBB2l6VFobqzaz2VGvtsRhNBk5Jvyfr01wIPC/prrT9DbIhfDOzmtRcjfPx9PUJsseR6t1XttKYWdUq9gH4atTcJB9Xt2VBzKy61dJ8nC32cUoaCJwNbEbW1wlARHhaOTNbrq6GapyteeXySrKpmpYCXwfGAteWs1BmVn0iVNRSjVoTOLtExCSyt4xeiYjTgT3LWywzqzZ+jnNFH0rqAMxI73POAbqVt1hmVm38HOeKjiZ75fIoYBTZrMvDmz3CzGpOtdYei9Fi4IyIx9LqIuDQ8hbHzKpVLQ0ONfcA/N9Ic3A
2JiL2KkuJzKwqVetATzGaq3H+ts1KYWZVz32cQETc35YFMbPq5qa6mVlObqqbmeXkpno70GW9r1a6CGaWg5vqeFTdzPJxUz3jUXUzazXXOPGouplZUzytnJmVRA2NDbVqcOhK4DTgfLJp5Q6ldbMqmVkNqaWmuqeVM7OSqKX5OD2tnJmVRA19ckarapyF08ptDRyMp5UzswYCFbVUI08rZ2YlUVdDo0OtGVW/l0YGzCJi57KUyMyqUl2V1h6L0Zo+zuML1lcDvkv2wW1mZstVa7O7GK1pqj/RIOmfkh4tU3nMrErV0uBQa5rqvQo2O5ANEK1RthKZWVVyjXNFT5D1cYqsif4SMKKchTKz6uMa54o+FxEfFCZIWrVM5TGzKlVLgbM1z3H+q5G0h0tdEDOrbn6OE5C0DrA+0EXSYFh+hz3IHog3M1uuhj5Wvdmm+m7AD4B+wO/4OHC+A5xU3mKZWbXxc5xARFwNXC3puxFxUxuWycyqUA29ONSqPs6tJa1ZvyGpp6Qzy1gmM7N2rTWBc2hEvF2/ERELgD3KVyQzq0Z1RS7VqDWPI3WUtGpEfAggqQvgx5HMbAV1ch9noT8DkyRdmbYPBcaWr0hmVo1qqY+zNe+qnyvpGWDXlDQqIiaWt1hmVm2qtdldjNbUOImIO4E7ASTtKOmiiPhJWUtmZlWllp7jbNWHrkkaLOk3kl4GRgFTy1oqM6s6daiopSWS+ku6V9IUSc9LOjql95J0l6QZ6WvPlC5JoyXNlPRvSVsVnGt4yj9D0vCC9K0lPZuOGS0132HbZOCUNEjSaZKmAr8HXiX7wLavR8TvW7xbM6spUeTSCkuB4yJiM2B74CeSNgNOBCZFxEBgUtoGGAoMTMtI4GJYPtPbacB2wLbAafXBNuU5vOC43ZsrUHM1zqnAzsA3I2LHFCyXte4+zazW1Km4pSURMTcinkzr7wIvkL0OPgy4OmW7Gtg7rQ8DxkZmMrCmpHXJ3oa8KyLmp8cq7wJ2T/t6RMTkiAiywe/6czWqucD5HWAucK+kSyXtAjX0TpWZ5dIWz3FKGgAMBh4B+kbE3LTrNaBvWl+frIVcb3ZKay59diPpTWoycEbErRHxPWBT4F7gGGBtSRdLGtLcSc2s9hTbVJc0UtLjBcvIxs4vqRtwE3BMRLyzwrWzmmKbPRHVmseRFgN/Af6S+gP2BU4A/lHmsplZFSl2VD0ixgBjmssjaRWyoPnniLg5Jb8uad2ImJua2/NS+hygf8Hh/VLaHGCnBun3pfR+jeRvUqtG1etFxIKIGBMRu+Q5zsxWfuVqqqcR7suBFyLi/wp2jQfqR8aHA7cVpB+SRte3BxamJv1EYEiab6MnMASYmPa9I2n7dK1DCs7VqFY9x2lm1pIyPgD/FeBg4FlJT6e0k4BzgHGSRgCvAPulfRPI5tOYCbxH9rYjETFf0ijgsZTvjIiYn9Z/DFwFdAHuSEuTlHUNtD+dOq/fPgtmtpJb+tGcohrdf+r//aJ+Z3/06rVVN+jsGqeZlYRfuTQzy8mB08wsp1rqW8s1qm5mZq5xmlmJ1NLsSA6cZlYS7uM0M8vJgdPMLKdaGhxy4DSzknAfp5lZTm6qm5nl5Ka6mVlOdTUUOh04zawk3FQ3M8upduqbDpxmViKucZqZ5eTHkczMcvLgkJlZTrUTNh04zaxE3MdpZpZTLTXVPZGxmVlOrnGaWUnUTn3TgdPMSsR9nGZmOdVSH6cDp5mVRO2ETQdOMysRN9XNzHKKGqpzOnCaWUm4xmlmllMtDQ75AfgKWGONHtxw/Riee/Z+nv33fWy/3daVLpI146dHjuDppybxzNP3cNRPDwPgC1/YjIceGM9TT97NrbdcRffu3SpcysqLIpdq5MBZAef/3xlMnHgvW3z+/7HV1t/ghakzKl0ka8Lmm2/CiBEH8uUd9mSrrb/BnnvsykYbDeCSP53HSSf/msFb7cqtt97B8ccdUemiVlwdUdRSjRw421iPHt356o7
bccWV1wGwZMkSFi58p8KlsqZsuulAHn30Kd5//wOWLVvGAw9O5tt7D2XQwA154MHJANw96UG+/e09KlzSyqsrcqlGbR44JR3a1tdsTzbY4DO8+eZbXH7Z+Tz26EQu+dN5rL56l0oXy5rw/PNT2XHH7ejVqydduqzG0N13pl+/9ZgyZTp77bUbAPt895v077dehUtaeVHkv2pUiRrnrypwzXajU8eODB78eS65ZCxf2nY3Fi9+jxN+fmSli2VNmDp1JueddxF3TPgLE27/M08/8zzLltVx2MifccQPh/PI5Dvo3r0rH320pNJFrbhaqnGWZVRd0r+b2gX0bea4kcBIAHVcgw4dupahdJU1e85cZs+ey6OPPQXAzTf/nZ//rwNne3blVddz5VXXA3DmqBOZPXsu06a9yNA9DwRg4MAN2WPoLpUsYrtQrbXHYpTrcaS+wG7AggbpAv7V1EERMQYYA9Cp8/or5U/h9dffYPbs/zJo0EZMn/4iO++8Iy+8ML3SxbJmrLVWb9544y3691+Pvfceyld2/NbyNEmc9IujuWTMNZUuZsVVa+2xGOUKnLcD3SLi6YY7JN1XpmtWjaOPPYWxV/+ezp1X4aWX/sOIw35W6SJZM/56w6X06t2TJUuWctRRJ7Nw4Tv89MgRHHHEDwC49dYJXHX1DZUtZDtQFytlXadRinZ6sytrjdOsvVv60ZyiPq/y4M9+p6jf2WteubnqPh/Tbw6ZWUnUUk3HgdPMSqJaH2YvhgOnmZWER9XNzHLyqLqZWU5uqpuZ5eSmuplZTrXUVPfsSGZWEhFR1NISSVdImifpuYK0XpLukjQjfe2Z0iVptKSZkv4taauCY4an/DMkDS9I31rSs+mY0ZJafK7UgdPMSqKM83FeBezeIO1EYFJEDAQmpW2AocDAtIwELoYs0AKnAdsB2wKn1QfblOfwguMaXusTHDjNrCTKNTtSRDwAzG+QPAy4Oq1fDexdkD42MpOBNSWtSzZ3xl0RMT8iFgB3AbunfT0iYnJk1d+xBedqkgOnmZVEsfNxShop6fGCZWQrLtc3Iuam9df4eNa19YFXC/LNTmnNpc9uJL1ZHhwys5Io9nGkwlnRijw+JLXpkL5LByQsAAAGfElEQVRrnGZWEuUaHGrC66mZTfo6L6XPAfoX5OuX0ppL79dIerMcOM2sJNp4BvjxQP3I+HDgtoL0Q9Lo+vbAwtSknwgMkdQzDQoNASamfe9I2j6Nph9ScK4mualuZiVRrgfgJV0H7AT0kTSbbHT8HGCcpBHAK8B+KfsEYA9gJvAecChARMyXNAp4LOU7IyLqB5x+TDZy3wW4Iy3Nl8nzcZpZoWLn49y1/25F/c7e/erEqpuP0011M7Oc3FQ3s5Jor63XcnDgNLOS8OxIZmY5eXYkM7OcaulTLh04zawkaidsOnCaWYm4j9PMLCcHTjOznPw4kplZTq5xmpnl5MeRzMxyclPdzCwnN9XNzHJyjdPMLCfXOM3McvLgkJlZTrX0rronMjYzy8k1TjMrCTfVzcxyqqWmugOnmZWEa5xmZjm5xmlmlpNrnGZmObnGaWaWk2ucZmY5RdRVughtxoHTzErC76qbmeXk2ZHMzHJyjdPMLCfXOM3McvLjSGZmOflxJDOznNxUNzPLyYNDZmY51VKN0zPAm5nl5BqnmZWER9XNzHKqpaa6A6eZlYQHh8zMcnKN08wsJ/dxmpnl5DeHzMxyco3TzCynWurj9APwZlYSUeS/1pC0u6RpkmZKOrHMt9Ii1zjNrCTKVeOU1BG4CPgGMBt4TNL4iJhSlgu2ggOnmZVEGZvq2wIzI2IWgKTrgWFAxQKnm+pmVhJR5NIK6wOvFmzPTmkV025rnEs/mqNKl6GcJI2MiDGVLocVxz+/Tyr2d1bSSGBkQdKY9v69dY2zcka2nMXaMf/8SiQixkTENgVLw6A5B+hfsN0vpVWMA6eZtXePAQMlbSCpM/A9YHwlC9Rum+pmZgARsVT
SkcBEoCNwRUQ8X8kyOXBWTrvuw7EW+efXhiJiAjCh0uWop1p62t/MrBTcx2lmlpMDZwW0t9fHrPUkXSFpnqTnKl0WqxwHzjZW8PrYUGAz4ABJm1W2VJbDVcDulS6EVZYDZ9tb/vpYRHwE1L8+ZlUgIh4A5le6HFZZDpxtr929PmZm+Thwmpnl5MDZ9trd62Nmlo8DZ9trd6+PmVk+DpxtLCKWAvWvj70AjKv062PWepKuAx4GNpE0W9KISpfJ2p7fHDIzy8k1TjOznBw4zcxycuA0M8vJgdPMLCcHTjOznBw4VwKSlkl6WtJzkv4qafVPca6dJN2e1vdqbvYmSWtK+nER1zhd0vGtTW+Q5ypJ++S41gDPZGSl5sC5cng/IraMiC2Aj4AfFe5UJvfPOiLGR8Q5zWRZE8gdOM2qnQPnyudBYONU05omaSzwHNBf0hBJD0t6MtVMu8Hy+UGnSnoS+E79iST9QNIf0npfSbdIeiYtOwDnABul2u55Kd//SnpM0r8l/argXCdLmi7pIWCTlm5C0uHpPM9IuqlBLXpXSY+n830z5e8o6byCa//w034jzZriwLkSkdSJbJ7PZ1PSQOCPEbE5sBj4JbBrRGwFPA78TNJqwKXAt4CtgXWaOP1o4P6I+CKwFfA8cCLwYqrt/q+kIema2wJbAltL+pqkrcleLd0S2AP4Uitu5+aI+FK63gtA4Rs6A9I19gT+lO5hBLAwIr6Uzn+4pA1acR2z3PxhbSuHLpKeTusPApcD6wGvRMTklL492cTJ/5QE0Jns1cFNgZciYgaApGtp/DPDdwYOAYiIZcBCST0b5BmSlqfSdjeyQNoduCUi3kvXaM27+VtIOpOsO6Ab2Suq9cZFRB0wQ9KsdA9DgC8U9H+uka49vRXXMsvFgXPl8H5EbFmYkILj4sIk4K6IOKBBvhWO+5QEnB0RlzS4xjFFnOsqYO+IeEbSD4CdCvY1fE840rV/GhGFARZJA4q4tlmz3FSvHZOBr0jaGEBSV0mDgKnAAEkbpXwHNHH8JOCIdGxHSWsA75LVJutNBP6noO90fUlrAw8Ae0vqIqk7WbdAS7oDcyWtAhzUYN++kjqkMm8ITEvXPiLlR9IgSV1bcR2z3FzjrBER8UaquV0nadWU/MuImC5pJPB3Se+RNfW7N3KKo4ExaTagZcAREfGwpH+mx33uSP2cnwMeTjXeRcD3I+JJSTcAzwDzyKbWa8kpwCPAG+lrYZn+AzwK9AB+FBEfSLqMrO/zSWUXfwPYu3XfHbN8PDuSmVlObqqbmeXkwGlmlpMDp5lZTg6cZmY5OXCameXkwGlmlpMDp5lZTg6cZmY5/X80Ap3GNpPMAgAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "resampled_results = resampled_model.evaluate(\n", - " test_features, test_labels, batch_size=BATCH_SIZE, verbose=0\n", - ")\n", - "for name, value in zip(resampled_model.metrics_names, resampled_results):\n", - " print(name, \": \", value)\n", - "print()\n", - "\n", - "plot_cm(test_labels, test_predictions_resampled)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "_xYozM1IIITq" - }, - "source": [ - "### Plot the ROC" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "fye_CiuYrZ1U" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plot_roc(\n", - " \"Train Baseline\", train_labels, train_predictions_baseline, color=colors[0]\n", - ")\n", - "plot_roc(\n", - " \"Test Baseline\",\n", - " test_labels,\n", - " test_predictions_baseline,\n", - " color=colors[0],\n", - " linestyle=\"--\",\n", - ")\n", - "\n", - "plot_roc(\n", - " \"Train Weighted\", train_labels, train_predictions_weighted, color=colors[1]\n", - ")\n", - "plot_roc(\n", - " \"Test Weighted\",\n", - " test_labels,\n", - " test_predictions_weighted,\n", - " color=colors[1],\n", - " linestyle=\"--\",\n", - ")\n", - "\n", - "plot_roc(\n", - " \"Train Resampled\",\n", - " train_labels,\n", - " train_predictions_resampled,\n", - " color=colors[2],\n", - ")\n", - "plot_roc(\n", - " \"Test Resampled\",\n", - " test_labels,\n", - " test_predictions_resampled,\n", - " color=colors[2],\n", - " linestyle=\"--\",\n", - ")\n", - "plt.legend(loc=\"lower right\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "3o3f0ywl8uqW" - }, - "source": [ - "## Applying this tutorial to your problem\n", - "\n", - "Imbalanced data classification is an inherently difficult task since there are so few samples to learn from. You should always start with the data first and do your best to collect as many samples as possible and give substantial thought to what features may be relevant so the model can get the most out of your minority class. At some point your model may struggle to improve and yield the results you want, so it is important to keep in mind the context of your problem and the trade-offs between different types of errors." 
- ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "imbalanced_data.ipynb", - "private_outputs": true, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/introduction_to_tensorflow/labs/adv_tfdv_facets.ipynb b/notebooks/introduction_to_tensorflow/labs/adv_tfdv_facets.ipynb deleted file mode 100644 index 6667d22e..00000000 --- a/notebooks/introduction_to_tensorflow/labs/adv_tfdv_facets.ipynb +++ /dev/null @@ -1,595 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "HnU0fNSuG2aD" - }, - "source": [ - "# Lab: Feature Analysis Using TensorFlow Data Validation and Facets" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "iVkPBosnIFlu" - }, - "source": [ - "**Learning Objectives:**\n", - "1. Use TFRecords to load record-oriented binary format data\n", - "2. Use TFDV to generate statistics and Facets to visualize the data\n", - "3. Use the TFDV widget to answer questions\n", - "4. Analyze label distribution for subset groups \n", - " \n", - "\n", - "## Introduction \n", - "\n", - "Bias can manifest in any part of a typical machine learning pipeline, from an unrepresentative dataset, to learned model representations, to the way in which the results are presented to the user. 
Errors that result from this bias can disproportionately impact some users more than others.\n", - "\n", - "[TensorFlow Data Validation](https://www.tensorflow.org/tfx/data_validation/get_started) (TFDV) is one tool you can use to analyze your data to find potential problems in your data, such as missing values and data imbalances - that can lead to Fairness disparities. The TFDV tool analyzes training and serving data to compute descriptive statistics, infer a schema, and detect data anomalies. [Facets Overview](https://pair-code.github.io/facets/) provides a succinct visualization of these statistics for easy browsing. Both the TFDV and Facets are tools that are part of the [Fairness Indicators](https://www.tensorflow.org/tfx/fairness_indicators).\n", - "\n", - "In this notebook, we use TFDV to compute descriptive statistics that provide a quick overview of the data in terms of the features that are present and the shapes of their value distributions. We use Facets Overview to visualize these statistics using the Civil Comments dataset. \n", - "\n", - "Each learning objective will correspond to a __#TODO__ in this student lab notebook -- try to complete this notebook first and then review the [solution notebook](../solutions/adv_tfdv_facets.ipynb). \n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Set up environment variables and load necessary libraries \n", - "We will start by importing the necessary dependencies for the libraries we'll be using in this exercise. First, run the cell below to install Fairness Indicators. 
\n", - "\n", - "**NOTE:** You can ignore the \"pip\" being invoked by an old script wrapper, as it will not affect the lab's functionality.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip3 install fairness-indicators --user" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Restart the kernel after you do a pip3 install (click on the Restart the kernel button above)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "mdLlKWbIlxYH" - }, - "source": [ - "Next, import all the dependencies we'll use in this exercise, which include Fairness Indicators, TensorFlow Data Validation (tfdv), and the What-If tool (WIT) Facets Overview." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "6E__x2XkJDFW" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2.0.0\n", - "\n" - ] - } - ], - "source": [ - "import os\n", - "\n", - "# %tensorflow_version 2.x\n", - "import sys\n", - "import warnings\n", - "\n", - "warnings.filterwarnings(\"ignore\")\n", - "# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Ignore deprecation warnings\n", - "import tempfile\n", - "import warnings\n", - "from datetime import datetime\n", - "\n", - "import apache_beam as beam\n", - "import numpy as np\n", - "import pandas as pd\n", - "import tensorflow as tf\n", - "import tensorflow_data_validation as tfdv\n", - "import tensorflow_hub as hub\n", - "import tensorflow_model_analysis as tfma\n", - "from fairness_indicators.examples import util\n", - "from tensorflow_model_analysis.addons.fairness.post_export_metrics import (\n", - " fairness_indicators,\n", - ")\n", - "from tensorflow_model_analysis.addons.fairness.view import widget_view\n", - "\n", - "warnings.filterwarnings(\"ignore\")\n", - "\n", - "from witwidget.notebook.visualization import WitConfigBuilder, 
WitWidget\n", - "\n", - "print(tf.version.VERSION)\n", - "print(\n", - " tf\n", - ") # Prints the imported tensorflow module object (its name and file location), not the Python version." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "J3R2QWkru1WN" - }, - "source": [ - "### About the Civil Comments dataset\n", - "\n", - "Click below to learn more about the Civil Comments dataset, and how we've preprocessed it for this exercise." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ZZswcJJMCDjU" - }, - "source": [ - "The Civil Comments dataset comprises approximately 2 million public comments that were submitted to the Civil Comments platform. [Jigsaw](https://jigsaw.google.com/) sponsored the effort to compile and annotate these comments for ongoing [research](https://arxiv.org/abs/1903.04561); they've also hosted competitions on [Kaggle](https://www.kaggle.com/c/jigsaw-unintended-bias-in-toxicity-classification) to help classify toxic comments as well as minimize unintended model bias. \n", - "\n", - "#### Features\n", - "\n", - "Within the Civil Comments data, a subset of comments are tagged with a variety of identity attributes pertaining to gender, sexual orientation, religion, race, and ethnicity. Each identity annotation column contains a value that represents the percentage of annotators who categorized a comment as containing references to that identity. Multiple identities may be present in a comment.\n", - "\n", - "**NOTE:** These identity attributes are intended *for evaluation purposes only*, to assess how well a classifier trained solely on the comment text performs on different tag sets.\n", - "\n", - "To collect these identity labels, each comment was reviewed by up to 10 annotators, who were asked to indicate all identities that were mentioned in the comment. 
For example, annotators were posed the question: \"What genders are mentioned in the comment?\", and asked to choose all of the following categories that were applicable.\n", - "\n", - "* Male\n", - "* Female\n", - "* Transgender\n", - "* Other gender\n", - "* No gender mentioned\n", - "\n", - "**NOTE:** *We recognize the limitations of the categories used in the original dataset, and acknowledge that these terms do not encompass the full range of vocabulary used in describing gender.*\n", - "\n", - "Jigsaw used these ratings to generate an aggregate score for each identity attribute representing the percentage of raters who said the identity was mentioned in the comment. For example, if 10 annotators reviewed a comment, and 6 said that the comment mentioned the identity \"female\" and 0 said that the comment mentioned the identity \"male,\" the comment would receive a `female` score of `0.6` and a `male` score of `0.0`.\n", - "\n", - "**NOTE:** For the purposes of annotation, a comment was considered to \"mention\" gender if it contained a comment about gender issues (e.g., a discussion about feminism, wage gap between men and women, transgender rights, etc.), gendered language, or gendered insults. Use of \"he,\" \"she,\" or gendered names (e.g., Donald, Margaret) did not require a gender label. 
\n", - "\n", - "#### Label\n", - "\n", - "Each comment was rated by up to 10 annotators for toxicity, who each classified it with one of the following ratings.\n", - "\n", - "* Very Toxic\n", - "* Toxic\n", - "* Hard to Say\n", - "* Not Toxic\n", - "\n", - "Again, Jigsaw used these ratings to generate an aggregate toxicity \"score\" for each comment (ranging from `0.0` to `1.0`) to serve as the [label](https://developers.google.com/machine-learning/glossary?utm_source=Colab&utm_medium=fi-colab&utm_campaign=fi-practicum&utm_content=glossary&utm_term=label#label), representing the fraction of annotators who labeled the comment either \"Very Toxic\" or \"Toxic.\" For example, if 10 annotators rated a comment, and 3 of them labeled it \"Very Toxic\" and 5 of them labeled it \"Toxic\", the comment would receive a toxicity score of `0.8`.\n", - "\n", - "**NOTE:** For more information on the Civil Comments labeling schema, see the [Data](https://www.kaggle.com/c/jigsaw-unintended-bias-in-toxicity-classification/data) section of the Jigsaw Unintended Bias in Toxicity Classification Kaggle competition.\n", - "\n", - "### Preprocessing the data\n", - "For the purposes of this exercise, we converted toxicity and identity columns to booleans in order to work with our neural net and metrics calculations. In the preprocessed dataset, we considered any value ≥ 0.5 as True (i.e., a comment is considered toxic if 50% or more crowd raters labeled it as toxic).\n", - "\n", - "For identity labels, the threshold 0.5 was chosen and the identities were grouped together by their categories. 
For example, if one comment has `{ male: 0.3, female: 1.0, transgender: 0.0, heterosexual: 0.8, homosexual_gay_or_lesbian: 1.0 }`, after processing, the data will be `{ gender: [female], sexual_orientation: [heterosexual, homosexual_gay_or_lesbian] }`.\n", - "\n", - "**NOTE:** Missing identity fields were converted to False.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "0YNqAJW5JjZD" - }, - "source": [ - "### Use TFRecords to load record-oriented binary format data\n", - "\n", - "\n", - "\n", - "\n", - "-------------------------------------------------------------------------------------------------------\n", - "\n", - "The [TFRecord format](https://www.tensorflow.org/tutorials/load_data/tfrecord) is a simple [Protobuf](https://developers.google.com/protocol-buffers)-based format for storing a sequence of binary records. It lets you and your machine learning models handle arbitrarily large datasets over the network because it:\n", - "1. Splits up large files into 100-200MB chunks\n", - "2. Stores the results as serialized binary messages for faster ingestion\n", - "\n", - "If you already have a dataset in TFRecord format, you can use the tf.keras.utils functions for accessing the data (as you will below!). If you want to practice creating your own TFRecord datasets you can do so outside of this lab by [viewing the documentation here](https://www.tensorflow.org/tutorials/load_data/tfrecord). \n", - "\n", - "#### TODO 1: Use the utility functions tf.keras to download and import our datasets\n", - "Run the following cell to download and import the training and validation preprocessed datasets. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "duPWGTQAvYKK" - }, - "outputs": [], - "source": [ - "download_original_data = False # @param {type:\"boolean\"}\n", - "\n", - "\n", - "# TODO 1\n", - "\n", - "# TODO: Your code goes here\n", - "\n", - "# The identity terms list will be grouped together by their categories\n", - "# (see 'IDENTITY_COLUMNS') on threshold 0.5. Only the identity term column,\n", - "# text column and label column will be kept after processing.\n", - "# TODO: Your code goes here\n", - "\n", - "# TODO 1a\n", - "\n", - "# TODO: Your code goes here" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "aLup7wY0_Q3K" - }, - "source": [ - "### Use TFDV to generate statistics and Facets to visualize the data\n", - " \n", - "\n", - "TensorFlow Data Validation supports data stored in a TFRecord file, a CSV input format, with extensibility for other common formats. You can find the available data decoders [here](https://github.com/tensorflow/data-validation/tree/master/tensorflow_data_validation/coders). In addition, TFDV provides the [tfdv.generate_statistics_from_dataframe](https://www.tensorflow.org/tfx/data_validation/api_docs/python/tfdv/generate_statistics_from_dataframe) utility function for users with in-memory data represented as a pandas DataFrame.\n", - "\n", - "In addition to computing a default set of data statistics, TFDV can also compute statistics for semantic domains (e.g., images, text). To enable computation of semantic domain statistics, pass a tfdv.StatsOptions object with enable_semantic_domain_stats set to True to tfdv.generate_statistics_from_tfrecord. Before we train the model, let's do a quick audit of our training data using [TensorFlow Data Validation](https://www.tensorflow.org/tfx/data_validation/get_started), so we can better understand our data distribution. 
\n", - "\n", - "#### TODO 2: Use TFDV to get quick statistics on your dataset\n", - "\n", - "The following cell may take 2–3 minutes to run. **NOTE:** Please ignore the deprecation warnings. " - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "vkzcE_g8_m_h" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# TODO 2\n", - "\n", - "# The computation of statistics using TFDV. The returned value is a DatasetFeatureStatisticsList protocol buffer.\n", - "# TODO: Your code goes here\n", - "\n", - "# TODO 2a\n", - "\n", - "# A visualization of the statistics using Facets Overview.\n", - "# TODO: Your code goes here" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "wZU1Djze6E-s" - }, - "source": [ - "### TODO 3: Use the TensorFlow Data Validation widget above to answer the following questions." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ne2_vKAb-XGD" - }, - "source": [ - "#### **1. How many total examples are in the training dataset?**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "UFBqqnRD-Zkj" - }, - "source": [ - "#### Solution\n", - "\n", - "See below solution.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "XSkOfchI-arC" - }, - "source": [ - "**There are 1.08 million total examples in the training dataset.**\n", - "\n", - " The count column tells us how many examples there are for a given feature. Each feature (`sexual_orientation`, `comment_text`, `gender`, etc.) has 1.08 million examples. The missing column tells us what percentage of examples are missing that feature. 
\n", - "\n", - "![Screenshot of first row of Categorical Features table in the TFDV widget, with 1.08 million count of examples and 0% missing examples highlighted](https://developers.google.com/machine-learning/practica/fairness-indicators/colab-images/tfdv_screenshot_exercise1.png) \n", - " \n", - "Each feature is missing from 0% of examples, so we know that the per-feature example count of 1.08 million is also the total number of examples in the dataset." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "_PgFNm6sAZB2" - }, - "source": [ - "#### **2. How many unique values are there for the `gender` feature? What are they, and what are the frequencies of each of these values?**\n", - "\n", - "**NOTE #1:** `gender` and the other identity features (`sexual_orientation`, `religion`, `disability`, and `race`) are included in this dataset for evaluation purposes only, so we can assess model performance on different identity slices. The only feature we will use for model training is `comment_text`.\n", - "\n", - "**NOTE #2:** *We recognize the limitations of the categories used in the original dataset, and acknowledge that these terms do not encompass the full range of vocabulary used in describing gender.*" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "6KmrCS-uAz0s" - }, - "source": [ - "#### Solution\n", - "\n", - "See below solution." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "wkc7P1nvA4cw" - }, - "source": [ - "The **unique** column of the **Categorical Features** table tells us that there are 4 unique values for the `gender` feature.\n", - "\n", - "To view the 4 values and their frequencies, we can click on the **SHOW RAW DATA** button:\n", - "\n", - "![\"gender\" row of the \"Categorical Data\" table in the TFDV widget, with raw data highlighted.](https://developers.google.com/machine-learning/practica/fairness-indicators/colab-images/tfdv_screenshot_exercise2.png)\n", - "\n", - "The raw data table shows that there are 32,208 examples with a gender value of `female`, 26,758 examples with a value of `male`, 1,551 examples with a value of `transgender`, and 4 examples with a value of `other gender`.\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "NDUO57bdNUQR" - }, - "source": [ - "**NOTE:** As described [earlier](#scrollTo=J3R2QWkru1WN), a `gender` feature can contain zero or more of these 4 values, depending on the content of the comment. For example, a comment containing the text \"I am a transgender man\" will have both `transgender` and `male` as `gender` values, whereas a comment that does not reference gender at all will have an empty/false `gender` value." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "wX62Ktwp-qoF" - }, - "source": [ - "#### **3. What percentage of total examples are labeled toxic? 
Overall, is this a class-balanced dataset (relatively even split of examples between positive and negative classes) or a class-imbalanced dataset (majority of examples are in one class)?**\n", - "\n", - "**NOTE:** In this dataset, a `toxicity` value of `0` signifies \"not toxic,\" and a `toxicity` value of `1` signifies \"toxic.\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "IvvxNMgM-6A2" - }, - "source": [ - "#### Solution\n", - "\n", - "See below solution." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "QmCtkzZqOvC2" - }, - "source": [ - "**7.98 percent of examples are toxic.**\n", - "\n", - "Under **Numeric Features**, we can see the distribution of values for the `toxicity` feature. 92.02% of examples have a value of 0 (which signifies \"non-toxic\"), so 7.98% of examples are toxic.\n", - "\n", - "![Screenshot of the \"toxicity\" row in the Numeric Features table in the TFDV widget, highlighting the \"zeros\" column showing that 92.01% of examples have a toxicity value of 0.](https://developers.google.com/machine-learning/practica/fairness-indicators/colab-images/tfdv_screenshot_exercise3.png)\n", - "\n", - "This is a [**class-imbalanced dataset**](https://developers.google.com/machine-learning/glossary?utm_source=Colab&utm_medium=fi-colab&utm_campaign=fi-practicum&utm_content=glossary&utm_term=class-imbalanced-dataset#class-imbalanced-dataset), as the overwhelming majority of examples (over 90%) are classified as nontoxic." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Notice that there is one numeric feature (count of toxic comments) and six categorical features." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### TODO 4: Analyze label distribution for subset groups " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "9MGLCsVhGWz0" - }, - "source": [ - "Run the following code to analyze label distribution for the subset of examples that contain a `gender` value.\n", - "\n", - "\n", - "**NOTE:** *The cell should run for just a few minutes.*" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "cellView": "form", - "colab": {}, - "colab_type": "code", - "id": "f5pEWIkgLTKz" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Toxic Gender Examples: 7189\n", - "Nontoxic Gender Examples: 41572\n" - ] - } - ], - "source": [ - "# @title Calculate label distribution for gender-related examples\n", - "raw_dataset = tf.data.TFRecordDataset(train_tf_file)\n", - "\n", - "toxic_gender_examples = 0\n", - "nontoxic_gender_examples = 0\n", - "\n", - "# TODO 4\n", - "\n", - "# There are 1,082,924 examples in the dataset\n", - "# TODO: Your code goes here\n", - "\n", - "# TODO 4a\n", - "\n", - "# TODO: Your code goes here" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "WJag4cEKNINy" - }, - "source": [ - "#### **What percentage of `gender` examples are labeled toxic? Compare this percentage to the percentage of total examples that are labeled toxic from #3 above. What, if any, fairness concerns can you identify based on this comparison?**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "-J4hbOhgHZid" - }, - "source": [ - "#### Solution\n", - "\n", - "Click below for one possible solution." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "2KK3VWzkHmJ7" - }, - "source": [ - "There are 7,189 gender-related examples that are labeled toxic, which represent 14.7% of all gender-related examples.\n", - "\n", - "The percentage of gender-related examples that are toxic (14.7%) is nearly double the percentage of toxic examples overall (7.98%). In other words, in our dataset, gender-related comments are almost two times more likely than comments overall to be labeled as toxic.\n", - "\n", - "This skew suggests that a model trained on this dataset might learn a correlation between gender-related content and toxicity. This raises fairness considerations, as the model might be more likely to classify nontoxic comments as toxic if they contain gender terminology, which could lead to [disparate impact](https://developers.google.com/machine-learning/glossary?utm_source=Colab&utm_medium=fi-colab&utm_campaign=fi-practicum&utm_content=glossary&utm_term=disparate-impact#disparate-impact) for gender subgroups. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright 2020 Google Inc.\n", - "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at\n", - "http://www.apache.org/licenses/LICENSE-2.0\n", - "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." 
- ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "J3R2QWkru1WN", - "UFBqqnRD-Zkj", - "6KmrCS-uAz0s", - "IvvxNMgM-6A2", - "-J4hbOhgHZid", - "tGyACRd8oFwP", - "FQGWSdrJy08B", - "LlkfgynX0yfF", - "FBhBsevUOinO", - "P5MBQR7EF6ny", - "OaL3qgHCcmwG" - ], - "name": "Copy of Fairness Exercise 1: Explore the Model", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/introduction_to_tensorflow/labs/basic_intro_logistic_regression.ipynb b/notebooks/introduction_to_tensorflow/labs/basic_intro_logistic_regression.ipynb deleted file mode 100644 index 8c6c9dc3..00000000 --- a/notebooks/introduction_to_tensorflow/labs/basic_intro_logistic_regression.ipynb +++ /dev/null @@ -1,1104 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "03_int_logistic_regression (2).ipynb", - "provenance": [], - "private_outputs": true, - "collapsed_sections": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.3" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "LDrzLFXE8T1l" - }, - "source": [ - "# Basic Introduction to Logistic Regression\n", - "\n", - "## Learning Objectives\n", - "\n", - "1. Build a model\n", - "2. Train this model on example data\n", - "3. 
Use the model to make predictions about unknown data\n", - "\n", - "## Introduction\n", - "\n", - "In this notebook, you use machine learning to *categorize* Iris flowers by species. It uses TensorFlow to:\n", - "\n", - "* Use TensorFlow's default eager execution development environment\n", - "* Import data with the Datasets API\n", - "* Build models and layers with TensorFlow's Keras API\n", - "\n", - "First, we will import and parse the dataset, then select the type of model, and then train the model.\n", - "\n", - "Finally, we will evaluate the model's effectiveness and then use the trained model to make predictions.\n", - "\n", - "Each learning objective will correspond to a _#TODO_ in this student lab notebook -- try to complete this notebook first and then review the [solution notebook](https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive2/introduction_to_tensorflow/solutions/basic_intro_logistic_regression.ipynb)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!sudo chown -R jupyter:jupyter /home/jupyter/training-data-analyst" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Ensure the right version of Tensorflow is installed.\n", - "!pip freeze | grep tensorflow==2.1 || pip install tensorflow==2.1" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "1J3AuPBT9gyR" - }, - "source": [ - "### Configure imports\n", - "\n", - "Import TensorFlow and the other required Python modules. By default, TensorFlow uses eager execution to evaluate operations immediately, returning concrete values instead of creating a computational graph that is executed later. If you are used to a REPL or the `python` interactive console, this feels familiar." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "jElLULrDhQZR", - "colab": {} - }, - "source": [ - "import os\n", - "\n", - "import matplotlib.pyplot as plt" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "bfV2Dai0Ow2o", - "colab": {} - }, - "source": [ - "import tensorflow as tf" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "g4Wzg69bnwK2", - "colab": {} - }, - "source": [ - "print(f\"TensorFlow version: {tf.__version__}\")\n", - "print(f\"Eager execution: {tf.executing_eagerly()}\")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Zx7wc0LuuxaJ" - }, - "source": [ - "## The Iris classification problem\n", - "\n", - "Imagine you are a botanist seeking an automated way to categorize each Iris flower you find. Machine learning provides many algorithms to classify flowers statistically. For instance, a sophisticated machine learning program could classify flowers based on photographs. Our ambitions are more modest—we're going to classify Iris flowers based on the length and width measurements of their [sepals](https://en.wikipedia.org/wiki/Sepal) and [petals](https://en.wikipedia.org/wiki/Petal).\n", - "\n", - "The Iris genus entails about 300 species, but our program will only classify the following three:\n", - "\n", - "* Iris setosa\n", - "* Iris virginica\n", - "* Iris versicolor\n", - "\n", - "\n", - " \n", - " \n", - "
\n", - " \"Petal\n", - "
\n", - " Figure 1. Iris setosa (by Radomil, CC BY-SA 3.0), Iris versicolor, (by Dlanglois, CC BY-SA 3.0), and Iris virginica (by Frank Mayfield, CC BY-SA 2.0).
 \n", - "
\n", - "\n", - "Fortunately, someone has already created a [dataset of 120 Iris flowers](https://en.wikipedia.org/wiki/Iris_flower_data_set) with the sepal and petal measurements. This is a classic dataset that is popular for beginner machine learning classification problems." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "3Px6KAg0Jowz" - }, - "source": [ - "## Import and parse the training dataset\n", - "\n", - "Download the dataset file and convert it into a structure that can be used by this Python program.\n", - "\n", - "### Download the dataset\n", - "\n", - "Download the training dataset file using the `tf.keras.utils.get_file` function. This returns the file path of the downloaded file:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "J6c7uEU9rjRM", - "colab": {} - }, - "source": [ - "train_dataset_url = \"https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv\"\n", - "\n", - "train_dataset_fp = tf.keras.utils.get_file(\n", - " fname=os.path.basename(train_dataset_url), origin=train_dataset_url\n", - ")\n", - "\n", - "print(f\"Local copy of the dataset file: {train_dataset_fp}\")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "qnX1-aLors4S" - }, - "source": [ - "### Inspect the data\n", - "\n", - "This dataset, `iris_training.csv`, is a plain text file that stores tabular data formatted as comma-separated values (CSV). 
Use the `head -n5` command to take a peek at the first five entries:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "FQvb_JYdrpPm", - "colab": {} - }, - "source": [ - "!head -n5 {train_dataset_fp}" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "kQhzD6P-uBoq" - }, - "source": [ - "From this view of the dataset, notice the following:\n", - "\n", - "1. The first line is a header containing information about the dataset:\n", - " * There are 120 total examples. Each example has four features and one of three possible label names.\n", - "2. Subsequent rows are data records, one [example](https://developers.google.com/machine-learning/glossary/#example) per line, where:\n", - " * The first four fields are [features](https://developers.google.com/machine-learning/glossary/#feature): these are the characteristics of an example. Here, the fields hold float numbers representing flower measurements.\n", - " * The last column is the [label](https://developers.google.com/machine-learning/glossary/#label): this is the value we want to predict. 
For this dataset, it's an integer value of 0, 1, or 2 that corresponds to a flower name.\n", - "\n", - "Let's write that out in code:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "9Edhevw7exl6", - "colab": {} - }, - "source": [ - "# column order in CSV file\n", - "column_names = [\n", - " \"sepal_length\",\n", - " \"sepal_width\",\n", - " \"petal_length\",\n", - " \"petal_width\",\n", - " \"species\",\n", - "]\n", - "\n", - "feature_names = column_names[:-1]\n", - "label_name = column_names[-1]\n", - "\n", - "print(f\"Features: {feature_names}\")\n", - "print(f\"Label: {label_name}\")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "CCtwLoJhhDNc" - }, - "source": [ - "Each label is associated with string name (for example, \"setosa\"), but machine learning typically relies on numeric values. The label numbers are mapped to a named representation, such as:\n", - "\n", - "* `0`: Iris setosa\n", - "* `1`: Iris versicolor\n", - "* `2`: Iris virginica\n", - "\n", - "For more information about features and labels, see the [ML Terminology section of the Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/framing/ml-terminology)." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "sVNlJlUOhkoX", - "colab": {} - }, - "source": [ - "class_names = [\"Iris setosa\", \"Iris versicolor\", \"Iris virginica\"]" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "dqPkQExM2Pwt" - }, - "source": [ - "### Create a `tf.data.Dataset`\n", - "\n", - "TensorFlow's Dataset API handles many common cases for loading data into a model. 
This is a high-level API for reading data and transforming it into a form used for training.\n", - "\n", - "\n", - "Since the dataset is a CSV-formatted text file, use the `tf.data.experimental.make_csv_dataset` function to parse the data into a suitable format. Since this function generates data for training models, the default behavior is to shuffle the data (`shuffle=True, shuffle_buffer_size=10000`), and repeat the dataset forever (`num_epochs=None`). We also set the [batch_size](https://developers.google.com/machine-learning/glossary/#batch_size) parameter:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "WsxHnz1ebJ2S", - "colab": {} - }, - "source": [ - "batch_size = 32\n", - "\n", - "train_dataset = tf.data.experimental.make_csv_dataset(\n", - " train_dataset_fp,\n", - " batch_size,\n", - " column_names=column_names,\n", - " label_name=label_name,\n", - " num_epochs=1,\n", - ")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "gB_RSn62c-3G" - }, - "source": [ - "The `make_csv_dataset` function returns a `tf.data.Dataset` of `(features, label)` pairs, where `features` is a dictionary: `{'feature_name': value}`\n", - "\n", - "These `Dataset` objects are iterable. Let's look at a batch of features:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "iDuG94H-C122", - "colab": {} - }, - "source": [ - "features, labels = next(iter(train_dataset))\n", - "\n", - "print(features)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "E63mArnQaAGz" - }, - "source": [ - "Notice that like-features are grouped together, or *batched*. Each example row's fields are appended to the corresponding feature array. 
Change the `batch_size` to set the number of examples stored in these feature arrays.\n", - "\n", - "You can start to see some clusters by plotting a few features from the batch:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "me5Wn-9FcyyO", - "colab": {} - }, - "source": [ - "plt.scatter(\n", - " features[\"petal_length\"], features[\"sepal_length\"], c=labels, cmap=\"viridis\"\n", - ")\n", - "\n", - "plt.xlabel(\"Petal length\")\n", - "plt.ylabel(\"Sepal length\")\n", - "plt.show()" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "YlxpSyHlhT6M" - }, - "source": [ - "To simplify the model building step, create a function to repackage the features dictionary into a single array with shape: `(batch_size, num_features)`.\n", - "\n", - "This function uses the `tf.stack` method which takes values from a list of tensors and creates a combined tensor at the specified dimension:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "jm932WINcaGU", - "colab": {} - }, - "source": [ - "def pack_features_vector(features, labels):\n", - " \"\"\"Pack the features into a single array.\"\"\"\n", - " features = tf.stack(list(features.values()), axis=1)\n", - " return features, labels" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "V1Vuph_eDl8x" - }, - "source": [ - "Then use the `tf.data.Dataset#map` method to pack the `features` of each `(features,label)` pair into the training dataset:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "ZbDkzGZIkpXf", - "colab": {} - }, - "source": [ - "train_dataset = train_dataset.map(pack_features_vector)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "NLy0Q1xCldVO" - }, - "source": [ - 
"The features element of the `Dataset` are now arrays with shape `(batch_size, num_features)`. Let's look at the first few examples:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "kex9ibEek6Tr", - "colab": {} - }, - "source": [ - "features, labels = next(iter(train_dataset))\n", - "\n", - "print(features[:5])" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "LsaVrtNM3Tx5" - }, - "source": [ - "## Select the type of model\n", - "\n", - "### Why model?\n", - "\n", - "A [model](https://developers.google.com/machine-learning/crash-course/glossary#model) is a relationship between features and the label. For the Iris classification problem, the model defines the relationship between the sepal and petal measurements and the predicted Iris species. Some simple models can be described with a few lines of algebra, but complex machine learning models have a large number of parameters that are difficult to summarize.\n", - "\n", - "Could you determine the relationship between the four features and the Iris species *without* using machine learning? That is, could you use traditional programming techniques (for example, a lot of conditional statements) to create a model? Perhaps—if you analyzed the dataset long enough to determine the relationships between petal and sepal measurements to a particular species. And this becomes difficult—maybe impossible—on more complicated datasets. A good machine learning approach *determines the model for you*. If you feed enough representative examples into the right machine learning model type, the program will figure out the relationships for you.\n", - "\n", - "### Select the model\n", - "\n", - "We need to select the kind of model to train. There are many types of models and picking a good one takes experience. This tutorial uses a neural network to solve the Iris classification problem. 
[Neural networks](https://developers.google.com/machine-learning/glossary/#neural_network) can find complex relationships between features and the label. It is a highly-structured graph, organized into one or more [hidden layers](https://developers.google.com/machine-learning/glossary/#hidden_layer). Each hidden layer consists of one or more [neurons](https://developers.google.com/machine-learning/glossary/#neuron). There are several categories of neural networks and this program uses a dense, or [fully-connected neural network](https://developers.google.com/machine-learning/glossary/#fully_connected_layer): the neurons in one layer receive input connections from *every* neuron in the previous layer. For example, Figure 2 illustrates a dense neural network consisting of an input layer, two hidden layers, and an output layer:\n", - "\n", - "\n", - " \n", - " \n", - "
\n", - " \n", - "
\n", - " Figure 2. A neural network with features, hidden layers, and predictions.
 \n", - "
\n", - "\n", - "When the model from Figure 2 is trained and fed an unlabeled example, it yields three predictions: the likelihood that this flower is the given Iris species. This prediction is called [inference](https://developers.google.com/machine-learning/crash-course/glossary#inference). For this example, the sum of the output predictions is 1.0. In Figure 2, this prediction breaks down as: `0.02` for *Iris setosa*, `0.95` for *Iris versicolor*, and `0.03` for *Iris virginica*. This means that the model predicts—with 95% probability—that an unlabeled example flower is an *Iris versicolor*." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "W23DIMVPQEBt" - }, - "source": [ - "### Create a model using Keras\n", - "\n", - "The TensorFlow `tf.keras` API is the preferred way to create models and layers. This makes it easy to build models and experiment while Keras handles the complexity of connecting everything together.\n", - "\n", - "The `tf.keras.Sequential` model is a linear stack of layers. Its constructor takes a list of layer instances, in this case, two `tf.keras.layers.Dense` layers with 10 nodes each, and an output layer with 3 nodes representing our label predictions. The first layer's `input_shape` parameter corresponds to the number of features from the dataset, and is required:" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Lab Task #1:** Building the model" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "2fZ6oL2ig3ZK", - "colab": {} - }, - "source": [ - "# TODO 1\n", - "# TODO -- Your code here." - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "FHcbEzMpxbHL" - }, - "source": [ - "The [activation function](https://developers.google.com/machine-learning/crash-course/glossary#activation_function) determines the output shape of each node in the layer. 
These non-linearities are important—without them the model would be equivalent to a single layer. There are many `tf.keras.activations`, but [ReLU](https://developers.google.com/machine-learning/crash-course/glossary#ReLU) is common for hidden layers.\n", - "\n", - "The ideal number of hidden layers and neurons depends on the problem and the dataset. Like many aspects of machine learning, picking the best shape of the neural network requires a mixture of knowledge and experimentation. As a rule of thumb, increasing the number of hidden layers and neurons typically creates a more powerful model, which requires more data to train effectively." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "2wFKnhWCpDSS" - }, - "source": [ - "### Using the model\n", - "\n", - "Let's have a quick look at what this model does to a batch of features:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "xe6SQ5NrpB-I", - "colab": {} - }, - "source": [ - "predictions = model(features)\n", - "predictions[:5]" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "wxyXOhwVr5S3" - }, - "source": [ - "Here, each example returns a [logit](https://developers.google.com/machine-learning/crash-course/glossary#logits) for each class.\n", - "\n", - "To convert these logits to a probability for each class, use the [softmax](https://developers.google.com/machine-learning/crash-course/glossary#softmax) function:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "_tRwHZmTNTX2", - "colab": {} - }, - "source": [ - "tf.nn.softmax(predictions[:5])" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "uRZmchElo481" - }, - "source": [ - "Taking the `tf.argmax` across classes gives us the predicted class index. 
But, the model hasn't been trained yet, so these aren't good predictions:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "-Jzm_GoErz8B", - "colab": {} - }, - "source": [ - "print(f\"Prediction: {tf.argmax(predictions, axis=1)}\")\n", - "print(f\"Labels: {labels}\")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Vzq2E5J2QMtw" - }, - "source": [ - "## Train the model\n", - "\n", - "[Training](https://developers.google.com/machine-learning/crash-course/glossary#training) is the stage of machine learning when the model is gradually optimized, or the model *learns* the dataset. The goal is to learn enough about the structure of the training dataset to make predictions about unseen data. If you learn *too much* about the training dataset, then the predictions only work for the data it has seen and will not be generalizable. This problem is called [overfitting](https://developers.google.com/machine-learning/crash-course/glossary#overfitting)—it's like memorizing the answers instead of understanding how to solve a problem.\n", - "\n", - "The Iris classification problem is an example of [supervised machine learning](https://developers.google.com/machine-learning/glossary/#supervised_machine_learning): the model is trained from examples that contain labels. In [unsupervised machine learning](https://developers.google.com/machine-learning/glossary/#unsupervised_machine_learning), the examples don't contain labels. Instead, the model typically finds patterns among the features." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "RaKp8aEjKX6B" - }, - "source": [ - "### Define the loss and gradient function\n", - "\n", - "Both training and evaluation stages need to calculate the model's [loss](https://developers.google.com/machine-learning/crash-course/glossary#loss). 
This measures how off a model's predictions are from the desired label, in other words, how bad the model is performing. We want to minimize, or optimize, this value.\n", - "\n", - "Our model will calculate its loss using the `tf.keras.losses.SparseCategoricalCrossentropy` function which takes the model's class probability predictions and the desired label, and returns the average loss across the examples." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Lab Task #2:** Training Model on example data." - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "QOsi6b-1CXIn", - "colab": {} - }, - "source": [ - "loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "tMAT4DcMPwI-", - "colab": {} - }, - "source": [ - "def loss(model, x, y, training):\n", - "# TODO 2\n", - "# TODO -- Your code here." 
- ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "3IcPqA24QM6B" - }, - "source": [ - "Use the `tf.GradientTape` context to calculate the [gradients](https://developers.google.com/machine-learning/crash-course/glossary#gradient) used to optimize your model:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "x57HcKWhKkei", - "colab": {} - }, - "source": [ - "def grad(model, inputs, targets):\n", - " with tf.GradientTape() as tape:\n", - " loss_value = loss(model, inputs, targets, training=True)\n", - " return loss_value, tape.gradient(loss_value, model.trainable_variables)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "lOxFimtlKruu" - }, - "source": [ - "### Create an optimizer\n", - "\n", - "An [optimizer](https://developers.google.com/machine-learning/crash-course/glossary#optimizer) applies the computed gradients to the model's variables to minimize the `loss` function. You can think of the loss function as a curved surface (see Figure 3) and we want to find its lowest point by walking around. The gradients point in the direction of steepest ascent—so we'll travel the opposite way and move down the hill. By iteratively calculating the loss and gradient for each batch, we'll adjust the model during training. Gradually, the model will find the best combination of weights and bias to minimize loss. And the lower the loss, the better the model's predictions.\n", - "\n", - "\n", - " \n", - " \n", - "
\n", - " \"Optimization\n", - "
\n", - " Figure 3. Optimization algorithms visualized over time in 3D space.
(Source: Stanford class CS231n, MIT License, Image credit: Alec Radford)\n", - "
\n", - "\n", - "TensorFlow has many optimization algorithms available for training. This model uses the `tf.keras.optimizers.SGD` that implements the [stochastic gradient descent](https://developers.google.com/machine-learning/crash-course/glossary#gradient_descent) (SGD) algorithm. The `learning_rate` sets the step size to take for each iteration down the hill. This is a *hyperparameter* that you'll commonly adjust to achieve better results." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "XkUd6UiZa_dF" - }, - "source": [ - "Let's setup the optimizer:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "8xxi2NNGKwG_", - "colab": {} - }, - "source": [ - "optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "pJVRZ0hP52ZB" - }, - "source": [ - "We'll use this to calculate a single optimization step:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "rxRNTFVe56RG", - "colab": {} - }, - "source": [ - "loss_value, grads = grad(model, features, labels)\n", - "\n", - "print(\n", - " \"Step: {}, Initial Loss: {}\".format(\n", - " optimizer.iterations.numpy(), loss_value.numpy()\n", - " )\n", - ")\n", - "\n", - "optimizer.apply_gradients(zip(grads, model.trainable_variables))\n", - "\n", - "print(\n", - " \"Step: {},Loss: {}\".format(\n", - " optimizer.iterations.numpy(),\n", - " loss(model, features, labels, training=True).numpy(),\n", - " )\n", - ")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "7Y2VSELvwAvW" - }, - "source": [ - "### Training loop\n", - "\n", - "With all the pieces in place, the model is ready for training! A training loop feeds the dataset examples into the model to help it make better predictions. 
The following code block sets up these training steps:\n", - "\n", - "1. Iterate each *epoch*. An epoch is one pass through the dataset.\n", - "2. Within an epoch, iterate over each example in the training `Dataset` grabbing its *features* (`x`) and *label* (`y`).\n", - "3. Using the example's features, make a prediction and compare it with the label. Measure the inaccuracy of the prediction and use that to calculate the model's loss and gradients.\n", - "4. Use an `optimizer` to update the model's variables.\n", - "5. Keep track of some stats for visualization.\n", - "6. Repeat for each epoch.\n", - "\n", - "The `num_epochs` variable is the number of times to loop over the dataset collection. Counter-intuitively, training a model longer does not guarantee a better model. `num_epochs` is a [hyperparameter](https://developers.google.com/machine-learning/glossary/#hyperparameter) that you can tune. Choosing the right number usually requires both experience and experimentation:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "AIgulGRUhpto", - "colab": {} - }, - "source": [ - "## Note: Rerunning this cell uses the same model variables\n", - "\n", - "# Keep results for plotting\n", - "train_loss_results = []\n", - "train_accuracy_results = []\n", - "\n", - "num_epochs = 201\n", - "\n", - "for epoch in range(num_epochs):\n", - " epoch_loss_avg = tf.keras.metrics.Mean()\n", - " epoch_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()\n", - "\n", - " # Training loop - using batches of 32\n", - " for x, y in train_dataset:\n", - " # Optimize the model\n", - " loss_value, grads = grad(model, x, y)\n", - " optimizer.apply_gradients(zip(grads, model.trainable_variables))\n", - "\n", - " # Track progress\n", - " epoch_loss_avg.update_state(loss_value) # Add current batch loss\n", - " # Compare predicted label to actual label\n", - " # training=True is needed only if there are layers with different\n", - " # behavior during training 
versus inference (e.g. Dropout).\n", - " epoch_accuracy.update_state(y, model(x, training=True))\n", - "\n", - " # End epoch\n", - " train_loss_results.append(epoch_loss_avg.result())\n", - " train_accuracy_results.append(epoch_accuracy.result())\n", - "\n", - " if epoch % 50 == 0:\n", - " print(\n", - " \"Epoch {:03d}: Loss: {:.3f}, Accuracy: {:.3%}\".format(\n", - " epoch, epoch_loss_avg.result(), epoch_accuracy.result()\n", - " )\n", - " )" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "2FQHVUnm_rjw" - }, - "source": [ - "### Visualize the loss function over time" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "j3wdbmtLVTyr" - }, - "source": [ - "While it's helpful to print out the model's training progress, it's often *more* helpful to see this progress. [TensorBoard](https://www.tensorflow.org/tensorboard) is a nice visualization tool that is packaged with TensorFlow, but we can create basic charts using the `matplotlib` module.\n", - "\n", - "Interpreting these charts takes some experience, but you really want to see the *loss* go down and the *accuracy* go up:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "agjvNd2iUGFn", - "colab": {} - }, - "source": [ - "fig, axes = plt.subplots(2, sharex=True, figsize=(12, 8))\n", - "fig.suptitle(\"Training Metrics\")\n", - "\n", - "axes[0].set_ylabel(\"Loss\", fontsize=14)\n", - "axes[0].plot(train_loss_results)\n", - "\n", - "axes[1].set_ylabel(\"Accuracy\", fontsize=14)\n", - "axes[1].set_xlabel(\"Epoch\", fontsize=14)\n", - "axes[1].plot(train_accuracy_results)\n", - "plt.show()" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Zg8GoMZhLpGH" - }, - "source": [ - "## Evaluate the model's effectiveness\n", - "\n", - "Now that the model is trained, we can get some statistics 
on its performance.\n", - "\n", - "*Evaluating* means determining how effectively the model makes predictions. To determine the model's effectiveness at Iris classification, pass some sepal and petal measurements to the model and ask the model to predict what Iris species they represent. Then compare the model's predictions against the actual label. For example, a model that picked the correct species on half the input examples has an [accuracy](https://developers.google.com/machine-learning/glossary/#accuracy) of `0.5`. Figure 4 shows a slightly more effective model, getting 4 out of 5 predictions correct at 80% accuracy:\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Example featuresLabelModel prediction
5.93.04.31.511
6.93.15.42.122
5.13.31.70.500
6.0 3.4 4.5 1.6 12
5.52.54.01.311
\n", - " Figure 4. An Iris classifier that is 80% accurate.
 \n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "z-EvK7hGL0d8" - }, - "source": [ - "### Setup the test dataset\n", - "\n", - "Evaluating the model is similar to training the model. The biggest difference is the examples come from a separate [test set](https://developers.google.com/machine-learning/crash-course/glossary#test_set) rather than the training set. To fairly assess a model's effectiveness, the examples used to evaluate a model must be different from the examples used to train the model.\n", - "\n", - "The setup for the test `Dataset` is similar to the setup for training `Dataset`. Download the CSV text file and parse that values, then give it a little shuffle:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "Ps3_9dJ3Lodk", - "colab": {} - }, - "source": [ - "test_url = (\n", - " \"https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv\"\n", - ")\n", - "\n", - "test_fp = tf.keras.utils.get_file(\n", - " fname=os.path.basename(test_url), origin=test_url\n", - ")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "SRMWCu30bnxH", - "colab": {} - }, - "source": [ - "test_dataset = tf.data.experimental.make_csv_dataset(\n", - " test_fp,\n", - " batch_size,\n", - " column_names=column_names,\n", - " label_name=\"species\",\n", - " num_epochs=1,\n", - " shuffle=False,\n", - ")\n", - "\n", - "test_dataset = test_dataset.map(pack_features_vector)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "HFuOKXJdMAdm" - }, - "source": [ - "### Evaluate the model on the test dataset\n", - "\n", - "Unlike the training stage, the model only evaluates a single [epoch](https://developers.google.com/machine-learning/glossary/#epoch) of the test data. 
In the following code cell, we iterate over each example in the test set and compare the model's prediction against the actual label. This is used to measure the model's accuracy across the entire test set:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "Tw03-MK1cYId", - "colab": {} - }, - "source": [ - "test_accuracy = tf.keras.metrics.Accuracy()\n", - "\n", - "for x, y in test_dataset:\n", - " # training=False is needed only if there are layers with different\n", - " # behavior during training versus inference (e.g. Dropout).\n", - " logits = model(x, training=False)\n", - " prediction = tf.argmax(logits, axis=1, output_type=tf.int32)\n", - " test_accuracy(prediction, y)\n", - "\n", - "print(f\"Test set accuracy: {test_accuracy.result():.3%}\")" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "HcKEZMtCOeK-" - }, - "source": [ - "We can see on the last batch, for example, the model is usually correct:" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "uNwt2eMeOane", - "colab": {} - }, - "source": [ - "tf.stack([y, prediction], axis=1)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "7Li2r1tYvW7S" - }, - "source": [ - "## Use the trained model to make predictions\n", - "\n", - "We've trained a model and \"proven\" that it's good—but not perfect—at classifying Iris species. Now let's use the trained model to make some predictions on [unlabeled examples](https://developers.google.com/machine-learning/glossary/#unlabeled_example); that is, on examples that contain features but not a label.\n", - "\n", - "In real-life, the unlabeled examples could come from lots of different sources including apps, CSV files, and data feeds. For now, we're going to manually provide three unlabeled examples to predict their labels. 
Recall, the label numbers are mapped to a named representation as:\n", - "\n", - "* `0`: Iris setosa\n", - "* `1`: Iris versicolor\n", - "* `2`: Iris virginica" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Lab Task #3:** Use model to make predictions" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab_type": "code", - "id": "kesTS5Lzv-M2", - "colab": {} - }, - "source": [ - "# TODO 3\n", - "# TODO -- Your code here." - ], - "execution_count": null, - "outputs": [] - } - ] -} diff --git a/notebooks/introduction_to_tensorflow/labs/feat.cols_tf.data.ipynb b/notebooks/introduction_to_tensorflow/labs/feat.cols_tf.data.ipynb deleted file mode 100644 index 3244685f..00000000 --- a/notebooks/introduction_to_tensorflow/labs/feat.cols_tf.data.ipynb +++ /dev/null @@ -1,1083 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "rNdWfPXCjTjY" - }, - "source": [ - "# Introduction to Feature Columns \n", - "**Learning Objectives**\n", - "\n", - "\n", - "1. Load a CSV file using [Pandas](https://pandas.pydata.org/)\n", - "2. Create an input pipeline using tf.data\n", - "3. Create multiple types of feature columns\n", - "\n", - " \n", - "\n", - "## Introduction \n", - "\n", - "In this notebook, you classify structured data (e.g. tabular data in a CSV file) using [feature columns](https://www.tensorflow.org/guide/feature_columns). Feature columns serve as a bridge to map from columns in a CSV file to features used to train a model. In a subsequent lab, we will use [Keras](https://www.tensorflow.org/guide/keras) to define the model.\n", - "\n", - "Each learning objective will correspond to a __#TODO__ in this student lab notebook -- try to complete this notebook first and then review the [solution notebook](../solutions/feat.cols_tf.data.ipynb). 
\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "K1y4OHpGgss7" - }, - "source": [ - "## The Dataset\n", - "\n", - "We will use a small [dataset](https://archive.ics.uci.edu/ml/datasets/heart+Disease) provided by the Cleveland Clinic Foundation for Heart Disease. There are several hundred rows in the CSV. Each row describes a patient, and each column describes an attribute. We will use this information to predict whether a patient has heart disease, which in this dataset is a binary classification task.\n", - "\n", - "Following is a [description](https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/heart-disease.names) of this dataset. Notice there are both numeric and categorical columns.\n", - "\n", - ">Column| Description| Feature Type | Data Type\n", - ">------------|--------------------|----------------------|-----------------\n", - ">Age | Age in years | Numerical | integer\n", - ">Sex | (1 = male; 0 = female) | Categorical | integer\n", - ">CP | Chest pain type (0, 1, 2, 3, 4) | Categorical | integer\n", - ">Trestbpd | Resting blood pressure (in mm Hg on admission to the hospital) | Numerical | integer\n", - ">Chol | Serum cholestoral in mg/dl | Numerical | integer\n", - ">FBS | (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false) | Categorical | integer\n", - ">RestECG | Resting electrocardiographic results (0, 1, 2) | Categorical | integer\n", - ">Thalach | Maximum heart rate achieved | Numerical | integer\n", - ">Exang | Exercise induced angina (1 = yes; 0 = no) | Categorical | integer\n", - ">Oldpeak | ST depression induced by exercise relative to rest | Numerical | float\n", - ">Slope | The slope of the peak exercise ST segment | Numerical | integer\n", - ">CA | Number of major vessels (0-3) colored by flourosopy | Numerical | integer\n", - ">Thal | 3 = normal; 6 = fixed defect; 7 = reversable defect | Categorical | string\n", - ">Target | Diagnosis of heart disease (1 = true; 0 = false) | 
Classification | integer" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "VxyBFc_kKazA" - }, - "source": [ - "## Import TensorFlow and other libraries" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "9dEreb4QKizj" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TensorFlow version: 2.1.0\n" - ] - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import pandas as pd\n", - "import seaborn as sns\n", - "\n", - "%matplotlib inline\n", - "\n", - "import tensorflow as tf\n", - "from sklearn.model_selection import train_test_split\n", - "from tensorflow import feature_column\n", - "from tensorflow.keras import layers\n", - "\n", - "print(\"TensorFlow version: \", tf.version.VERSION)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "KCEhSZcULZ9n" - }, - "source": [ - "## Lab Task 1: Use Pandas to create a dataframe\n", - "\n", - "[Pandas](https://pandas.pydata.org/) is a Python library with many helpful utilities for loading and working with structured data. We will use Pandas to download the dataset from a URL, and load it into a dataframe." - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "REZ57BXCLdfG" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
063111452331215002.330fixed0
167141602860210811.523normal1
267141202290212912.622reversible0
337131302500018703.530normal0
441021302040217201.410normal0
\n", - "
" - ], - "text/plain": [ - " age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n", - "0 63 1 1 145 233 1 2 150 0 2.3 3 \n", - "1 67 1 4 160 286 0 2 108 1 1.5 2 \n", - "2 67 1 4 120 229 0 2 129 1 2.6 2 \n", - "3 37 1 3 130 250 0 0 187 0 3.5 3 \n", - "4 41 0 2 130 204 0 2 172 0 1.4 1 \n", - "\n", - " ca thal target \n", - "0 0 fixed 0 \n", - "1 3 normal 1 \n", - "2 2 reversible 0 \n", - "3 0 normal 0 \n", - "4 0 normal 0 " - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "URL = \"https://storage.googleapis.com/applied-dl/heart.csv\"\n", - "dataframe = pd.read_csv(URL)\n", - "dataframe.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 303 entries, 0 to 302\n", - "Data columns (total 14 columns):\n", - "age 303 non-null int64\n", - "sex 303 non-null int64\n", - "cp 303 non-null int64\n", - "trestbps 303 non-null int64\n", - "chol 303 non-null int64\n", - "fbs 303 non-null int64\n", - "restecg 303 non-null int64\n", - "thalach 303 non-null int64\n", - "exang 303 non-null int64\n", - "oldpeak 303 non-null float64\n", - "slope 303 non-null int64\n", - "ca 303 non-null int64\n", - "thal 303 non-null object\n", - "target 303 non-null int64\n", - "dtypes: float64(1), int64(12), object(1)\n", - "memory usage: 33.3+ KB\n" - ] - } - ], - "source": [ - "dataframe.info()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Split the dataframe into train, validation, and test\n", - "\n", - "The dataset we downloaded was a single CSV file. As a best practice, Complete the below TODO by splitting this into train, validation, and test sets." 
- ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "193 train examples\n", - "49 validation examples\n", - "61 test examples\n" - ] - } - ], - "source": [ - "# TODO 1a\n", - "# TODO: Your code goes here\n", - "print(len(train), \"train examples\")\n", - "print(len(val), \"validation examples\")\n", - "print(len(test), \"test examples\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Lab Task 2: Create an input pipeline using tf.data\n", - "\n", - "Next, we will wrap the dataframes with [tf.data](https://www.tensorflow.org/guide/datasets). This will enable us to use feature columns as a bridge to map from the columns in the Pandas dataframe to features used to train a model. If we were working with a very large CSV file (so large that it does not fit into memory), we would use tf.data to read it from disk directly. That is not covered in this lab." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Complete the `TODOs` in the below cells using `df_to_dataset` function. 
\n" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [], - "source": [ - "# A utility method to create a tf.data dataset from a Pandas Dataframe\n", - "def df_to_dataset(dataframe, shuffle=True, batch_size=32):\n", - " dataframe = dataframe.copy()\n", - " labels = dataframe.pop('target')\n", - " ds = # TODO 2a: Your code goes here\n", - " if shuffle:\n", - " ds = ds.shuffle(buffer_size=len(dataframe))\n", - " ds = ds.batch(batch_size)\n", - " return ds" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [], - "source": [ - "batch_size = 5 # A small batch sized is used for demonstration purposes" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO 2b\n", - "train_ds = # Your code goes here\n", - "val_ds = # Your code goes here\n", - "test_ds = # Your code goes here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Understand the input pipeline\n", - "\n", - "Now that we have created the input pipeline, let's call it to see the format of the data it returns. We have used a small batch size to keep the output readable." 
- ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Every feature: ['ca', 'thal', 'trestbps', 'restecg', 'oldpeak', 'exang', 'sex', 'age', 'slope', 'chol', 'fbs', 'thalach', 'cp']\n", - "A batch of ages: tf.Tensor([49 68 41 51 63], shape=(5,), dtype=int32)\n", - "A batch of targets: tf.Tensor([0 0 0 0 0], shape=(5,), dtype=int32)\n" - ] - } - ], - "source": [ - "for feature_batch, label_batch in train_ds.take(1):\n", - " print(\"Every feature:\", list(feature_batch.keys()))\n", - " print(\"A batch of ages:\", feature_batch[\"age\"])\n", - " print(\"A batch of targets:\", label_batch)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ttIvgLRaNoOQ" - }, - "source": [ - "## Lab Task 3: Demonstrate several types of feature column\n", - "TensorFlow provides many types of feature columns. In this section, we will create several types of feature columns, and demonstrate how they transform a column from the dataframe." - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [], - "source": [ - "# We will use this batch to demonstrate several types of feature columns\n", - "example_batch = next(iter(train_ds))[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [], - "source": [ - "# A utility method to create a feature column\n", - "# and to transform a batch of data\n", - "def demo(feature_column):\n", - " feature_layer = layers.DenseFeatures(feature_column)\n", - " print(feature_layer(example_batch).numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Q7OEKe82N-Qb" - }, - "source": [ - "### Numeric columns\n", - "The output of a feature column becomes the input to the model. A [numeric column](https://www.tensorflow.org/api_docs/python/tf/feature_column/numeric_column) is the simplest type of column. 
It is used to represent real valued features. When using this column, your model will receive the column value from the dataframe unchanged." - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "NumericColumn(key='age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)\n" - ] - } - ], - "source": [ - "age = feature_column.numeric_column(\"age\")\n", - "tf.feature_column.numeric_column\n", - "print(age)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "7a6ddSyzOKpq" - }, - "source": [ - "### Let's have a look at the output:\n", - "\n", - "#### key='age'\n", - "A unique string identifying the input feature. It is used as the column name and the dictionary key for feature parsing configs, feature Tensor objects, and feature columns.\n", - "\n", - "#### shape=(1,)\n", - "In the heart disease dataset, most columns from the dataframe are numeric. Recall that tensors have a rank. \"Age\" is a \"vector\" or \"rank-1\" tensor, which is like a list of values. A vector has 1-axis, thus the shape will always look like this: shape=(3,), where 3 is a scalar (or single number) and with 1-axis. \n", - "\n", - "#### default_value=None\n", - "A single value compatible with dtype or an iterable of values compatible with dtype which the column takes on during tf.Example parsing if data is missing. A default value of None will cause tf.io.parse_example to fail if an example does not contain this column. If a single value is provided, the same value will be applied as the default value for every item. If an iterable of values is provided, the shape of the default_value should be equal to the given shape.\n", - "\n", - "#### dtype=tf.float32\n", - "defines the type of values. Default value is tf.float32. 
Must be a non-quantized, real integer or floating point type.\n", - "\n", - "\n", - "#### normalizer_fn=None\n", - "If not None, a function that can be used to normalize the value of the tensor after default_value is applied for parsing. Normalizer function takes the input Tensor as its argument, and returns the output Tensor. (e.g. lambda x: (x - 3.0) / 4.2). Please note that even though the most common use case of this function is normalization, it can be used for any kind of Tensorflow transformations.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Layer dense_features_22 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n", - "\n", - "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n", - "\n", - "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n", - "\n", - "[[60.]\n", - " [58.]\n", - " [55.]\n", - " [54.]\n", - " [51.]]\n" - ] - } - ], - "source": [ - "demo(age)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "IcSxUoYgOlA1" - }, - "source": [ - "### Bucketized columns\n", - "Often, you don't want to feed a number directly into the model, but instead split its value into different categories based on numerical ranges. Consider raw data that represents a person's age. 
Instead of representing age as a numeric column, we could split the age into several buckets using a [bucketized column](https://www.tensorflow.org/api_docs/python/tf/feature_column/bucketized_column). Notice the one-hot values below describe which age range each row matches." - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "wJ4Wt3SAOpTQ" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Layer dense_features_23 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n", - "\n", - "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n", - "\n", - "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n", - "\n", - "[[0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]\n", - " [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]\n", - " [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]\n", - " [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]\n", - " [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]]\n" - ] - } - ], - "source": [ - "age_buckets = tf.feature_column.bucketized_column(\n", - " age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]\n", - ")\n", - "demo(____) # TODO 3a: Replace the blanks with a correct value" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "r1tArzewPb-b" - }, - "source": [ - "### Categorical columns\n", - "In this dataset, thal is represented as a string (e.g. 'fixed', 'normal', or 'reversible'). We cannot feed strings directly to a model. 
Instead, we must first map them to numeric values. The categorical vocabulary columns provide a way to represent strings as a one-hot vector (much like you have seen above with age buckets). The vocabulary can be passed as a list using [categorical_column_with_vocabulary_list](https://www.tensorflow.org/api_docs/python/tf/feature_column/categorical_column_with_vocabulary_list), or loaded from a file using [categorical_column_with_vocabulary_file](https://www.tensorflow.org/api_docs/python/tf/feature_column/categorical_column_with_vocabulary_file)." - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "DJ6QnSHkPtOC" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Layer dense_features_24 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n", - "\n", - "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n", - "\n", - "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n", - "\n", - "[[0. 0. 1.]\n", - " [0. 1. 0.]\n", - " [0. 0. 1.]\n", - " [0. 1. 0.]\n", - " [0. 1. 
0.]]\n" - ] - } - ], - "source": [ - "thal = tf.feature_column.categorical_column_with_vocabulary_list(\n", - " \"thal\", [\"fixed\", \"normal\", \"reversible\"]\n", - ")\n", - "\n", - "thal_one_hot = tf.feature_column.indicator_column(thal)\n", - "demo(thal_one_hot)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "dxQloQ9jOoXL" - }, - "source": [ - "In a more complex dataset, many columns would be categorical (e.g. strings). Feature columns are most valuable when working with categorical data. Although there is only one categorical column in this dataset, we will use it to demonstrate several important types of feature columns that you could use when working with other datasets." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "LEFPjUr6QmwS" - }, - "source": [ - "### Embedding columns\n", - "Suppose instead of having just a few possible strings, we have thousands (or more) values per category. For a number of reasons, as the number of categories grow large, it becomes infeasible to train a neural network using one-hot encodings. We can use an embedding column to overcome this limitation. Instead of representing the data as a one-hot vector of many dimensions, an [embedding column](https://www.tensorflow.org/api_docs/python/tf/feature_column/embedding_column) represents that data as a lower-dimensional, dense vector in which each cell can contain any number, not just 0 or 1. The size of the embedding (8, in the example below) is a parameter that must be tuned.\n", - "\n", - "Key point: using an embedding column is best when a categorical column has many possible values. We are using one here for demonstration purposes, so you have a complete example you can modify for a different dataset in the future." 
- ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "hSlohmr2Q_UU" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Layer dense_features_25 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n", - "\n", - "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n", - "\n", - "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n", - "\n", - "[[ 0.26216975 -0.66194284 0.33328214 -0.09756625 0.20408471 0.57926923\n", - " -0.07685163 0.4386801 ]\n", - " [-0.24602154 0.0877578 0.07975551 0.34634778 0.2708743 -0.6707659\n", - " -0.15825593 -0.08179379]\n", - " [ 0.26216975 -0.66194284 0.33328214 -0.09756625 0.20408471 0.57926923\n", - " -0.07685163 0.4386801 ]\n", - " [-0.24602154 0.0877578 0.07975551 0.34634778 0.2708743 -0.6707659\n", - " -0.15825593 -0.08179379]\n", - " [-0.24602154 0.0877578 0.07975551 0.34634778 0.2708743 -0.6707659\n", - " -0.15825593 -0.08179379]]\n" - ] - } - ], - "source": [ - "# Notice the input to the embedding column is the categorical column\n", - "# we previously created\n", - "thal_embedding = tf.feature_column.embedding_column(thal, dimension=8)\n", - "demo(thal_embedding)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "urFCAvTVRMpB" - }, - "source": [ - "### Hashed feature columns\n", - "\n", - "Another way to represent a categorical column with a large number of 
values is to use a [categorical_column_with_hash_bucket](https://www.tensorflow.org/api_docs/python/tf/feature_column/categorical_column_with_hash_bucket). This feature column calculates a hash value of the input, then selects one of the `hash_bucket_size` buckets to encode a string. When using this column, you do not need to provide the vocabulary, and you can choose to make the number of hash_buckets significantly smaller than the number of actual categories to save space.\n", - "\n", - "Key point: An important downside of this technique is that there may be collisions in which different strings are mapped to the same bucket. In practice, this can work well for some datasets regardless." - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "YHU_Aj2nRRDC" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Layer dense_features_26 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n", - "\n", - "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n", - "\n", - "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n", - "\n", - "[[0. 0. 0. ... 0. 0. 0.]\n", - " [0. 0. 0. ... 0. 0. 0.]\n", - " [0. 0. 0. ... 0. 0. 0.]\n", - " [0. 0. 0. ... 0. 0. 0.]\n", - " [0. 0. 0. ... 0. 0. 
0.]]\n" - ] - } - ], - "source": [ - "thal_hashed = tf.feature_column.categorical_column_with_hash_bucket(\n", - " \"thal\", hash_bucket_size=1000\n", - ")\n", - "demo(tf.feature_column.indicator_column(thal_hashed))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "fB94M27DRXtZ" - }, - "source": [ - "### Crossed feature columns\n", - "Combining features into a single feature, better known as [feature crosses](https://developers.google.com/machine-learning/glossary/#feature_cross), enables a model to learn separate weights for each combination of features. Here, we will create a new feature that is the cross of age and thal. Note that `crossed_column` does not build the full table of all possible combinations (which could be very large). Instead, it is backed by a `hashed_column`, so you can choose how large the table is." - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "oaPVERd9Rep6" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Layer dense_features_27 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n", - "\n", - "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n", - "\n", - "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n", - "\n", - "[[0. 0. 0. ... 0. 0. 0.]\n", - " [0. 0. 0. ... 0. 0. 0.]\n", - " [0. 0. 0. ... 0. 0. 0.]\n", - " [0. 0. 0. ... 0. 0. 0.]\n", - " [0. 0. 
0. ... 0. 0. 0.]]\n" - ] - } - ], - "source": [ - "crossed_feature = tf.feature_column.crossed_column(\n", - " [age_buckets, thal], hash_bucket_size=1000\n", - ")\n", - "demo(tf.feature_column.indicator_column(crossed_feature))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ypkI9zx6Rj1q" - }, - "source": [ - "## Choose which columns to use\n", - "We have seen how to use several types of feature columns. Now we will use them to train a model. The goal of this tutorial is to show you the complete code (e.g. mechanics) needed to work with feature columns. We have selected a few columns to train our model below arbitrarily.\n", - "\n", - "Key point: If your aim is to build an accurate model, try a larger dataset of your own, and think carefully about which features are the most meaningful to include, and how they should be represented." - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "4PlLY7fORuzA" - }, - "outputs": [], - "source": [ - "feature_columns = []\n", - "\n", - "# numeric cols\n", - "for header in [\"age\", \"trestbps\", \"chol\", \"thalach\", \"oldpeak\", \"slope\", \"ca\"]:\n", - " feature_columns.append(feature_column.numeric_column(header))\n", - "\n", - "# bucketized cols\n", - "age_buckets = feature_column.bucketized_column(\n", - " age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]\n", - ")\n", - "feature_columns.append(age_buckets)\n", - "\n", - "# indicator cols\n", - "thal = feature_column.categorical_column_with_vocabulary_list(\n", - " \"thal\", [\"fixed\", \"normal\", \"reversible\"]\n", - ")\n", - "thal_one_hot = feature_column.indicator_column(thal)\n", - "feature_columns.append(thal_one_hot)\n", - "\n", - "# embedding cols\n", - "thal_embedding = feature_column.embedding_column(thal, dimension=8)\n", - "feature_columns.append(thal_embedding)\n", - "\n", - "# crossed cols\n", - "crossed_feature = 
feature_column.crossed_column(\n", - " [age_buckets, thal], hash_bucket_size=1000\n", - ")\n", - "crossed_feature = feature_column.indicator_column(crossed_feature)\n", - "feature_columns.append(crossed_feature)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "M-nDp8krS_ts" - }, - "source": [ - "### How to Input Feature Columns to a Keras Model\n", - "Now that we have defined our feature columns, we now use a [DenseFeatures](https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/DenseFeatures) layer to input them to a Keras model. Don't worry if you have not used Keras before. There is a more detailed video and lab introducing the Keras Sequential and Functional models." - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "6o-El1R2TGQP" - }, - "outputs": [], - "source": [ - "feature_layer = tf.keras.layers.DenseFeatures(feature_columns)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "8cf6vKfgTH0U" - }, - "source": [ - "Earlier, we used a small batch size to demonstrate how feature columns worked. We create a new input pipeline with a larger batch size." 
- ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "gcemszoGSse_" - }, - "outputs": [], - "source": [ - "batch_size = 32\n", - "train_ds = df_to_dataset(train, batch_size=batch_size)\n", - "val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)\n", - "test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "bBx4Xu0eTXWq" - }, - "source": [ - "## Create, compile, and train the model" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "_YJPPb3xTPeZ" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train for 7 steps, validate for 2 steps\n", - "Epoch 1/5\n", - "7/7 [==============================] - 1s 157ms/step - loss: 1.1446 - accuracy: 0.6580 - val_loss: 1.4723 - val_accuracy: 0.4694\n", - "Epoch 2/5\n", - "7/7 [==============================] - 0s 10ms/step - loss: 0.7330 - accuracy: 0.6632 - val_loss: 0.5254 - val_accuracy: 0.7143\n", - "Epoch 3/5\n", - "7/7 [==============================] - 0s 10ms/step - loss: 0.4610 - accuracy: 0.7565 - val_loss: 0.4916 - val_accuracy: 0.7755\n", - "Epoch 4/5\n", - "7/7 [==============================] - 0s 10ms/step - loss: 0.4359 - accuracy: 0.7617 - val_loss: 0.5403 - val_accuracy: 0.7551\n", - "Epoch 5/5\n", - "7/7 [==============================] - 0s 10ms/step - loss: 0.5650 - accuracy: 0.7409 - val_loss: 0.6612 - val_accuracy: 0.7551\n" - ] - } - ], - "source": [ - "model = tf.keras.Sequential(\n", - " [\n", - " feature_layer,\n", - " layers.Dense(128, activation=\"relu\"),\n", - " layers.Dense(128, activation=\"relu\"),\n", - " layers.Dense(1),\n", - " ]\n", - ")\n", - "\n", - "model.compile(\n", - " optimizer=\"adam\",\n", - " loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),\n", - " metrics=[\"accuracy\"],\n", - 
")\n", - "\n", - "history = model.fit(train_ds, validation_data=val_ds, epochs=5)" - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "GnFmMOW0Tcaa" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2/2 [==============================] - 0s 4ms/step - loss: 0.4773 - accuracy: 0.7705\n", - "Accuracy 0.7704918\n" - ] - } - ], - "source": [ - "loss, accuracy = model.evaluate(test_ds)\n", - "print(\"Accuracy\", accuracy)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Visualize the model loss curve\n", - "\n", - "Next, we will use Matplotlib to draw the model's loss curves for training and validation. A line plot is also created showing the accuracy over the training epochs for both the train (blue) and test (orange) sets." - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "def plot_curves(history, metrics):\n", - " nrows = 1\n", - " ncols = 2\n", - " fig = plt.figure(figsize=(10, 5))\n", - "\n", - " for idx, key in enumerate(metrics):\n", - " ax = fig.add_subplot(nrows, ncols, idx + 1)\n", - " plt.plot(history.history[key])\n", - " plt.plot(history.history[f\"val_{key}\"])\n", - " plt.title(f\"model {key}\")\n", - " plt.ylabel(key)\n", - " plt.xlabel(\"epoch\")\n", - " plt.legend([\"train\", \"validation\"], loc=\"upper left\")\n", - "\n", - "\n", - "plot_curves(history, [\"loss\", \"accuracy\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "3bdfbq20V6zu" - }, - "source": [ - "You can see that accuracy is at 77% for both the training and validation data, while loss bottoms out at about .477 after four epochs.\n", - "\n", - "Key point: You will typically see best results with deep learning with much larger and more complex datasets. When working with a small dataset like this one, we recommend using a decision tree or random forest as a strong baseline. The goal of this tutorial is not to train an accurate model, but to demonstrate the mechanics of working with structured data, so you have code to use as a starting point when working with your own datasets in the future." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "SotnhVWuHQCw" - }, - "source": [ - "## Next steps\n", - "The best way to learn more about classifying structured data is to try it yourself. We suggest finding another dataset to work with, and training a model to classify it using code similar to the above. To improve accuracy, think carefully about which features to include in your model, and how they should be represented." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright 2020 Google Inc.\n", - "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at\n", - "http://www.apache.org/licenses/LICENSE-2.0\n", - "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "feature_columns.ipynb", - "private_outputs": true, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/introduction_to_tensorflow/labs/int_logistic_regression.ipynb b/notebooks/introduction_to_tensorflow/labs/int_logistic_regression.ipynb deleted file mode 100644 index 77b8aeb8..00000000 --- a/notebooks/introduction_to_tensorflow/labs/int_logistic_regression.ipynb +++ /dev/null @@ -1,1460 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "rwxGnsA92emp" - }, - "source": [ - "##### Copyright 2018 The TensorFlow Authors." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "cellView": "form", - "colab": {}, - "colab_type": "code", - "id": "CPII1rGR2rF9" - }, - "outputs": [], - "source": [ - "# @title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "JtEZ1pCPn--z" - }, - "source": [ - "# Custom training: walkthrough" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "GV1F7tVTN3Dn" - }, - "source": [ - "\n", - " \n", - " \n", - " \n", - " \n", - "
\n", - " View on TensorFlow.org\n", - " \n", - " Run in Google Colab\n", - " \n", - " View source on GitHub\n", - " \n", - " Download notebook\n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "LDrzLFXE8T1l" - }, - "source": [ - "This guide uses machine learning to *categorize* Iris flowers by species. It uses TensorFlow to:\n", - "1. Build a model,\n", - "2. Train this model on example data, and\n", - "3. Use the model to make predictions about unknown data.\n", - "\n", - "## TensorFlow programming\n", - "\n", - "This guide uses these high-level TensorFlow concepts:\n", - "\n", - "* Use TensorFlow's default [eager execution](../../guide/eager.ipynb) development environment,\n", - "* Import data with the [Datasets API](../../guide/datasets.ipynb),\n", - "* Build models and layers with TensorFlow's [Keras API](../../guide/keras/overview.ipynb).\n", - "\n", - "This tutorial is structured like many TensorFlow programs:\n", - "\n", - "1. Import and parse the dataset.\n", - "2. Select the type of model.\n", - "3. Train the model.\n", - "4. Evaluate the model's effectiveness.\n", - "5. Use the trained model to make predictions." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "yNr7H-AIoLOR" - }, - "source": [ - "## Setup program" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "1J3AuPBT9gyR" - }, - "source": [ - "### Configure imports\n", - "\n", - "Import TensorFlow and the other required Python modules. By default, TensorFlow uses [eager execution](../../guide/eager.ipynb) to evaluate operations immediately, returning concrete values instead of creating a computational graph that is executed later. If you are used to a REPL or the `python` interactive console, this feels familiar." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "jElLULrDhQZR" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "bfV2Dai0Ow2o" - }, - "outputs": [], - "source": [ - "import tensorflow as tf" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "g4Wzg69bnwK2" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TensorFlow version: 2.1.0\n", - "Eager execution: True\n" - ] - } - ], - "source": [ - "print(f\"TensorFlow version: {tf.__version__}\")\n", - "print(f\"Eager execution: {tf.executing_eagerly()}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Zx7wc0LuuxaJ" - }, - "source": [ - "## The Iris classification problem\n", - "\n", - "Imagine you are a botanist seeking an automated way to categorize each Iris flower you find. Machine learning provides many algorithms to classify flowers statistically. For instance, a sophisticated machine learning program could classify flowers based on photographs. Our ambitions are more modest—we're going to classify Iris flowers based on the length and width measurements of their [sepals](https://en.wikipedia.org/wiki/Sepal) and [petals](https://en.wikipedia.org/wiki/Petal).\n", - "\n", - "The Iris genus entails about 300 species, but our program will only classify the following three:\n", - "\n", - "* Iris setosa\n", - "* Iris virginica\n", - "* Iris versicolor\n", - "\n", - "\n", - " \n", - " \n", - "
\n", - " \"Petal\n", - "
\n", - " Figure 1. Iris setosa (by Radomil, CC BY-SA 3.0), Iris versicolor, (by Dlanglois, CC BY-SA 3.0), and Iris virginica (by Frank Mayfield, CC BY-SA 2.0).
 \n", - "
\n", - "\n", - "Fortunately, someone has already created a [dataset of 120 Iris flowers](https://en.wikipedia.org/wiki/Iris_flower_data_set) with the sepal and petal measurements. This is a classic dataset that is popular for beginner machine learning classification problems." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "3Px6KAg0Jowz" - }, - "source": [ - "## Import and parse the training dataset\n", - "\n", - "Download the dataset file and convert it into a structure that can be used by this Python program.\n", - "\n", - "### Download the dataset\n", - "\n", - "Download the training dataset file using the `tf.keras.utils.get_file` function. This returns the file path of the downloaded file:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "J6c7uEU9rjRM" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "8192/2194 [================================================================================================================] - 0s 0us/step\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Local copy of the dataset file: /home/kbuilder/.keras/datasets/iris_training.csv\n" - ] - } - ], - "source": [ - "train_dataset_url = \"https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv\"\n", - "\n", - "train_dataset_fp = tf.keras.utils.get_file(\n", - " fname=os.path.basename(train_dataset_url), origin=train_dataset_url\n", - ")\n", - "\n", - "print(f\"Local copy of the dataset file: {train_dataset_fp}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "qnX1-aLors4S" - }, - "source": [ - "### Inspect the data\n", - "\n", - "This dataset, 
`iris_training.csv`, is a plain text file that stores tabular data formatted as comma-separated values (CSV). Use the `head -n5` command to take a peek at the first five entries:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "FQvb_JYdrpPm" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "120,4,setosa,versicolor,virginica\r\n", - "6.4,2.8,5.6,2.2,2\r\n", - "5.0,2.3,3.3,1.0,1\r\n", - "4.9,2.5,4.5,1.7,2\r\n", - "4.9,3.1,1.5,0.1,0\r\n" - ] - } - ], - "source": [ - "!head -n5 {train_dataset_fp}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "kQhzD6P-uBoq" - }, - "source": [ - "From this view of the dataset, notice the following:\n", - "\n", - "1. The first line is a header containing information about the dataset:\n", - " * There are 120 total examples. Each example has four features and one of three possible label names.\n", - "2. Subsequent rows are data records, one *[example](https://developers.google.com/machine-learning/glossary/#example)* per line, where:\n", - " * The first four fields are *[features](https://developers.google.com/machine-learning/glossary/#feature)*: these are the characteristics of an example. Here, the fields hold float numbers representing flower measurements.\n", - " * The last column is the *[label](https://developers.google.com/machine-learning/glossary/#label)*: this is the value we want to predict. 
For this dataset, it's an integer value of 0, 1, or 2 that corresponds to a flower name.\n", - "\n", - "Let's write that out in code:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "9Edhevw7exl6" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Features: ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']\n", - "Label: species\n" - ] - } - ], - "source": [ - "# column order in CSV file\n", - "column_names = [\n", - " \"sepal_length\",\n", - " \"sepal_width\",\n", - " \"petal_length\",\n", - " \"petal_width\",\n", - " \"species\",\n", - "]\n", - "\n", - "feature_names = column_names[:-1]\n", - "label_name = column_names[-1]\n", - "\n", - "print(f\"Features: {feature_names}\")\n", - "print(f\"Label: {label_name}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "CCtwLoJhhDNc" - }, - "source": [ - "Each label is associated with string name (for example, \"setosa\"), but machine learning typically relies on numeric values. The label numbers are mapped to a named representation, such as:\n", - "\n", - "* `0`: Iris setosa\n", - "* `1`: Iris versicolor\n", - "* `2`: Iris virginica\n", - "\n", - "For more information about features and labels, see the [ML Terminology section of the Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/framing/ml-terminology)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "sVNlJlUOhkoX" - }, - "outputs": [], - "source": [ - "class_names = [\"Iris setosa\", \"Iris versicolor\", \"Iris virginica\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "dqPkQExM2Pwt" - }, - "source": [ - "### Create a `tf.data.Dataset`\n", - "\n", - "TensorFlow's [Dataset API](../../guide/data.ipynb) handles many common cases for loading data into a model. This is a high-level API for reading data and transforming it into a form used for training.\n", - "\n", - "\n", - "Since the dataset is a CSV-formatted text file, use the `tf.data.experimental.make_csv_dataset` function to parse the data into a suitable format. Since this function generates data for training models, the default behavior is to shuffle the data (`shuffle=True, shuffle_buffer_size=10000`), and repeat the dataset forever (`num_epochs=None`). We also set the [batch_size](https://developers.google.com/machine-learning/glossary/#batch_size) parameter:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "WsxHnz1ebJ2S" - }, - "outputs": [], - "source": [ - "batch_size = 32\n", - "\n", - "train_dataset = tf.data.experimental.make_csv_dataset(\n", - " train_dataset_fp,\n", - " batch_size,\n", - " column_names=column_names,\n", - " label_name=label_name,\n", - " num_epochs=1,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "gB_RSn62c-3G" - }, - "source": [ - "The `make_csv_dataset` function returns a `tf.data.Dataset` of `(features, label)` pairs, where `features` is a dictionary: `{'feature_name': value}`\n", - "\n", - "These `Dataset` objects are iterable. 
Let's look at a batch of features:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "iDuG94H-C122" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "OrderedDict([('sepal_length', ), ('sepal_width', ), ('petal_length', ), ('petal_width', )])\n" - ] - } - ], - "source": [ - "features, labels = next(iter(train_dataset))\n", - "\n", - "print(features)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "E63mArnQaAGz" - }, - "source": [ - "Notice that like-features are grouped together, or *batched*. Each example row's fields are appended to the corresponding feature array. Change the `batch_size` to set the number of examples stored in these feature arrays.\n", - "\n", - "You can start to see some clusters by plotting a few features from the batch:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "me5Wn-9FcyyO" - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plt.scatter(\n", - " features[\"petal_length\"], features[\"sepal_length\"], c=labels, cmap=\"viridis\"\n", - ")\n", - "\n", - "plt.xlabel(\"Petal length\")\n", - "plt.ylabel(\"Sepal length\")\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "YlxpSyHlhT6M" - }, - "source": [ - "To simplify the model building step, create a function to repackage the features dictionary into a single array with shape: `(batch_size, num_features)`.\n", - "\n", - "This function uses the `tf.stack` method which takes values from a list of tensors and creates a combined tensor at the specified dimension:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "jm932WINcaGU" - }, - "outputs": [], - "source": [ - "def pack_features_vector(features, labels):\n", - " \"\"\"Pack the features into a single array.\"\"\"\n", - " features = tf.stack(list(features.values()), axis=1)\n", - " return features, labels" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "V1Vuph_eDl8x" - }, - "source": [ - "Then use the `tf.data.Dataset#map` method to pack the `features` of each `(features,label)` pair into the training dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "ZbDkzGZIkpXf" - }, - "outputs": [], - "source": [ - "train_dataset = train_dataset.map(pack_features_vector)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "NLy0Q1xCldVO" - }, - "source": [ - "The features element of the `Dataset` are now arrays with shape `(batch_size, num_features)`. 
Let's look at the first few examples:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "kex9ibEek6Tr" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tf.Tensor(\n", - "[[5.8 2.6 4. 1.2]\n", - " [6.9 3.1 4.9 1.5]\n", - " [5. 3.4 1.5 0.2]\n", - " [4.8 3. 1.4 0.1]\n", - " [5.5 2.4 3.8 1.1]], shape=(5, 4), dtype=float32)\n" - ] - } - ], - "source": [ - "features, labels = next(iter(train_dataset))\n", - "\n", - "print(features[:5])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "LsaVrtNM3Tx5" - }, - "source": [ - "## Select the type of model\n", - "\n", - "### Why model?\n", - "\n", - "A *[model](https://developers.google.com/machine-learning/crash-course/glossary#model)* is a relationship between features and the label. For the Iris classification problem, the model defines the relationship between the sepal and petal measurements and the predicted Iris species. Some simple models can be described with a few lines of algebra, but complex machine learning models have a large number of parameters that are difficult to summarize.\n", - "\n", - "Could you determine the relationship between the four features and the Iris species *without* using machine learning? That is, could you use traditional programming techniques (for example, a lot of conditional statements) to create a model? Perhaps—if you analyzed the dataset long enough to determine the relationships between petal and sepal measurements to a particular species. And this becomes difficult—maybe impossible—on more complicated datasets. A good machine learning approach *determines the model for you*. If you feed enough representative examples into the right machine learning model type, the program will figure out the relationships for you.\n", - "\n", - "### Select the model\n", - "\n", - "We need to select the kind of model to train. 
There are many types of models and picking a good one takes experience. This tutorial uses a neural network to solve the Iris classification problem. *[Neural networks](https://developers.google.com/machine-learning/glossary/#neural_network)* can find complex relationships between features and the label. It is a highly-structured graph, organized into one or more *[hidden layers](https://developers.google.com/machine-learning/glossary/#hidden_layer)*. Each hidden layer consists of one or more *[neurons](https://developers.google.com/machine-learning/glossary/#neuron)*. There are several categories of neural networks and this program uses a dense, or *[fully-connected neural network](https://developers.google.com/machine-learning/glossary/#fully_connected_layer)*: the neurons in one layer receive input connections from *every* neuron in the previous layer. For example, Figure 2 illustrates a dense neural network consisting of an input layer, two hidden layers, and an output layer:\n", - "\n", - "\n", - " \n", - " \n", - "
\n", - " \n", - "
\n", - " Figure 2. A neural network with features, hidden layers, and predictions.
 \n", - "
\n", - "\n", - "When the model from Figure 2 is trained and fed an unlabeled example, it yields three predictions: the likelihood that this flower is the given Iris species. This prediction is called *[inference](https://developers.google.com/machine-learning/crash-course/glossary#inference)*. For this example, the sum of the output predictions is 1.0. In Figure 2, this prediction breaks down as: `0.02` for *Iris setosa*, `0.95` for *Iris versicolor*, and `0.03` for *Iris virginica*. This means that the model predicts—with 95% probability—that an unlabeled example flower is an *Iris versicolor*." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "W23DIMVPQEBt" - }, - "source": [ - "### Create a model using Keras\n", - "\n", - "The TensorFlow `tf.keras` API is the preferred way to create models and layers. This makes it easy to build models and experiment while Keras handles the complexity of connecting everything together.\n", - "\n", - "The `tf.keras.Sequential` model is a linear stack of layers. Its constructor takes a list of layer instances, in this case, two `tf.keras.layers.Dense` layers with 10 nodes each, and an output layer with 3 nodes representing our label predictions. 
The first layer's `input_shape` parameter corresponds to the number of features from the dataset, and is required:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "2fZ6oL2ig3ZK" - }, - "outputs": [], - "source": [ - "model = tf.keras.Sequential(\n", - " [\n", - " tf.keras.layers.Dense(\n", - " 10, activation=tf.nn.relu, input_shape=(4,)\n", - " ), # input shape required\n", - " tf.keras.layers.Dense(10, activation=tf.nn.relu),\n", - " tf.keras.layers.Dense(3),\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "FHcbEzMpxbHL" - }, - "source": [ - "The *[activation function](https://developers.google.com/machine-learning/crash-course/glossary#activation_function)* determines the output shape of each node in the layer. These non-linearities are important—without them the model would be equivalent to a single layer. There are many `tf.keras.activations`, but [ReLU](https://developers.google.com/machine-learning/crash-course/glossary#ReLU) is common for hidden layers.\n", - "\n", - "The ideal number of hidden layers and neurons depends on the problem and the dataset. Like many aspects of machine learning, picking the best shape of the neural network requires a mixture of knowledge and experimentation. As a rule of thumb, increasing the number of hidden layers and neurons typically creates a more powerful model, which requires more data to train effectively." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "2wFKnhWCpDSS" - }, - "source": [ - "### Using the model\n", - "\n", - "Let's have a quick look at what this model does to a batch of features:" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "xe6SQ5NrpB-I" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "predictions = model(features)\n", - "predictions[:5]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "wxyXOhwVr5S3" - }, - "source": [ - "Here, each example returns a [logit](https://developers.google.com/machine-learning/crash-course/glossary#logits) for each class.\n", - "\n", - "To convert these logits to a probability for each class, use the [softmax](https://developers.google.com/machine-learning/crash-course/glossary#softmax) function:" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "_tRwHZmTNTX2" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tf.nn.softmax(predictions[:5])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "uRZmchElo481" - }, - "source": [ - "Taking the `tf.argmax` across classes gives us the predicted class index. 
But, the model hasn't been trained yet, so these aren't good predictions:" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "-Jzm_GoErz8B" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Prediction: [0 2 0 0 2 2 0 0 0 0 0 0 2 0 2 0 2 0 2 2 0 0 0 2 2 0 0 0 2 0 0 2]\n", - " Labels: [1 1 0 0 1 1 1 0 1 0 0 0 2 0 1 0 1 0 1 2 0 1 0 2 1 0 1 0 2 1 0 2]\n" - ] - } - ], - "source": [ - "print(f\"Prediction: {tf.argmax(predictions, axis=1)}\")\n", - "print(f\" Labels: {labels}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Vzq2E5J2QMtw" - }, - "source": [ - "## Train the model\n", - "\n", - "*[Training](https://developers.google.com/machine-learning/crash-course/glossary#training)* is the stage of machine learning when the model is gradually optimized, or the model *learns* the dataset. The goal is to learn enough about the structure of the training dataset to make predictions about unseen data. If you learn *too much* about the training dataset, then the predictions only work for the data it has seen and will not be generalizable. This problem is called *[overfitting](https://developers.google.com/machine-learning/crash-course/glossary#overfitting)*—it's like memorizing the answers instead of understanding how to solve a problem.\n", - "\n", - "The Iris classification problem is an example of *[supervised machine learning](https://developers.google.com/machine-learning/glossary/#supervised_machine_learning)*: the model is trained from examples that contain labels. In *[unsupervised machine learning](https://developers.google.com/machine-learning/glossary/#unsupervised_machine_learning)*, the examples don't contain labels. Instead, the model typically finds patterns among the features." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "RaKp8aEjKX6B" - }, - "source": [ - "### Define the loss and gradient function\n", - "\n", - "Both training and evaluation stages need to calculate the model's *[loss](https://developers.google.com/machine-learning/crash-course/glossary#loss)*. This measures how off a model's predictions are from the desired label, in other words, how bad the model is performing. We want to minimize, or optimize, this value.\n", - "\n", - "Our model will calculate its loss using the `tf.keras.losses.SparseCategoricalCrossentropy` function which takes the model's class probability predictions and the desired label, and returns the average loss across the examples." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "QOsi6b-1CXIn" - }, - "outputs": [], - "source": [ - "loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "tMAT4DcMPwI-" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loss test: 3.012593984603882\n" - ] - } - ], - "source": [ - "def loss(model, x, y, training):\n", - " # training=training is needed only if there are layers with different\n", - " # behavior during training versus inference (e.g. 
Dropout).\n", - " y_ = model(x, training=training)\n", - "\n", - " return loss_object(y_true=y, y_pred=y_)\n", - "\n", - "\n", - "l = loss(model, features, labels, training=False)\n", - "print(f\"Loss test: {l}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "3IcPqA24QM6B" - }, - "source": [ - "Use the `tf.GradientTape` context to calculate the *[gradients](https://developers.google.com/machine-learning/crash-course/glossary#gradient)* used to optimize your model:" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "x57HcKWhKkei" - }, - "outputs": [], - "source": [ - "def grad(model, inputs, targets):\n", - " with tf.GradientTape() as tape:\n", - " loss_value = loss(model, inputs, targets, training=True)\n", - " return loss_value, tape.gradient(loss_value, model.trainable_variables)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "lOxFimtlKruu" - }, - "source": [ - "### Create an optimizer\n", - "\n", - "An *[optimizer](https://developers.google.com/machine-learning/crash-course/glossary#optimizer)* applies the computed gradients to the model's variables to minimize the `loss` function. You can think of the loss function as a curved surface (see Figure 3) and we want to find its lowest point by walking around. The gradients point in the direction of steepest ascent—so we'll travel the opposite way and move down the hill. By iteratively calculating the loss and gradient for each batch, we'll adjust the model during training. Gradually, the model will find the best combination of weights and bias to minimize loss. And the lower the loss, the better the model's predictions.\n", - "\n", - "\n", - " \n", - " \n", - "
\n", - " \"Optimization\n", - "
\n", - " Figure 3. Optimization algorithms visualized over time in 3D space.
(Source: Stanford class CS231n, MIT License, Image credit: Alec Radford)\n", - "
\n", - "\n", - "TensorFlow has many optimization algorithms available for training. This model uses the `tf.keras.optimizers.SGD` that implements the *[stochastic gradient descent](https://developers.google.com/machine-learning/crash-course/glossary#gradient_descent)* (SGD) algorithm. The `learning_rate` sets the step size to take for each iteration down the hill. This is a *hyperparameter* that you'll commonly adjust to achieve better results." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "XkUd6UiZa_dF" - }, - "source": [ - "Let's setup the optimizer:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "8xxi2NNGKwG_" - }, - "outputs": [], - "source": [ - "optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "pJVRZ0hP52ZB" - }, - "source": [ - "We'll use this to calculate a single optimization step:" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "rxRNTFVe56RG" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Step: 0, Initial Loss: 3.012593984603882\n", - "Step: 1, Loss: 2.4900705814361572\n" - ] - } - ], - "source": [ - "loss_value, grads = grad(model, features, labels)\n", - "\n", - "print(\n", - " \"Step: {}, Initial Loss: {}\".format(\n", - " optimizer.iterations.numpy(), loss_value.numpy()\n", - " )\n", - ")\n", - "\n", - "optimizer.apply_gradients(zip(grads, model.trainable_variables))\n", - "\n", - "print(\n", - " \"Step: {}, Loss: {}\".format(\n", - " optimizer.iterations.numpy(),\n", - " loss(model, features, labels, training=True).numpy(),\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "7Y2VSELvwAvW" - }, - "source": [ - "### Training loop\n", - "\n", - "With all the pieces in 
place, the model is ready for training! A training loop feeds the dataset examples into the model to help it make better predictions. The following code block sets up these training steps:\n", - "\n", - "1. Iterate each *epoch*. An epoch is one pass through the dataset.\n", - "2. Within an epoch, iterate over each example in the training `Dataset` grabbing its *features* (`x`) and *label* (`y`).\n", - "3. Using the example's features, make a prediction and compare it with the label. Measure the inaccuracy of the prediction and use that to calculate the model's loss and gradients.\n", - "4. Use an `optimizer` to update the model's variables.\n", - "5. Keep track of some stats for visualization.\n", - "6. Repeat for each epoch.\n", - "\n", - "The `num_epochs` variable is the number of times to loop over the dataset collection. Counter-intuitively, training a model longer does not guarantee a better model. `num_epochs` is a *[hyperparameter](https://developers.google.com/machine-learning/glossary/#hyperparameter)* that you can tune. 
Choosing the right number usually requires both experience and experimentation:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "AIgulGRUhpto" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 000: Loss: 1.594, Accuracy: 70.000%\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 050: Loss: 0.286, Accuracy: 97.500%\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 100: Loss: 0.172, Accuracy: 97.500%\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 150: Loss: 0.144, Accuracy: 98.333%\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 200: Loss: 0.122, Accuracy: 98.333%\n" - ] - } - ], - "source": [ - "## Note: Rerunning this cell uses the same model variables\n", - "\n", - "# Keep results for plotting\n", - "train_loss_results = []\n", - "train_accuracy_results = []\n", - "\n", - "num_epochs = 201\n", - "\n", - "for epoch in range(num_epochs):\n", - " epoch_loss_avg = tf.keras.metrics.Mean()\n", - " epoch_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()\n", - "\n", - " # Training loop - using batches of 32\n", - " for x, y in train_dataset:\n", - " # Optimize the model\n", - " loss_value, grads = grad(model, x, y)\n", - " optimizer.apply_gradients(zip(grads, model.trainable_variables))\n", - "\n", - " # Track progress\n", - " epoch_loss_avg.update_state(loss_value) # Add current batch loss\n", - " # Compare predicted label to actual label\n", - " # training=True is needed only if there are layers with different\n", - " # behavior during training versus inference (e.g. 
Dropout).\n", - " epoch_accuracy.update_state(y, model(x, training=True))\n", - "\n", - " # End epoch\n", - " train_loss_results.append(epoch_loss_avg.result())\n", - " train_accuracy_results.append(epoch_accuracy.result())\n", - "\n", - " if epoch % 50 == 0:\n", - " print(\n", - " \"Epoch {:03d}: Loss: {:.3f}, Accuracy: {:.3%}\".format(\n", - " epoch, epoch_loss_avg.result(), epoch_accuracy.result()\n", - " )\n", - " )" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "2FQHVUnm_rjw" - }, - "source": [ - "### Visualize the loss function over time" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "j3wdbmtLVTyr" - }, - "source": [ - "While it's helpful to print out the model's training progress, it's often *more* helpful to see this progress. [TensorBoard](https://www.tensorflow.org/tensorboard) is a nice visualization tool that is packaged with TensorFlow, but we can create basic charts using the `matplotlib` module.\n", - "\n", - "Interpreting these charts takes some experience, but you really want to see the *loss* go down and the *accuracy* go up:" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "agjvNd2iUGFn" - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "fig, axes = plt.subplots(2, sharex=True, figsize=(12, 8))\n", - "fig.suptitle(\"Training Metrics\")\n", - "\n", - "axes[0].set_ylabel(\"Loss\", fontsize=14)\n", - "axes[0].plot(train_loss_results)\n", - "\n", - "axes[1].set_ylabel(\"Accuracy\", fontsize=14)\n", - "axes[1].set_xlabel(\"Epoch\", fontsize=14)\n", - "axes[1].plot(train_accuracy_results)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Zg8GoMZhLpGH" - }, - "source": [ - "## Evaluate the model's effectiveness\n", - "\n", - "Now that the model is trained, we can get some statistics on its performance.\n", - "\n", - "*Evaluating* means determining how effectively the model makes predictions. To determine the model's effectiveness at Iris classification, pass some sepal and petal measurements to the model and ask the model to predict what Iris species they represent. Then compare the model's predictions against the actual label. For example, a model that picked the correct species on half the input examples has an *[accuracy](https://developers.google.com/machine-learning/glossary/#accuracy)* of `0.5`. Figure 4 shows a slightly more effective model, getting 4 out of 5 predictions correct at 80% accuracy:\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Example featuresLabelModel prediction
5.93.04.31.511
6.93.15.42.122
5.13.31.70.500
6.0 3.4 4.5 1.6 12
5.52.54.01.311
\n", - " Figure 4. An Iris classifier that is 80% accurate.
 \n", - "
" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "z-EvK7hGL0d8" - }, - "source": [ - "### Setup the test dataset\n", - "\n", - "Evaluating the model is similar to training the model. The biggest difference is the examples come from a separate *[test set](https://developers.google.com/machine-learning/crash-course/glossary#test_set)* rather than the training set. To fairly assess a model's effectiveness, the examples used to evaluate a model must be different from the examples used to train the model.\n", - "\n", - "The setup for the test `Dataset` is similar to the setup for training `Dataset`. Download the CSV text file and parse that values, then give it a little shuffle:" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Ps3_9dJ3Lodk" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r", - "8192/573 [============================================================================================================================================================================================================================================================================================================================================================================================================================================] - 0s 0us/step\n" - ] - } - ], - "source": [ - "test_url = (\n", - " \"https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv\"\n", - ")\n", - "\n", - "test_fp = tf.keras.utils.get_file(\n", - " fname=os.path.basename(test_url), origin=test_url\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "SRMWCu30bnxH" - }, - 
"outputs": [], - "source": [ - "test_dataset = tf.data.experimental.make_csv_dataset(\n", - " test_fp,\n", - " batch_size,\n", - " column_names=column_names,\n", - " label_name=\"species\",\n", - " num_epochs=1,\n", - " shuffle=False,\n", - ")\n", - "\n", - "test_dataset = test_dataset.map(pack_features_vector)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "HFuOKXJdMAdm" - }, - "source": [ - "### Evaluate the model on the test dataset\n", - "\n", - "Unlike the training stage, the model only evaluates a single [epoch](https://developers.google.com/machine-learning/glossary/#epoch) of the test data. In the following code cell, we iterate over each example in the test set and compare the model's prediction against the actual label. This is used to measure the model's accuracy across the entire test set:" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Tw03-MK1cYId" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Test set accuracy: 96.667%\n" - ] - } - ], - "source": [ - "test_accuracy = tf.keras.metrics.Accuracy()\n", - "\n", - "for x, y in test_dataset:\n", - " # training=False is needed only if there are layers with different\n", - " # behavior during training versus inference (e.g. 
Dropout).\n", - " logits = model(x, training=False)\n", - " prediction = tf.argmax(logits, axis=1, output_type=tf.int32)\n", - " test_accuracy(prediction, y)\n", - "\n", - "print(f\"Test set accuracy: {test_accuracy.result():.3%}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "HcKEZMtCOeK-" - }, - "source": [ - "We can see on the last batch, for example, the model is usually correct:" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "uNwt2eMeOane" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tf.stack([y, prediction], axis=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "7Li2r1tYvW7S" - }, - "source": [ - "## Use the trained model to make predictions\n", - "\n", - "We've trained a model and \"proven\" that it's good—but not perfect—at classifying Iris species. Now let's use the trained model to make some predictions on [unlabeled examples](https://developers.google.com/machine-learning/glossary/#unlabeled_example); that is, on examples that contain features but not a label.\n", - "\n", - "In real-life, the unlabeled examples could come from lots of different sources including apps, CSV files, and data feeds. For now, we're going to manually provide three unlabeled examples to predict their labels. 
Recall, the label numbers are mapped to a named representation as:\n", - "\n", - "* `0`: Iris setosa\n", - "* `1`: Iris versicolor\n", - "* `2`: Iris virginica" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "kesTS5Lzv-M2" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Example 0 prediction: Iris setosa (98.6%)\n", - "Example 1 prediction: Iris versicolor (96.7%)\n", - "Example 2 prediction: Iris virginica (57.1%)\n" - ] - } - ], - "source": [ - "predict_dataset = tf.convert_to_tensor(\n", - " [\n", - " [\n", - " 5.1,\n", - " 3.3,\n", - " 1.7,\n", - " 0.5,\n", - " ],\n", - " [\n", - " 5.9,\n", - " 3.0,\n", - " 4.2,\n", - " 1.5,\n", - " ],\n", - " [6.9, 3.1, 5.4, 2.1],\n", - " ]\n", - ")\n", - "\n", - "# training=False is needed only if there are layers with different\n", - "# behavior during training versus inference (e.g. Dropout).\n", - "predictions = model(predict_dataset, training=False)\n", - "\n", - "for i, logits in enumerate(predictions):\n", - " class_idx = tf.argmax(logits).numpy()\n", - " p = tf.nn.softmax(logits)[class_idx]\n", - " name = class_names[class_idx]\n", - " print(f\"Example {i} prediction: {name} ({100 * p:4.1f}%)\")" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "custom_training_walkthrough.ipynb", - "private_outputs": true, - "provenance": [], - "toc_visible": true, - "version": "0.3.2" - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/notebooks/introduction_to_tensorflow/labs/intro_logistic_regression_TF2.0.ipynb 
b/notebooks/introduction_to_tensorflow/labs/intro_logistic_regression_TF2.0.ipynb deleted file mode 100644 index 1fb08434..00000000 --- a/notebooks/introduction_to_tensorflow/labs/intro_logistic_regression_TF2.0.ipynb +++ /dev/null @@ -1,481 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "04QgGZc9bF5D" - }, - "source": [ - "# Introduction to Logistic Regression Using TF 2.0\n", - "\n", - "**Learning Objectives**\n", - "\n", - "\n", - "1. Build a neural network that classifies images.\n", - "2. Train this neural network.\n", - "3. Evaluate the accuracy of the model.\n", - "\n", - "\n", - "## Introduction \n", - "\n", - "This short introduction uses [Keras](https://keras.io/), a high-level API to build and train models in TensoFlow. In this lab, you Load and prepare the MNIST dataset, convert the samples from integers to floating-point numbers, build and train a neural network that classifies images and the evaluate then accuracy of the model.\n", - "\n", - "Each learning objective will correspond to a __#TODO__ in the [student lab notebook](https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive2/introduction_to_tensorflow/labs/intro_logistic_regression_TF2.0.ipynb) -- try to complete that notebook first before reviewing this solution notebook." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "nnrWf3PCEzXL" - }, - "source": [ - "## Load necessary libraries \n", - "We will start by importing the necessary libraries for this lab." 
- ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "0trJmd6DjqBZ" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TensorFlow version: 2.1.0\n" - ] - } - ], - "source": [ - "import tensorflow as tf\n", - "\n", - "print(\"TensorFlow version: \", tf.version.VERSION)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "7NAbSZiaoJ4z" - }, - "source": [ - "Load and prepare the [MNIST dataset](http://yann.lecun.com/exdb/mnist/). Convert the samples from integers to floating-point numbers:" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "7FP5258xjs-v" - }, - "outputs": [], - "source": [ - "mnist = tf.keras.datasets.mnist\n", - "\n", - "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", - "x_train, x_test = x_train / 255.0, x_test / 255.0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "BPZ68wASog_I" - }, - "source": [ - "**Lab Task 1:** Build the `tf.keras.Sequential` model by stacking layers. Choose an optimizer and loss function for training:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "h3IKyzTCDNGo" - }, - "outputs": [], - "source": [ - "model = # TODO 1 -- Your code here." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "l2hiez2eIUz8" - }, - "source": [ - "For each example the model returns a vector of \"[logits](https://developers.google.com/machine-learning/glossary#logits)\" or \"[log-odds](https://developers.google.com/machine-learning/glossary#log-odds)\" scores, one for each class." 
- ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "OeOrNdnkEEcR" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Layer flatten_2 is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2. The layer has dtype float32 because it's dtype defaults to floatx.\n", - "\n", - "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n", - "\n", - "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "array([[ 0.06166657, 0.07144614, -0.07372011, 0.3451226 , -0.06205732,\n", - " -0.23894641, -0.00426888, 0.38629198, 0.11753443, 0.21888584]],\n", - " dtype=float32)" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "predictions = model(x_train[:1]).numpy()\n", - "predictions" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "tgjhDQGcIniO" - }, - "source": [ - "The `tf.nn.softmax` function converts these logits to \"probabilities\" for each class: " - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "zWSRnQ0WI5eq" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[0.09631761, 0.09726418, 0.0841217 , 0.1278819 , 0.08510853,\n", - " 0.07131012, 0.09017171, 0.1332566 , 0.10185182, 0.11271589]],\n", - " dtype=float32)" - ] - }, - "execution_count": 24, - "metadata": {}, - 
"output_type": "execute_result" - } - ], - "source": [ - "tf.nn.softmax(predictions).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "he5u_okAYS4a" - }, - "source": [ - "Note: It is possible to bake this `tf.nn.softmax` in as the activation function for the last layer of the network. While this can make the model output more directly interpretable, this approach is discouraged as it's impossible to\n", - "provide an exact and numerically stable loss calculation for all models when using a softmax output. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "hQyugpgRIyrA" - }, - "source": [ - "The `losses.SparseCategoricalCrossentropy` loss takes a vector of logits and a `True` index and returns a scalar loss for each example." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Lab Task #2:** Usage of losses.SparseCategoricalCrossentropy with logits vectors and a True index." - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "RSkzdv8MD0tT" - }, - "outputs": [], - "source": [ - "loss_fn = # TODO 2 -- Your code here." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "SfR4MsSDU880" - }, - "source": [ - "This loss is equal to the negative log probability of the true class:\n", - "It is zero if the model is sure of the correct class.\n", - "\n", - "This untrained model gives probabilities close to random (1/10 for each class), so the initial loss should be close to `-tf.log(1/10) ~= 2.3`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "NJWqEVrrJ7ZB" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "2.6407173" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "loss_fn(y_train[:1], predictions).numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "9foNKHzTD2Vo" - }, - "outputs": [], - "source": [ - "model.compile(optimizer=\"adam\", loss=loss_fn, metrics=[\"accuracy\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ix4mEL65on-w" - }, - "source": [ - "The `Model.fit` method adjusts the model parameters to minimize the loss: " - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "y7suUbJXVLqP" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train on 60000 samples\n", - "Epoch 1/5\n", - "60000/60000 [==============================] - 4s 74us/sample - loss: 0.2948 - accuracy: 0.9159\n", - "Epoch 2/5\n", - "60000/60000 [==============================] - 4s 68us/sample - loss: 0.1449 - accuracy: 0.9575\n", - "Epoch 3/5\n", - "60000/60000 [==============================] - 4s 67us/sample - loss: 0.1086 - accuracy: 0.9669\n", - "Epoch 4/5\n", - "60000/60000 [==============================] - 4s 67us/sample - loss: 0.0890 - accuracy: 0.9722\n", - "Epoch 5/5\n", - "60000/60000 [==============================] - 4s 67us/sample - loss: 0.0760 - accuracy: 0.9761\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.fit(x_train, y_train, epochs=5)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "4mDAAPFqVVgn" - }, - "source": [ - "The 
`Model.evaluate` method checks the models performance, usually on a \"[Validation-set](https://developers.google.com/machine-learning/glossary#validation-set)\" or \"[Test-set](https://developers.google.com/machine-learning/glossary#test-set)\"." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "F7dTAzgHDUh7" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10000/10000 - 0s - loss: 0.0789 - accuracy: 0.9762\n" - ] - }, - { - "data": { - "text/plain": [ - "[0.07894639570089057, 0.9762]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model.evaluate(x_test, y_test, verbose=2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "T4JfEh7kvx6m" - }, - "source": [ - "The image classifier is now trained to ~98% accuracy on this dataset. To learn more, read the [TensorFlow tutorials](https://www.tensorflow.org/tutorials/)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Aj8NrlzlJqDG" - }, - "source": [ - "If you want your model to return a probability, you can wrap the trained model, and attach the softmax to it:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "rYb6DrEH0GMv" - }, - "outputs": [], - "source": [ - "probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "cnqOZtUp1YR_" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "probability_model(x_test[:5])" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "rX8mhOLljYeM" - ], - "name": "beginner.ipynb", - "private_outputs": true, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/introduction_to_tensorflow/labs/load_diff_filedata.ipynb b/notebooks/introduction_to_tensorflow/labs/load_diff_filedata.ipynb deleted file mode 100644 index ddaf1a5d..00000000 --- a/notebooks/introduction_to_tensorflow/labs/load_diff_filedata.ipynb +++ /dev/null @@ -1,1327 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "sUtoed20cRJJ" - }, - "source": [ - "# How to Load CSV and Numpy File Types in TensorFlow 2.0\n", - "\n", - "\n", - "\n", - "## Learning Objectives\n", - "\n", - "1. 
Load a CSV file into a `tf.data.Dataset`. \n", - "2. Load Numpy data\n", - "\n", - "\n", - "\n", - "\n", - "## Introduction \n", - "\n", - "In this lab, you load CSV data from a file into a `tf.data.Dataset`. This tutorial provides an example of loading data from NumPy arrays into a `tf.data.Dataset` you also load text data.\n", - "\n", - "Each learning objective will correspond to a __#TODO__ in the [student lab notebook](https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive2/ml_on_gcloud_v2/labs/03_load_diff_filedata.ipynb) -- try to complete that notebook first before reviewing this solution notebook." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "fgZ9gjmPfSnK" - }, - "source": [ - "## Load necessary libraries \n", - "We will start by importing the necessary libraries for this lab." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "baYFZMW_bJHh" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TensorFlow version: 2.3.0-dev20200613\n" - ] - } - ], - "source": [ - "import functools\n", - "\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "\n", - "print(\"TensorFlow version: \", tf.version.VERSION)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Ncf5t6tgL5ZI" - }, - "outputs": [], - "source": [ - "TRAIN_DATA_URL = \"https://storage.googleapis.com/tf-datasets/titanic/train.csv\"\n", - "TEST_DATA_URL = \"https://storage.googleapis.com/tf-datasets/titanic/eval.csv\"\n", - "\n", - "train_file_path = tf.keras.utils.get_file(\"train.csv\", TRAIN_DATA_URL)\n", - "test_file_path = tf.keras.utils.get_file(\"eval.csv\", TEST_DATA_URL)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "4ONE94qulk6S" - }, - 
"outputs": [], - "source": [ - "# Make numpy values easier to read.\n", - "np.set_printoptions(precision=3, suppress=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Wuqj601Qw0Ml" - }, - "source": [ - "## Load data\n", - "\n", - "This section provides an example of how to load CSV data from a file into a `tf.data.Dataset`. The data used in this tutorial are taken from the Titanic passenger list. The model will predict the likelihood a passenger survived based on characteristics like age, gender, ticket class, and whether the person was traveling alone.\n", - "\n", - "To start, let's look at the top of the CSV file to see how it is formatted." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "54Dv7mCrf9Yw" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone\n", - "0,male,22.0,1,0,7.25,Third,unknown,Southampton,n\n", - "1,female,38.0,1,0,71.2833,First,C,Cherbourg,n\n", - "1,female,26.0,0,0,7.925,Third,unknown,Southampton,y\n", - "1,female,35.0,1,0,53.1,First,C,Southampton,n\n", - "0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y\n", - "0,male,2.0,3,1,21.075,Third,unknown,Southampton,n\n", - "1,female,27.0,0,2,11.1333,Third,unknown,Southampton,n\n", - "1,female,14.0,1,0,30.0708,Second,unknown,Cherbourg,n\n", - "1,female,4.0,1,1,16.7,Third,G,Southampton,n\n" - ] - } - ], - "source": [ - "!head {train_file_path}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "jC9lRhV-q_R3" - }, - "source": [ - "You can [load this using pandas](pandas_dataframe.ipynb), and pass the NumPy arrays to TensorFlow. 
If you need to scale up to a large set of files, or need a loader that integrates with [TensorFlow and tf.data](../../guide/data.ipynb) then use the `tf.data.experimental.make_csv_dataset` function:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "67mfwr4v-mN_" - }, - "source": [ - "The only column you need to identify explicitly is the one with the value that the model is intended to predict. " - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "iXROZm5f3V4E" - }, - "outputs": [], - "source": [ - "# TODO 1: Add string name for label column\n", - "LABEL_COLUMN = \"\"\n", - "LABELS = []" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "t4N-plO4tDXd" - }, - "source": [ - "Now read the CSV data from the file and create a dataset. \n", - "\n", - "(For the full documentation, see `tf.data.experimental.make_csv_dataset`)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "yIbUscB9sqha" - }, - "outputs": [], - "source": [ - "def get_dataset(file_path, **kwargs):\n", - "# TODO 2\n", - "# TODO: Read the CSV data from the file and create a dataset \n", - "dataset = tf.data.experimental.make_csv_dataset( \n", - "# TODO: Your code goes here.\n", - "# TODO: Your code goes here.\n", - "# TODO: Your code goes here.\n", - "# TODO: Your code goes here.\n", - ") \n", - " return dataset\n", - "\n", - "raw_train_data = # TODO: Your code goes here.\n", - "raw_test_data = # TODO: Your code goes here." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "v4oMO9MIxgTG" - }, - "outputs": [], - "source": [ - "def show_batch(dataset):\n", - " for batch, label in dataset.take(1):\n", - " for key, value in batch.items():\n", - " print(f\"{key:20s}: {value.numpy()}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "vHUQFKoQI6G7" - }, - "source": [ - "Each item in the dataset is a batch, represented as a tuple of (*many examples*, *many labels*). The data from the examples is organized in column-based tensors (rather than row-based tensors), each with as many elements as the batch size (5 in this case).\n", - "\n", - "It might help to see this yourself." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "HjrkJROoxoll" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sex : [b'male' b'male' b'male' b'male' b'male']\n", - "age : [34. 18. 45. 46. 29.]\n", - "n_siblings_spouses : [1 0 1 1 1]\n", - "parch : [0 0 0 0 0]\n", - "fare : [26. 8.3 83.475 61.175 7.046]\n", - "class : [b'Second' b'Third' b'First' b'First' b'Third']\n", - "deck : [b'unknown' b'unknown' b'C' b'E' b'unknown']\n", - "embark_town : [b'Southampton' b'Southampton' b'Southampton' b'Southampton'\n", - " b'Southampton']\n", - "alone : [b'n' b'y' b'n' b'n' b'n']\n" - ] - } - ], - "source": [ - "show_batch(raw_train_data)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "YOYKQKmMj3D6" - }, - "source": [ - "As you can see, the columns in the CSV are named. The dataset constructor will pick these names up automatically. If the file you are working with does not contain the column names in the first line, pass them in a list of strings to the `column_names` argument in the `make_csv_dataset` function." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "2Av8_9L3tUg1" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sex : [b'male' b'female' b'male' b'male' b'male']\n", - "age : [30. 50. 18. 51. 28.]\n", - "n_siblings_spouses : [1 0 1 0 0]\n", - "parch : [0 1 1 0 0]\n", - "fare : [ 16.1 247.521 7.854 8.05 7.05 ]\n", - "class : [b'Third' b'First' b'Third' b'Third' b'Third']\n", - "deck : [b'unknown' b'B' b'unknown' b'unknown' b'unknown']\n", - "embark_town : [b'Southampton' b'Cherbourg' b'Southampton' b'Southampton' b'Southampton']\n", - "alone : [b'n' b'n' b'n' b'y' b'y']\n" - ] - } - ], - "source": [ - "CSV_COLUMNS = [\n", - " \"survived\",\n", - " \"sex\",\n", - " \"age\",\n", - " \"n_siblings_spouses\",\n", - " \"parch\",\n", - " \"fare\",\n", - " \"class\",\n", - " \"deck\",\n", - " \"embark_town\",\n", - " \"alone\",\n", - "]\n", - "\n", - "temp_dataset = get_dataset(train_file_path, column_names=CSV_COLUMNS)\n", - "\n", - "show_batch(temp_dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "gZfhoX7bR9u4" - }, - "source": [ - "This example is going to use all the available columns. If you need to omit some columns from the dataset, create a list of just the columns you plan to use, and pass it into the (optional) `select_columns` argument of the constructor.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "S1TzSkUKwsNP" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "age : [28. 34. 28. 50. 
2.]\n", - "n_siblings_spouses : [0 0 0 2 1]\n", - "class : [b'Third' b'First' b'Third' b'First' b'Second']\n", - "deck : [b'unknown' b'unknown' b'unknown' b'unknown' b'unknown']\n", - "alone : [b'y' b'y' b'y' b'n' b'n']\n" - ] - } - ], - "source": [ - "SELECT_COLUMNS = [\n", - " \"survived\",\n", - " \"age\",\n", - " \"n_siblings_spouses\",\n", - " \"class\",\n", - " \"deck\",\n", - " \"alone\",\n", - "]\n", - "\n", - "temp_dataset = get_dataset(train_file_path, select_columns=SELECT_COLUMNS)\n", - "\n", - "show_batch(temp_dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "9cryz31lxs3e" - }, - "source": [ - "## Data preprocessing\n", - "\n", - "A CSV file can contain a variety of data types. Typically you want to convert from those mixed types to a fixed length vector before feeding the data into your model.\n", - "\n", - "TensorFlow has a built-in system for describing common input conversions: `tf.feature_column`, see [this tutorial](../keras/feature_columns) for details.\n", - "\n", - "\n", - "You can preprocess your data using any tool you like (like [nltk](https://www.nltk.org/) or [sklearn](https://scikit-learn.org/stable/)), and just pass the processed output to TensorFlow. \n", - "\n", - "\n", - "The primary advantage of doing the preprocessing inside your model is that when you export the model it includes the preprocessing. This way you can pass the raw data directly to your model." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "9AsbaFmCeJtF" - }, - "source": [ - "### Continuous data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Xl0Q0DcfA_rt" - }, - "source": [ - "If your data is already in an appropriate numeric format, you can pack the data into a vector before passing it off to the model:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "4Yfji3J5BMxz" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "age : [28. 32.5 28. 32. 28. ]\n", - "n_siblings_spouses : [0. 1. 0. 0. 0.]\n", - "parch : [0. 0. 0. 0. 0.]\n", - "fare : [26.55 30.071 7.829 13. 7.75 ]\n" - ] - } - ], - "source": [ - "SELECT_COLUMNS = [\"survived\", \"age\", \"n_siblings_spouses\", \"parch\", \"fare\"]\n", - "DEFAULTS = [0, 0.0, 0.0, 0.0, 0.0]\n", - "temp_dataset = get_dataset(\n", - " train_file_path, select_columns=SELECT_COLUMNS, column_defaults=DEFAULTS\n", - ")\n", - "\n", - "show_batch(temp_dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "zEUhI8kZCfq8" - }, - "outputs": [], - "source": [ - "example_batch, labels_batch = next(iter(temp_dataset))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "IP45_2FbEKzn" - }, - "source": [ - "Here's a simple function that will pack together all the columns:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "JQ0hNSL8CC3a" - }, - "outputs": [], - "source": [ - "def pack(features, label):\n", - " return tf.stack(list(features.values()), axis=-1), label" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "75LA9DisEIoE" - }, - "source": [ - "Apply this to each element of the dataset:" - ] - }, - { - "cell_type": 
"code", - "execution_count": 14, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "VnP2Z2lwCTRl" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:AutoGraph could not transform and will run it as-is.\n", - "Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n", - "Cause: 'arguments' object has no attribute 'posonlyargs'\n", - "To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n", - "WARNING: AutoGraph could not transform and will run it as-is.\n", - "Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n", - "Cause: 'arguments' object has no attribute 'posonlyargs'\n", - "To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n", - "[[ 18. 1. 0. 108.9 ]\n", - " [ 31. 0. 0. 50.496]\n", - " [ 70. 1. 1. 71. ]\n", - " [ 24. 1. 0. 16.1 ]\n", - " [ 31. 1. 1. 37.004]]\n", - "\n", - "[0 0 0 0 0]\n" - ] - } - ], - "source": [ - "packed_dataset = temp_dataset.map(pack)\n", - "\n", - "for features, labels in packed_dataset.take(1):\n", - " print(features.numpy())\n", - " print()\n", - " print(labels.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "1VBvmaFrFU6J" - }, - "source": [ - "If you have mixed datatypes you may want to separate out these simple-numeric fields. The `tf.feature_column` api can handle them, but this incurs some overhead and should be avoided unless really necessary. 
Switch back to the mixed dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "ad-IQ_JPFQge" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sex : [b'male' b'female' b'male' b'male' b'male']\n", - "age : [18. 28. 28. 28. 28.]\n", - "n_siblings_spouses : [0 0 0 3 1]\n", - "parch : [0 0 0 1 1]\n", - "fare : [ 7.75 7.879 7.75 25.467 15.246]\n", - "class : [b'Third' b'Third' b'Third' b'Third' b'Third']\n", - "deck : [b'unknown' b'unknown' b'unknown' b'unknown' b'unknown']\n", - "embark_town : [b'Southampton' b'Queenstown' b'Queenstown' b'Southampton' b'Cherbourg']\n", - "alone : [b'y' b'y' b'y' b'n' b'n']\n" - ] - } - ], - "source": [ - "show_batch(raw_train_data)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "HSrYNKKcIdav" - }, - "outputs": [], - "source": [ - "example_batch, labels_batch = next(iter(temp_dataset))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "p5VtThKfGPaQ" - }, - "source": [ - "So define a more general preprocessor that selects a list of numeric features and packs them into a single column:" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "5DRishYYGS-m" - }, - "outputs": [], - "source": [ - "class PackNumericFeatures:\n", - " def __init__(self, names):\n", - " self.names = names\n", - "\n", - " def __call__(self, features, labels):\n", - " numeric_features = [features.pop(name) for name in self.names]\n", - " numeric_features = [\n", - " tf.cast(feat, tf.float32) for feat in numeric_features\n", - " ]\n", - " numeric_features = tf.stack(numeric_features, axis=-1)\n", - " features[\"numeric\"] = numeric_features\n", - "\n", - " return features, labels" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "colab": 
{}, - "colab_type": "code", - "id": "1SeZka9AHfqD" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:AutoGraph could not transform <__main__.PackNumericFeatures object at 0x7f52c06f77b8> and will run it as-is.\n", - "Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n", - "Cause: module 'gast' has no attribute 'Constant'\n", - "To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n", - "WARNING: AutoGraph could not transform <__main__.PackNumericFeatures object at 0x7f52c06f77b8> and will run it as-is.\n", - "Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n", - "Cause: module 'gast' has no attribute 'Constant'\n", - "To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n", - "WARNING:tensorflow:AutoGraph could not transform <__main__.PackNumericFeatures object at 0x7f52c06f7438> and will run it as-is.\n", - "Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n", - "Cause: module 'gast' has no attribute 'Constant'\n", - "To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n", - "WARNING: AutoGraph could not transform <__main__.PackNumericFeatures object at 0x7f52c06f7438> and will run it as-is.\n", - "Please report this to the TensorFlow team. 
When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.\n", - "Cause: module 'gast' has no attribute 'Constant'\n", - "To silence this warning, decorate the function with @tf.autograph.experimental.do_not_convert\n" - ] - } - ], - "source": [ - "NUMERIC_FEATURES = [\"age\", \"n_siblings_spouses\", \"parch\", \"fare\"]\n", - "\n", - "packed_train_data = raw_train_data.map(PackNumericFeatures(NUMERIC_FEATURES))\n", - "\n", - "packed_test_data = raw_test_data.map(PackNumericFeatures(NUMERIC_FEATURES))" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "wFrw0YobIbUB" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sex : [b'male' b'male' b'male' b'female' b'male']\n", - "class : [b'Third' b'Second' b'Third' b'First' b'First']\n", - "deck : [b'unknown' b'unknown' b'unknown' b'B' b'B']\n", - "embark_town : [b'Southampton' b'Southampton' b'Southampton' b'Southampton' b'Cherbourg']\n", - "alone : [b'n' b'y' b'y' b'n' b'y']\n", - "numeric : [[ 4. 4. 2. 31.275]\n", - " [16. 0. 0. 26. ]\n", - " [25. 0. 0. 7.05 ]\n", - " [36. 0. 2. 71. ]\n", - " [32. 0. 0. 30.5 ]]\n" - ] - } - ], - "source": [ - "show_batch(packed_train_data)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "_EPUS8fPLUb1" - }, - "outputs": [], - "source": [ - "example_batch, labels_batch = next(iter(packed_train_data))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "o2maE8d2ijsq" - }, - "source": [ - "#### Data Normalization\n", - "\n", - "Continuous data should always be normalized." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "WKT1ASWpwH46" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
agen_siblings_spousesparchfare
count627.000000627.000000627.000000627.000000
mean29.6313080.5454550.37958534.385399
std12.5118181.1510900.79299954.597730
min0.7500000.0000000.0000000.000000
25%23.0000000.0000000.0000007.895800
50%28.0000000.0000000.00000015.045800
75%35.0000001.0000000.00000031.387500
max80.0000008.0000005.000000512.329200
\n", - "
" - ], - "text/plain": [ - " age n_siblings_spouses parch fare\n", - "count 627.000000 627.000000 627.000000 627.000000\n", - "mean 29.631308 0.545455 0.379585 34.385399\n", - "std 12.511818 1.151090 0.792999 54.597730\n", - "min 0.750000 0.000000 0.000000 0.000000\n", - "25% 23.000000 0.000000 0.000000 7.895800\n", - "50% 28.000000 0.000000 0.000000 15.045800\n", - "75% 35.000000 1.000000 0.000000 31.387500\n", - "max 80.000000 8.000000 5.000000 512.329200" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "desc = pd.read_csv(train_file_path)[NUMERIC_FEATURES].describe()\n", - "desc" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "cHHstcKPsMXM" - }, - "outputs": [], - "source": [ - "# TODO 1\n", - "MEAN = # TODO: Your code goes here.\n", - "STD = # TODO: Your code goes here." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "REKqO_xHPNx0" - }, - "outputs": [], - "source": [ - "def normalize_numeric_data(data, mean, std):\n", - " # Center the data\n", - " # TODO 2" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[29.631 0.545 0.38 34.385] [12.512 1.151 0.793 54.598]\n" - ] - } - ], - "source": [ - "print(MEAN, STD)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "VPsoMUgRCpUM" - }, - "source": [ - "Now create a numeric column. The `tf.feature_columns.numeric_column` API accepts a `normalizer_fn` argument, which will be run on each batch.\n", - "\n", - "Bind the `MEAN` and `STD` to the normalizer fn using [`functools.partial`](https://docs.python.org/3/library/functools.html#functools.partial)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Bw0I35xRS57V" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "NumericColumn(key='numeric', shape=(4,), default_value=None, dtype=tf.float32, normalizer_fn=functools.partial(, mean=array([29.631, 0.545, 0.38 , 34.385]), std=array([12.512, 1.151, 0.793, 54.598])))" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# See what you just created.\n", - "normalizer = functools.partial(normalize_numeric_data, mean=MEAN, std=STD)\n", - "\n", - "numeric_column = tf.feature_column.numeric_column(\n", - " \"numeric\", normalizer_fn=normalizer, shape=[len(NUMERIC_FEATURES)]\n", - ")\n", - "numeric_columns = [numeric_column]\n", - "numeric_column" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "HZxcHXc6LCa7" - }, - "source": [ - "When you train the model, include this feature column to select and center this block of numeric data:" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "b61NM76Ot_kb" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "example_batch[\"numeric\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "j-r_4EAJAZoI" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[-2.208, 2.132, 0.782, -0.244],\n", - " [-0.13 , 0.395, -0.479, -0.335],\n", - " [-0.13 , 0.395, -0.479, -0.264],\n", - " [-1.089, -0.474, 0.782, 0.092],\n", - " [-0.45 , 1.264, -0.479, -0.187]], dtype=float32)" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "numeric_layer = tf.keras.layers.DenseFeatures(numeric_columns)\n", 
- "numeric_layer(example_batch).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "M37oD2VcCO4R" - }, - "source": [ - "The mean based normalization used here requires knowing the means of each column ahead of time." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "tSyrkSQwYHKi" - }, - "source": [ - "### Categorical data\n", - "\n", - "Some of the columns in the CSV data are categorical columns. That is, the content should be one of a limited set of options.\n", - "\n", - "Use the `tf.feature_column` API to create a collection with a `tf.feature_column.indicator_column` for each categorical column.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "mWDniduKMw-C" - }, - "outputs": [], - "source": [ - "CATEGORIES = {\n", - " \"sex\": [\"male\", \"female\"],\n", - " \"class\": [\"First\", \"Second\", \"Third\"],\n", - " \"deck\": [\"A\", \"B\", \"C\", \"D\", \"E\", \"F\", \"G\", \"H\", \"I\", \"J\"],\n", - " \"embark_town\": [\"Cherbourg\", \"Southhampton\", \"Queenstown\"],\n", - " \"alone\": [\"y\", \"n\"],\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "kkxLdrsLwHPT" - }, - "outputs": [], - "source": [ - "categorical_columns = []\n", - "for feature, vocab in CATEGORIES.items():\n", - " cat_col = tf.feature_column.categorical_column_with_vocabulary_list(\n", - " key=feature, vocabulary_list=vocab\n", - " )\n", - " categorical_columns.append(tf.feature_column.indicator_column(cat_col))" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "H18CxpHY_Nma" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='class', vocabulary_list=('First', 'Second', 'Third'), dtype=tf.string, 
default_value=-1, num_oov_buckets=0)),\n", - " IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='embark_town', vocabulary_list=('Cherbourg', 'Southhampton', 'Queenstown'), dtype=tf.string, default_value=-1, num_oov_buckets=0)),\n", - " IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='deck', vocabulary_list=('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'), dtype=tf.string, default_value=-1, num_oov_buckets=0)),\n", - " IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0)),\n", - " IndicatorColumn(categorical_column=VocabularyListCategoricalColumn(key='alone', vocabulary_list=('y', 'n'), dtype=tf.string, default_value=-1, num_oov_buckets=0))]" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# See what you just created.\n", - "categorical_columns" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "p7mACuOsArUH" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]\n" - ] - } - ], - "source": [ - "categorical_layer = tf.keras.layers.DenseFeatures(categorical_columns)\n", - "print(categorical_layer(example_batch).numpy()[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "R7-1QG99_1sN" - }, - "source": [ - "This will be become part of a data processing input later when you build the model." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "kPWkC4_1l3IG" - }, - "source": [ - "### Combined preprocessing layer" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "R3QAjo1qD4p9" - }, - "source": [ - "Add the two feature column collections and pass them to a `tf.keras.layers.DenseFeatures` to create an input layer that will extract and preprocess both input types:" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "3-OYK7GnaH0r" - }, - "outputs": [], - "source": [ - "# TODO 1\n", - "preprocessing_layer = # TODO: Your code goes here." - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "m7_U_K0UMSVS" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[ 0. 1. 0. 0. 1. 0. 0. 0. 0. 0.\n", - " 0. 0. 0. 0. 0. 0. 0. 0. -2.208 2.132\n", - " 0.782 -0.244 1. 0. ]\n" - ] - } - ], - "source": [ - "print(preprocessing_layer(example_batch).numpy()[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "DlF_omQqtnOP" - }, - "source": [ - "### Next Step\n", - "\n", - "A next step would be to build a build a `tf.keras.Sequential`, starting with the `preprocessing_layer`, which is beyond the scope of this lab. We will cover the Keras Sequential API in the next Lesson." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Load NumPy data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load necessary libraries \n", - "First, restart the Kernel. Then, we will start by importing the necessary libraries for this lab." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TensorFlow version: 2.3.0-dev20200613\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import tensorflow as tf\n", - "\n", - "print(\"TensorFlow version: \", tf.version.VERSION)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Load data from `.npz` file\n", - "\n", - "We use the MNIST dataset in Keras." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "DATA_URL = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz'\n", - "\n", - "path = tf.keras.utils.get_file('mnist.npz', DATA_URL)\n", - "with np.load(path) as data:\n", - "# TODO 1\n", - " train_examples = # TODO: Your code goes here.\n", - " train_labels = # TODO: Your code goes here.\n", - " test_examples = # TODO: Your code goes here.\n", - " test_labels = # TODO: Your code goes here." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load NumPy arrays with `tf.data.Dataset`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Assuming you have an array of examples and a corresponding array of labels, pass the two arrays as a tuple into `tf.data.Dataset.from_tensor_slices` to create a `tf.data.Dataset`." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO 2\n", - "train_dataset = # TODO: Your code goes here.\n", - "test_dataset = # TODO: Your code goes here." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Next Step\n", - "\n", - "A next step would be to build a build a `tf.keras.Sequential`, starting with the `preprocessing_layer`, which is beyond the scope of this lab. We will cover the Keras Sequential API in the next Lesson." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Resources \n", - "1. Load text data - this link: https://www.tensorflow.org/tutorials/load_data/text\n", - "2. TF.text - this link: https://www.tensorflow.org/tutorials/tensorflow_text/intro\n", - "3. Load image daeta - https://www.tensorflow.org/tutorials/load_data/images\n", - "4. Read data into a Pandas DataFrame - https://www.tensorflow.org/tutorials/load_data/pandas_dataframe\n", - "5. How to represent Unicode strings in TensorFlow - https://www.tensorflow.org/tutorials/load_data/unicode\n", - "6. TFRecord and tf.Example - https://www.tensorflow.org/tutorials/load_data/tfrecord " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright 2020 Google Inc.\n", - "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at\n", - "http://www.apache.org/licenses/LICENSE-2.0\n", - "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." 
- ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "csv.ipynb", - "private_outputs": true, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/introduction_to_tensorflow/labs/load_images_tf.data.ipynb b/notebooks/introduction_to_tensorflow/labs/load_images_tf.data.ipynb deleted file mode 100644 index 0af2c94b..00000000 --- a/notebooks/introduction_to_tensorflow/labs/load_images_tf.data.ipynb +++ /dev/null @@ -1,744 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ucMoYase6URl" - }, - "source": [ - "# Loading Images Using tf.Data.Dataset\n", - "\n", - "**Learning Objectives**\n", - "\n", - "1. Retrieve Images using tf.keras.utils.get_file\n", - "2. Load Images using Keras Pre-Processing\n", - "3. Load Images using tf.Data.Dataset\n", - "4. Understand basic Methods for Training\n", - "\n", - "## Introduction \n", - "\n", - "In this notebook, we load an image dataset using tf.data. The dataset used in this example is distributed as directories of images, with one class of image per directory.\n", - "\n", - "\n", - "Each learning objective will correspond to a **#TODO** in the [student lab notebook](https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive2/introduction_to_tensorflow/solutions/load_images_tf.data.ipynb) -- try to complete that notebook first before reviewing this solution notebook." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "hoQQiZDB6URn" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "3vhAMaIOBIee" - }, - "source": [ - "## Load necessary libraries \n", - "We will start by importing the necessary libraries for this lab." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "gIksPgtT8B6B" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TensorFlow version: 2.3.0-dev20200613\n" - ] - } - ], - "source": [ - "import os\n", - "\n", - "import IPython.display as display\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "from PIL import Image\n", - "\n", - "print(\"TensorFlow version: \", tf.version.VERSION)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "KT6CcaqgQewg" - }, - "outputs": [], - "source": [ - "AUTOTUNE = tf.data.experimental.AUTOTUNE" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "wO0InzL66URu" - }, - "source": [ - "### Retrieve the images\n", - "\n", - "Before you start any training, you will need a set of images to teach the network about the new classes you want to recognize. You can use an archive of creative-commons licensed flower photos from Google.\n", - "\n", - "Note: all images are licensed CC-BY, creators are listed in the `LICENSE.txt` file." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "rN-Pc6Zd6awg" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\n", - "228818944/228813984 [==============================] - 2s 0us/step\n" - ] - } - ], - "source": [ - "import pathlib\n", - "\n", - "data_dir = tf.keras.utils.get_file(\n", - " origin=\"https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\",\n", - " fname=\"flower_photos\",\n", - " untar=True,\n", - ")\n", - "data_dir = pathlib.Path(data_dir)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "rFkFK74oO--g" - }, - "source": [ - "After downloading (218MB), you should now have a copy of the flower photos available.\n", - "\n", - "The directory contains 5 sub-directories, one per class:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "QhewYCxhXQBX" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "3670" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "image_count = len(list(data_dir.glob(\"*/*.jpg\")))\n", - "image_count" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "sJ1HKKdR4A7c" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['daisy', 'tulips', 'roses', 'dandelion', 'sunflowers'],\n", - " dtype='" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - 
"roses = list(data_dir.glob(\"roses/*\"))\n", - "\n", - "for image_path in roses[:3]:\n", - " display.display(Image.open(str(image_path)))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "6jobDTUs8Wxu" - }, - "source": [ - "## Load using `keras.preprocessing`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ehhW308g8soJ" - }, - "source": [ - "A simple way to load images is to use `tf.keras.preprocessing`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Lab Task #1:** load your images using tf.keras.preprocessing." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "syDdF_LWVrWE" - }, - "outputs": [], - "source": [ - "# The 1./255 is to convert from uint8 to float32 in range [0,1].\n", - "# TODO 1a\n", - "# TODO -- Your code here." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "lAmtzsnjDNhB" - }, - "source": [ - "Define some parameters for the loader:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "1zf695or-Flq" - }, - "outputs": [], - "source": [ - "BATCH_SIZE = 32\n", - "IMG_HEIGHT = 224\n", - "IMG_WIDTH = 224\n", - "STEPS_PER_EPOCH = np.ceil(image_count / BATCH_SIZE)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Pw94ajOOVrWI" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found 3670 images belonging to 5 classes.\n" - ] - } - ], - "source": [ - "train_data_gen = image_generator.flow_from_directory(\n", - " directory=str(data_dir),\n", - " batch_size=BATCH_SIZE,\n", - " shuffle=True,\n", - " target_size=(IMG_HEIGHT, IMG_WIDTH),\n", - " classes=list(CLASS_NAMES),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", 
- "id": "2ZgIZeXaDUsF" - }, - "source": [ - "Inspect a batch for image processing:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "nLp0XVG_Vgi2" - }, - "outputs": [], - "source": [ - "def show_batch(image_batch, label_batch):\n", - " plt.figure(figsize=(10,10))\n", - " for n in range(25):\n", - " # TODO 1b\n", - " ax = # TODO -- Your code here.\n", - " # TODO -- Your code here.\n", - " plt.title(CLASS_NAMES[label_batch[n]==1][0].title())\n", - " plt.axis('off')" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "suh6Sjv68rY3" - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "image_batch, label_batch = next(train_data_gen)\n", - "show_batch(image_batch, label_batch)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "AxS1cLzM8mEp" - }, - "source": [ - "## Load using `tf.data`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Ylj9fgkamgWZ" - }, - "source": [ - "The above `keras.preprocessing` method is convienient, but has three downsides: \n", - "\n", - "1. It's slow. See the performance section below.\n", - "1. It lacks fine-grained control.\n", - "1. It is not well integrated with the rest of TensorFlow." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "IIG5CPaULegg" - }, - "source": [ - "To load the files as a `tf.data.Dataset` first create a dataset of the file paths:" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "lAkQp5uxoINu" - }, - "outputs": [], - "source": [ - "list_ds = tf.data.Dataset.list_files(str(data_dir / \"*/*\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "coORvEH-NGwc" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "b'/home/jupyter/.keras/datasets/flower_photos/tulips/2351637471_5dd34fd3ac_n.jpg'\n", - "b'/home/jupyter/.keras/datasets/flower_photos/tulips/16711791713_e54bc9c1af_n.jpg'\n", - "b'/home/jupyter/.keras/datasets/flower_photos/roses/229488796_21ac6ee16d_n.jpg'\n", - "b'/home/jupyter/.keras/datasets/flower_photos/roses/2535466393_6556afeb2f_m.jpg'\n", - "b'/home/jupyter/.keras/datasets/flower_photos/dandelion/14070457521_8eb41f65fa.jpg'\n" - ] - } - ], - "source": [ - "for f in list_ds.take(5):\n", - " print(f.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - 
"colab_type": "text", - "id": "91CPfUUJ_8SZ" - }, - "source": [ - "**Lab Task #2:** Write a short pure-tensorflow function that converts a file path to an `(img, label)` pair:" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "arSQzIey-4D4" - }, - "outputs": [], - "source": [ - "def get_label(file_path):\n", - " # TODO 2a\n", - " # convert the path to a list of path components\n", - " # TODO -- Your code here.\n", - " # The second to last is the class-directory\n", - " # TODO -- Your code here." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "MGlq4IP4Aktb" - }, - "outputs": [], - "source": [ - "def decode_img(img):\n", - " # TODO 2b\n", - " # convert the compressed string to a 3D uint8 tensor\n", - " # TODO -- Your code here.\n", - " # Use `convert_image_dtype` to convert to floats in the [0,1] range.\n", - " # TODO -- Your code here.\n", - " # resize the image to the desired size.\n", - " return tf.image.resize(img, [IMG_WIDTH, IMG_HEIGHT])" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "-xhBRgvNqRRe" - }, - "outputs": [], - "source": [ - "def process_path(file_path):\n", - " label = get_label(file_path)\n", - " # TODO 2c\n", - " # load the raw data from the file as a string\n", - " # TODO -- Your code here.\n", - " img = decode_img(img)\n", - " return img, label" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "S9a5GpsUOBx8" - }, - "source": [ - "Use `Dataset.map` to create a dataset of `image, label` pairs:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "3SDhbo8lOBQv" - }, - "outputs": [], - "source": [ - "# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.\n", - "labeled_ds = 
list_ds.map(process_path, num_parallel_calls=AUTOTUNE)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "kxrl0lGdnpRz" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Image shape: (224, 224, 3)\n", - "Label: [False False False True False]\n" - ] - } - ], - "source": [ - "for image, label in labeled_ds.take(1):\n", - " print(\"Image shape: \", image.numpy().shape)\n", - " print(\"Label: \", label.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "vYGCgJuR_9Qp" - }, - "source": [ - "### Next Steps: Basic methods for training" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "wwZavzgsIytz" - }, - "source": [ - "To train a model with this dataset you will want the data:\n", - "\n", - "* To be well shuffled.\n", - "* To be batched.\n", - "* Batches to be available as soon as possible.\n", - "\n", - "These features can be easily added using the `tf.data` api." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Lab Task #3:** Adding features using the tf.data api. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "uZmZJx8ePw_5" - }, - "outputs": [], - "source": [ - "def prepare_for_training(ds, cache=True, shuffle_buffer_size=1000):\n", - " # This is a small dataset, only load it once, and keep it in memory.\n", - " # use `.cache(filename)` to cache preprocessing work for datasets that don't\n", - " # fit in memory.\n", - " # TODO 3a\n", - " if cache:\n", - " if isinstance(cache, str):\n", - " ds = ds.cache(cache)\n", - " else:\n", - " ds = ds.cache()\n", - "\n", - " # TODO -- Your code here.\n", - "\n", - " # Repeat forever\n", - " ds = ds.repeat()\n", - "\n", - " ds = ds.batch(BATCH_SIZE)\n", - "\n", - " # `prefetch` lets the dataset fetch batches in the background while the model\n", - " # is training.\n", - " ds = ds.prefetch(buffer_size=AUTOTUNE)\n", - "\n", - " return ds" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "-YKnrfAeZV10" - }, - "outputs": [], - "source": [ - "train_ds = prepare_for_training(labeled_ds)\n", - "\n", - "image_batch, label_batch = next(iter(train_ds))" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "UN_Dnl72YNIj" - }, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "show_batch(image_batch.numpy(), label_batch.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright 2020 Google Inc.\n", - "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at\n", - "http://www.apache.org/licenses/LICENSE-2.0\n", - "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "images.ipynb", - "private_outputs": true, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/introduction_to_tensorflow/labs/tfrecord-tf.example.ipynb b/notebooks/introduction_to_tensorflow/labs/tfrecord-tf.example.ipynb deleted file mode 100644 index 972292ff..00000000 --- a/notebooks/introduction_to_tensorflow/labs/tfrecord-tf.example.ipynb +++ /dev/null @@ -1,1670 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "3pkUd_9IZCFO" - }, - "source": [ - "# TFRecord and tf.Example\n", - "\n", - "**Learning Objectives**\n", - "\n", - "1. Understand the TFRecord format for storing data\n", - "2. 
Understand the tf.Example message type\n", - "3. Read and Write a TFRecord file\n", - "\n", - "\n", - "## Introduction \n", - "\n", - "In this notebook, you create, parse, and use the `tf.Example` message, and then serialize, write, and read `tf.Example` messages to and from `.tfrecord` files. To read data efficiently it can be helpful to serialize your data and store it in a set of files (100-200MB each) that can each be read linearly. This is especially true if the data is being streamed over a network. This can also be useful for caching any data-preprocessing.\n", - "\n", - "\n", - "Each learning objective will correspond to a __#TODO__ in this student lab notebook -- try to complete this notebook first and then review the [solution notebook](../solutions/tfrecord-tf.example.ipynb). \n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Ac83J0QxjhFt" - }, - "source": [ - "### The TFRecord format \n", - "\n", - "The TFRecord format is a simple format for storing a sequence of binary records. [Protocol buffers](https://developers.google.com/protocol-buffers/) are a cross-platform, cross-language library for efficient serialization of structured data. Protocol messages are defined by `.proto` files, these are often the easiest way to understand a message type.\n", - "\n", - "The `tf.Example` message (or protobuf) is a flexible message type that represents a `{\"string\": value}` mapping. It is designed for use with TensorFlow and is used throughout the higher-level APIs such as [TFX](https://www.tensorflow.org/tfx/).\n", - "Note: While useful, these structures are optional. There is no need to convert existing code to use TFRecords, unless you are using [`tf.data`](https://www.tensorflow.org/guide/datasets) and reading data is still the bottleneck to training. See [Data Input Pipeline Performance](https://www.tensorflow.org/guide/performance/datasets) for dataset performance tips." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "WkRreBf1eDVc" - }, - "source": [ - "## Load necessary libraries \n", - "We will start by importing the necessary libraries for this lab." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!sudo chown -R jupyter:jupyter /home/jupyter/training-data-analyst" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Ja7sezsmnXph" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[31mERROR: tensorflow 2.1.0 has requirement gast==0.2.2, but you'll have gast 0.3.3 which is incompatible.\u001b[0m\n", - "\u001b[31mERROR: witwidget 1.6.0 has requirement oauth2client>=4.1.3, but you'll have oauth2client 3.0.0 which is incompatible.\u001b[0m\n", - "\u001b[31mERROR: tensorflow-probability 0.8.0 has requirement cloudpickle==1.1.1, but you'll have cloudpickle 1.3.0 which is incompatible.\u001b[0m\n", - "\u001b[31mERROR: tensorflow-probability 0.8.0 has requirement gast<0.3,>=0.2, but you'll have gast 0.3.3 which is incompatible.\u001b[0m\n", - "\u001b[31mERROR: tensorflow-io 0.9.10 has requirement tensorflow==2.1.0rc0, but you'll have tensorflow 2.1.0 which is incompatible.\u001b[0m\n", - "\u001b[33mWARNING: You are using pip version 20.1; however, version 20.1.1 is available.\n", - "You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n", - "TensorFlow version: 2.3.0-dev20200613\n" - ] - } - ], - "source": [ - "!pip install -q tf-nightly\n", - "import IPython.display as display\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "\n", - "print(\"TensorFlow version: \", tf.version.VERSION)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Please ignore any incompatibility warnings and errors.\n" - ] - }, - { - "cell_type": "markdown", - 
"metadata": { - "colab_type": "text", - "id": "e5Kq88ccUWQV" - }, - "source": [ - "## `tf.Example`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "VrdQHgvNijTi" - }, - "source": [ - "### Data types for `tf.Example`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "lZw57Qrn4CTE" - }, - "source": [ - "Fundamentally, a `tf.Example` is a `{\"string\": tf.train.Feature}` mapping.\n", - "\n", - "The `tf.train.Feature` message type can accept one of the following three types (See the [`.proto` file](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto) for reference). Most other generic types can be coerced into one of these:\n", - "\n", - "1. `tf.train.BytesList` (the following types can be coerced)\n", - "\n", - " - `string`\n", - " - `byte`\n", - "\n", - "1. `tf.train.FloatList` (the following types can be coerced)\n", - "\n", - " - `float` (`float32`)\n", - " - `double` (`float64`)\n", - "\n", - "1. `tf.train.Int64List` (the following types can be coerced)\n", - "\n", - " - `bool`\n", - " - `enum`\n", - " - `int32`\n", - " - `uint32`\n", - " - `int64`\n", - " - `uint64`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "_e3g9ExathXP" - }, - "source": [ - "**Lab Task #1a:** In order to convert a standard TensorFlow type to a `tf.Example`-compatible `tf.train.Feature`, you can use the shortcut functions below. Note that each function takes a scalar input value and returns a `tf.train.Feature` containing one of the three `list` types above. Complete the `TODOs` below using these types." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "mbsPOUpVtYxA" - }, - "outputs": [], - "source": [ - "# TODO 1a\n", - "# The following functions can be used to convert a value to a type compatible\n", - "# with tf.Example.\n", - "\n", - "\n", - "def _bytes_feature(value):\n", - " \"\"\"Returns a bytes_list from a string / byte.\"\"\"\n", - " if isinstance(value, type(tf.constant(0))):\n", - " value = (\n", - " value.numpy()\n", - " ) # BytesList won't unpack a string from an EagerTensor.\n", - " return # TODO: Complete the code here.\n", - "\n", - "\n", - "def _float_feature(value):\n", - " \"\"\"Returns a float_list from a float / double.\"\"\"\n", - " return # TODO: Complete the code here.\n", - "\n", - "\n", - "def _int64_feature(value):\n", - " \"\"\"Returns an int64_list from a bool / enum / int / uint.\"\"\"\n", - " return # TODO: Complete the code here." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Wst0v9O8hgzy" - }, - "source": [ - "Note: To stay simple, this example only uses scalar inputs. The simplest way to handle non-scalar features is to use `tf.serialize_tensor` to convert tensors to binary-strings. Strings are scalars in tensorflow. Use `tf.parse_tensor` to convert the binary-string back to a tensor." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "vsMbkkC8xxtB" - }, - "source": [ - "Below are some examples of how these functions work. Note the varying input types and the standardized output types. If the input type for a function does not match one of the coercible types stated above, the function will raise an exception (e.g. 
`_int64_feature(1.0)` will error out, since `1.0` is a float, so should be used with the `_float_feature` function instead):" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "hZzyLGr0u73y" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "bytes_list {\n", - " value: \"test_string\"\n", - "}\n", - "\n", - "bytes_list {\n", - " value: \"test_bytes\"\n", - "}\n", - "\n", - "float_list {\n", - " value: 2.7182817459106445\n", - "}\n", - "\n", - "int64_list {\n", - " value: 1\n", - "}\n", - "\n", - "int64_list {\n", - " value: 1\n", - "}\n", - "\n" - ] - } - ], - "source": [ - "print(_bytes_feature(b\"test_string\"))\n", - "print(_bytes_feature(b\"test_bytes\"))\n", - "\n", - "print(_float_feature(np.exp(1)))\n", - "\n", - "print(_int64_feature(True))\n", - "print(_int64_feature(1))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "nj1qpfQU5qmi" - }, - "source": [ - "**Lab Task #1b:** All proto messages can be serialized to a binary-string using the `.SerializeToString` method. Use this method to complete the below `TODO`:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "5afZkORT5pjm" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "b'\\x12\\x06\\n\\x04T\\xf8-@'" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "feature = _float_feature(np.exp(1))\n", - "\n", - "# TODO 1b\n", - "# TODO: Complete the code here" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "laKnw9F3hL-W" - }, - "source": [ - "### Creating a `tf.Example` message" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "b_MEnhxchQPC" - }, - "source": [ - "Suppose you want to create a `tf.Example` message from existing data. 
In practice, the dataset may come from anywhere, but the procedure of creating the `tf.Example` message from a single observation will be the same:\n", - "\n", - "1. Within each observation, each value needs to be converted to a `tf.train.Feature` containing one of the 3 compatible types, using one of the functions above.\n", - "\n", - "1. You create a map (dictionary) from the feature name string to the encoded feature value produced in #1.\n", - "\n", - "1. The map produced in step 2 is converted to a [`Features` message](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto#L85)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "4EgFQ2uHtchc" - }, - "source": [ - "In this notebook, you will create a dataset using NumPy.\n", - "\n", - "This dataset will have 4 features:\n", - "\n", - "* a boolean feature, `False` or `True` with equal probability\n", - "* an integer feature uniformly randomly chosen from `[0, 5]`\n", - "* a string feature generated from a string table by using the integer feature as an index\n", - "* a float feature from a standard normal distribution\n", - "\n", - "Consider a sample consisting of 10,000 independently and identically distributed observations from each of the above distributions:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "CnrguFAy3YQv" - }, - "outputs": [], - "source": [ - "# The number of observations in the dataset.\n", - "n_observations = int(1e4)\n", - "\n", - "# Boolean feature, encoded as False or True.\n", - "feature0 = np.random.choice([False, True], n_observations)\n", - "\n", - "# Integer feature, random from 0 to 4.\n", - "feature1 = np.random.randint(0, 5, n_observations)\n", - "\n", - "# String feature\n", - "strings = np.array([b\"cat\", b\"dog\", b\"chicken\", b\"horse\", b\"goat\"])\n", - "feature2 = strings[feature1]\n", - "\n", - "# Float feature, from a 
standard normal distribution\n", - "feature3 = np.random.randn(n_observations)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "aGrscehJr7Jd" - }, - "source": [ - "Each of these features can be coerced into a `tf.Example`-compatible type using one of `_bytes_feature`, `_float_feature`, `_int64_feature`. You can then create a `tf.Example` message from these encoded features:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "RTCS49Ij_kUw" - }, - "outputs": [], - "source": [ - "def serialize_example(feature0, feature1, feature2, feature3):\n", - " \"\"\"\n", - " Creates a tf.Example message ready to be written to a file.\n", - " \"\"\"\n", - " # Create a dictionary mapping the feature name to the tf.Example-compatible\n", - " # data type.\n", - " feature = {\n", - " \"feature0\": _int64_feature(feature0),\n", - " \"feature1\": _int64_feature(feature1),\n", - " \"feature2\": _bytes_feature(feature2),\n", - " \"feature3\": _float_feature(feature3),\n", - " }\n", - "\n", - " # Create a Features message using tf.train.Example.\n", - "\n", - " example_proto = tf.train.Example(\n", - " features=tf.train.Features(feature=feature)\n", - " )\n", - " return example_proto.SerializeToString()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "XftzX9CN_uGT" - }, - "source": [ - "For example, suppose you have a single observation from the dataset, `[False, 4, bytes('goat'), 0.9876]`. You can create and print the `tf.Example` message for this observation using `create_message()`. Each single observation will be written as a `Features` message as per the above. 
Note that the `tf.Example` [message](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/example.proto#L88) is just a wrapper around the `Features` message:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "N8BtSx2RjYcb" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "b'\\nR\\n\\x11\\n\\x08feature0\\x12\\x05\\x1a\\x03\\n\\x01\\x00\\n\\x11\\n\\x08feature1\\x12\\x05\\x1a\\x03\\n\\x01\\x04\\n\\x14\\n\\x08feature2\\x12\\x08\\n\\x06\\n\\x04goat\\n\\x14\\n\\x08feature3\\x12\\x08\\x12\\x06\\n\\x04[\\xd3|?'" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# This is an example observation from the dataset.\n", - "\n", - "example_observation = []\n", - "\n", - "serialized_example = serialize_example(False, 4, b\"goat\", 0.9876)\n", - "serialized_example" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "_pbGATlG6u-4" - }, - "source": [ - "**Lab Task #1c:** To decode the message use the `tf.train.Example.FromString` method and complete the below TODO" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "dGim-mEm6vit" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "features {\n", - " feature {\n", - " key: \"feature0\"\n", - " value {\n", - " int64_list {\n", - " value: 0\n", - " }\n", - " }\n", - " }\n", - " feature {\n", - " key: \"feature1\"\n", - " value {\n", - " int64_list {\n", - " value: 4\n", - " }\n", - " }\n", - " }\n", - " feature {\n", - " key: \"feature2\"\n", - " value {\n", - " bytes_list {\n", - " value: \"goat\"\n", - " }\n", - " }\n", - " }\n", - " feature {\n", - " key: \"feature3\"\n", - " value {\n", - " float_list {\n", - " value: 0.9876000285148621\n", - " }\n", - " }\n", - " }\n", - "}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": 
"execute_result" - } - ], - "source": [ - "# TODO 1c\n", - "example_proto = # TODO: Complete the code here\n", - "example_proto" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "o6qxofy89obI" - }, - "source": [ - "## TFRecords format details\n", - "\n", - "A TFRecord file contains a sequence of records. The file can only be read sequentially.\n", - "\n", - "Each record contains a byte-string, for the data-payload, plus the data-length, and CRC32C (32-bit CRC using the Castagnoli polynomial) hashes for integrity checking.\n", - "\n", - "Each record is stored in the following formats:\n", - "\n", - " uint64 length\n", - " uint32 masked_crc32_of_length\n", - " byte data[length]\n", - " uint32 masked_crc32_of_data\n", - "\n", - "The records are concatenated together to produce the file. CRCs are\n", - "[described here](https://en.wikipedia.org/wiki/Cyclic_redundancy_check), and\n", - "the mask of a CRC is:\n", - "\n", - " masked_crc = ((crc >> 15) | (crc << 17)) + 0xa282ead8ul\n", - "\n", - "Note: There is no requirement to use `tf.Example` in TFRecord files. `tf.Example` is just a method of serializing dictionaries to byte-strings. Lines of text, encoded image data, or serialized tensors (using `tf.io.serialize_tensor`, and\n", - "`tf.io.parse_tensor` when loading). See the `tf.io` module for more options." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "y-Hjmee-fbLH" - }, - "source": [ - "## TFRecord files using `tf.data`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "GmehkCCT81Ez" - }, - "source": [ - "The `tf.data` module also provides tools for reading and writing data in TensorFlow." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "1FISEuz8ubu3" - }, - "source": [ - "### Writing a TFRecord file\n", - "\n", - "The easiest way to get the data into a dataset is to use the `from_tensor_slices` method.\n", - "\n", - "Applied to an array, it returns a dataset of scalars:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "mXeaukvwu5_-" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tf.data.Dataset.from_tensor_slices(feature1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "f-q0VKyZvcad" - }, - "source": [ - "Applied to a tuple of arrays, it returns a dataset of tuples:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "H5sWyu1kxnvg" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "features_dataset = tf.data.Dataset.from_tensor_slices(\n", - " (feature0, feature1, feature2, feature3)\n", - ")\n", - "features_dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "m1C-t71Nywze" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "tf.Tensor(False, shape=(), dtype=bool)\n", - "tf.Tensor(1, shape=(), dtype=int64)\n", - "tf.Tensor(b'dog', shape=(), dtype=string)\n", - "tf.Tensor(-0.6086492521118764, shape=(), dtype=float64)\n" - ] - } - ], - "source": [ - "# Use `take(1)` to only pull one example from the dataset.\n", - "for f0, f1, f2, f3 in features_dataset.take(1):\n", - " print(f0)\n", - " print(f1)\n", - " print(f2)\n", - " print(f3)" - ] - }, - { - "cell_type": "markdown", - 
"metadata": { - "colab_type": "text", - "id": "mhIe63awyZYd" - }, - "source": [ - "Use the `tf.data.Dataset.map` method to apply a function to each element of a `Dataset`.\n", - "\n", - "The mapped function must operate in TensorFlow graph mode—it must operate on and return `tf.Tensors`. A non-tensor function, like `serialize_example`, can be wrapped with `tf.py_function` to make it compatible.\n", - "\n", - "**Lab Task 2a:** Using `tf.py_function` requires to specify the shape and type information that is otherwise unavailable:" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "apB5KYrJzjPI" - }, - "outputs": [], - "source": [ - "# TODO 2a\n", - "# TODO: Your code goes here" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "lHFjW4u4Npz9" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tf_serialize_example(f0, f1, f2, f3)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "CrFZ9avE3HUF" - }, - "source": [ - "**Lab Task 2b:** Apply this function to each element in the features_dataset using the map function and complete below `TODO`:" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "VDeqYVbW3ww9" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# TODO 2b\n", - "serialized_features_dataset = #TODO : Complete the code here.\n", - "serialized_features_dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "DlDfuh46bRf6" - }, - "outputs": [], - "source": [ - "def generator():\n", - " for 
features in features_dataset:\n", - " yield serialize_example(*features)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "iv9oXKrcbhvX" - }, - "outputs": [], - "source": [ - "serialized_features_dataset = tf.data.Dataset.from_generator(\n", - " generator, output_types=tf.string, output_shapes=()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Dqz8C4D5cIj9" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "serialized_features_dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "p6lw5VYpjZZC" - }, - "source": [ - "And write them to a TFRecord file:" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "vP1VgTO44UIE" - }, - "outputs": [], - "source": [ - "filename = \"test.tfrecord\"\n", - "writer = tf.data.experimental.TFRecordWriter(filename)\n", - "writer.write(serialized_features_dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "6aV0GQhV8tmp" - }, - "source": [ - "### Reading a TFRecord file" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "o3J5D4gcSy8N" - }, - "source": [ - "You can also read the TFRecord file using the `tf.data.TFRecordDataset` class.\n", - "\n", - "More information on consuming TFRecord files using `tf.data` can be found [here](https://www.tensorflow.org/guide/datasets#consuming_tfrecord_data).\n", - "\n", - "**Lab Task 2c:** Complete the below TODO by using `TFRecordDataset`s which is useful for standardizing input data and optimizing performance." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "6OjX6UZl-bHC" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# TODO 2c\n", - "# TODO: Your code goes here" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "6_EQ9i2E_-Fz" - }, - "source": [ - "At this point the dataset contains serialized `tf.train.Example` messages. When iterated over it returns these as scalar string tensors.\n", - "\n", - "Use the `.take` method to only show the first 10 records.\n", - "\n", - "Note: iterating over a `tf.data.Dataset` only works with eager execution enabled." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "hxVXpLz_AJlm" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\n", - "\n", - "\\n\\x11\\n\\x08feature0\\x12\\x05\\x1a\\x03\\n\\x01\\x00\\n\\x11\\n\\x08feature1\\x12\\x05\\x1a\\x03\\n\\x01\\x00'>\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ] - } - ], - "source": [ - "for raw_record in raw_dataset.take(10):\n", - " print(repr(raw_record))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "W-6oNzM4luFQ" - }, - "source": [ - "These tensors can be parsed using the function below. 
Note that the `feature_description` is necessary here because datasets use graph-execution, and need this description to build their shape and type signature:" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "zQjbIR1nleiy" - }, - "outputs": [], - "source": [ - "# Create a description of the features.\n", - "feature_description = {\n", - " \"feature0\": tf.io.FixedLenFeature([], tf.int64, default_value=0),\n", - " \"feature1\": tf.io.FixedLenFeature([], tf.int64, default_value=0),\n", - " \"feature2\": tf.io.FixedLenFeature([], tf.string, default_value=\"\"),\n", - " \"feature3\": tf.io.FixedLenFeature([], tf.float32, default_value=0.0),\n", - "}\n", - "\n", - "\n", - "def _parse_function(example_proto):\n", - " # Parse the input `tf.Example` proto using the dictionary above.\n", - " return tf.io.parse_single_example(example_proto, feature_description)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "gWETjUqhEQZf" - }, - "source": [ - "Alternatively, use `tf.parse example` to parse the whole batch at once. Apply this function to each item in the dataset using the `tf.data.Dataset.map` method:" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "6Ob7D-zmBm1w" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "parsed_dataset = raw_dataset.map(_parse_function)\n", - "parsed_dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "sNV-XclGnOvn" - }, - "source": [ - "Use eager execution to display the observations in the dataset. There are 10,000 observations in this dataset, but you will only display the first 10. The data is displayed as a dictionary of features. 
Each item is a `tf.Tensor`, and the `numpy` element of this tensor displays the value of the feature:" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "x2LT2JCqhoD_" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'feature0': , 'feature2': , 'feature1': , 'feature3': }\n", - "{'feature0': , 'feature2': , 'feature1': , 'feature3': }\n", - "{'feature0': , 'feature2': , 'feature1': , 'feature3': }\n", - "{'feature0': , 'feature2': , 'feature1': , 'feature3': }\n", - "{'feature0': , 'feature2': , 'feature1': , 'feature3': }\n", - "{'feature0': , 'feature2': , 'feature1': , 'feature3': }\n", - "{'feature0': , 'feature2': , 'feature1': , 'feature3': }\n", - "{'feature0': , 'feature2': , 'feature1': , 'feature3': }\n", - "{'feature0': , 'feature2': , 'feature1': , 'feature3': }\n", - "{'feature0': , 'feature2': , 'feature1': , 'feature3': }\n" - ] - } - ], - "source": [ - "for parsed_record in parsed_dataset.take(10):\n", - " print(repr(parsed_record))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Cig9EodTlDmg" - }, - "source": [ - "Here, the `tf.parse_example` function unpacks the `tf.Example` fields into standard tensors." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "jyg1g3gU7DNn" - }, - "source": [ - "## TFRecord files in Python" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "3FXG3miA7Kf1" - }, - "source": [ - "The `tf.io` module also contains pure-Python functions for reading and writing TFRecord files." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "CKn5uql2lAaN" - }, - "source": [ - "### Writing a TFRecord file" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "LNW_FA-GQWXs" - }, - "source": [ - "Next, write the 10,000 observations to the file `test.tfrecord`. Each observation is converted to a `tf.Example` message, then written to file. You can then verify that the file `test.tfrecord` has been created:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "MKPHzoGv7q44" - }, - "outputs": [], - "source": [ - "# Write the `tf.Example` observations to the file.\n", - "with tf.io.TFRecordWriter(filename) as writer:\n", - " for i in range(n_observations):\n", - " example = serialize_example(\n", - " feature0[i], feature1[i], feature2[i], feature3[i]\n", - " )\n", - " writer.write(example)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "EjdFHHJMpUUo" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "984K\ttest.tfrecord\n" - ] - } - ], - "source": [ - "!du -sh {filename}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "2osVRnYNni-E" - }, - "source": [ - "### Reading a TFRecord file\n", - "\n", - "These serialized tensors can be easily parsed using `tf.train.Example.ParseFromString`:" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "U3tnd3LerOtV" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "filenames = [filename]\n", - "raw_dataset = tf.data.TFRecordDataset(filenames)\n", - "raw_dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - 
"colab": {}, - "colab_type": "code", - "id": "nsEAACHcnm3f" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "features {\n", - " feature {\n", - " key: \"feature0\"\n", - " value {\n", - " int64_list {\n", - " value: 0\n", - " }\n", - " }\n", - " }\n", - " feature {\n", - " key: \"feature1\"\n", - " value {\n", - " int64_list {\n", - " value: 1\n", - " }\n", - " }\n", - " }\n", - " feature {\n", - " key: \"feature2\"\n", - " value {\n", - " bytes_list {\n", - " value: \"dog\"\n", - " }\n", - " }\n", - " }\n", - " feature {\n", - " key: \"feature3\"\n", - " value {\n", - " float_list {\n", - " value: -0.6086492538452148\n", - " }\n", - " }\n", - " }\n", - "}\n", - "\n" - ] - } - ], - "source": [ - "for raw_record in raw_dataset.take(1):\n", - " example = tf.train.Example()\n", - " example.ParseFromString(raw_record.numpy())\n", - " print(example)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "S0tFDrwdoj3q" - }, - "source": [ - "## Walkthrough: Reading and writing image data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "rjN2LFxFpcR9" - }, - "source": [ - "This is an end-to-end example of how to read and write image data using TFRecords. Using an image as input data, you will write the data as a TFRecord file, then read the file back and display the image.\n", - "\n", - "This can be useful if, for example, you want to use several models on the same input dataset. Instead of storing the image data raw, it can be preprocessed into the TFRecords format, and that can be used in all further processing and modelling.\n", - "\n", - "First, let's download [this image](https://commons.wikimedia.org/wiki/File:Felis_catus-cat_on_snow.jpg) of a cat in the snow and [this photo](https://upload.wikimedia.org/wikipedia/commons/f/fe/New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg) of the Williamsburg Bridge, NYC under construction." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "5Lk2qrKvN0yu" - }, - "source": [ - "### Fetch the images" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "3a0fmwg8lHdF" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/320px-Felis_catus-cat_on_snow.jpg\n", - "24576/17858 [=========================================] - 0s 0us/step\n", - "Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/194px-New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg\n", - "16384/15477 [===============================] - 0s 0us/step\n" - ] - } - ], - "source": [ - "cat_in_snow = tf.keras.utils.get_file(\n", - " \"320px-Felis_catus-cat_on_snow.jpg\",\n", - " \"https://storage.googleapis.com/download.tensorflow.org/example_images/320px-Felis_catus-cat_on_snow.jpg\",\n", - ")\n", - "williamsburg_bridge = tf.keras.utils.get_file(\n", - " \"194px-New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg\",\n", - " \"https://storage.googleapis.com/download.tensorflow.org/example_images/194px-New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "7aJJh7vENeE4" - }, - "outputs": [ - { - "data": { - "image/jpeg": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Image cc-by: Von.grzanka" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "display.display(display.Image(filename=cat_in_snow))\n", - "display.display(\n", - " display.HTML(\n", - " 'Image cc-by: Von.grzanka'\n", - " )\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - 
"metadata": { - "colab": {}, - "colab_type": "code", - "id": "KkW0uuhcXZqA" - }, - "outputs": [ - { - "data": { - "image/jpeg": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "From Wikimedia" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "display.display(display.Image(filename=williamsburg_bridge))\n", - "display.display(\n", - " display.HTML(\n", - " 'From Wikimedia'\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "VSOgJSwoN5TQ" - }, - "source": [ - "### Write the TFRecord file" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Azx83ryQEU6T" - }, - "source": [ - "As before, encode the features as types compatible with `tf.Example`. This stores the raw image string feature, as well as the height, width, depth, and arbitrary `label` feature. The latter is used when you write the file to distinguish between the cat image and the bridge image. Use `0` for the cat_in_snow image, and `1` for the williamsburg_bridge image." 
- ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "kC4TS1ZEONHr" - }, - "outputs": [], - "source": [ - "image_labels = {\n", - " cat_in_snow: 0,\n", - " williamsburg_bridge: 1,\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "c5njMSYNEhNZ" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "features {\n", - " feature {\n", - " key: \"depth\"\n", - " value {\n", - " int64_list {\n", - " value: 3\n", - " }\n", - " }\n", - " }\n", - " feature {\n", - " key: \"height\"\n", - " value {\n", - " int64_list {\n", - " value: 213\n", - " }\n", - "...\n" - ] - } - ], - "source": [ - "# This is an example, just using the cat image.\n", - "image_string = open(cat_in_snow, \"rb\").read()\n", - "\n", - "label = image_labels[cat_in_snow]\n", - "\n", - "\n", - "# Create a dictionary with features that may be relevant.\n", - "def image_example(image_string, label):\n", - " image_shape = tf.image.decode_jpeg(image_string).shape\n", - "\n", - " feature = {\n", - " \"height\": _int64_feature(image_shape[0]),\n", - " \"width\": _int64_feature(image_shape[1]),\n", - " \"depth\": _int64_feature(image_shape[2]),\n", - " \"label\": _int64_feature(label),\n", - " \"image_raw\": _bytes_feature(image_string),\n", - " }\n", - "\n", - " return tf.train.Example(features=tf.train.Features(feature=feature))\n", - "\n", - "\n", - "for line in str(image_example(image_string, label)).split(\"\\n\")[:15]:\n", - " print(line)\n", - "print(\"...\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "2G_o3O9MN0Qx" - }, - "source": [ - "Notice that all of the features are now stored in the `tf.Example` message. 
Next, functionalize the code above and write the example messages to a file named `images.tfrecords`:" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "qcw06lQCOCZU" - }, - "outputs": [], - "source": [ - "# Write the raw image files to `images.tfrecords`.\n", - "# First, process the two images into `tf.Example` messages.\n", - "# Then, write to a `.tfrecords` file.\n", - "record_file = \"images.tfrecords\"\n", - "with tf.io.TFRecordWriter(record_file) as writer:\n", - " for filename, label in image_labels.items():\n", - " image_string = open(filename, \"rb\").read()\n", - " tf_example = image_example(image_string, label)\n", - " writer.write(tf_example.SerializeToString())" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "yJrTe6tHPCfs" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "36K\timages.tfrecords\n" - ] - } - ], - "source": [ - "!du -sh {record_file}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "jJSsCkZLPH6K" - }, - "source": [ - "### Read the TFRecord file\n", - "\n", - "You now have the file—`images.tfrecords`—and can now iterate over the records in it to read back what you wrote. Given that in this example you will only reproduce the image, the only feature you will need is the raw image string. Extract it using the getters described above, namely `example.features.feature['image_raw'].bytes_list.value[0]`. 
You can also use the labels to determine which record is the cat and which one is the bridge:" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "M6Cnfd3cTKHN" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "raw_image_dataset = tf.data.TFRecordDataset(\"images.tfrecords\")\n", - "\n", - "# Create a dictionary describing the features.\n", - "image_feature_description = {\n", - " \"height\": tf.io.FixedLenFeature([], tf.int64),\n", - " \"width\": tf.io.FixedLenFeature([], tf.int64),\n", - " \"depth\": tf.io.FixedLenFeature([], tf.int64),\n", - " \"label\": tf.io.FixedLenFeature([], tf.int64),\n", - " \"image_raw\": tf.io.FixedLenFeature([], tf.string),\n", - "}\n", - "\n", - "\n", - "def _parse_image_function(example_proto):\n", - " # Parse the input tf.Example proto using the dictionary above.\n", - " return tf.io.parse_single_example(example_proto, image_feature_description)\n", - "\n", - "\n", - "parsed_image_dataset = raw_image_dataset.map(_parse_image_function)\n", - "parsed_image_dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "0PEEFPk4NEg1" - }, - "source": [ - "Recover the images from the TFRecord file:" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "yZf8jOyEIjSF" - }, - "outputs": [ - { - "data": { - "image/jpeg": 
"/9j/4AAQSkZJRgABAQEASABIAAD/2wBDAAYEBQYFBAYGBQYHBwYIChAKCgkJChQODwwQFxQYGBcUFhYaHSUfGhsjHBYWICwgIyYnKSopGR8tMC0oMCUoKSj/2wBDAQcHBwoIChMKChMoGhYaKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCj/wAARCADVAUADAREAAhEBAxEB/8QAHAAAAgIDAQEAAAAAAAAAAAAAAwQCBQEGBwgA/8QAPxAAAgEDAwIFAQUGBQIGAwAAAQIDAAQRBRIhMUEGEyJRYXEHFDKBkQgjQqGx8BVSwdHxYuEWJDNDcoI0U7L/xAAaAQADAQEBAQAAAAAAAAAAAAAAAQIDBAUG/8QAJxEAAgICAgICAwEBAAMAAAAAAAECESExAxJBUQRhEyIycSMzgbH/2gAMAwEAAhEDEQA/AO0suOldRzkVTcaACABRSGDmkHSmkJiy+pqYieMnFABolx0pDQwHCjmlQxea5wSBTSE2V8spZqtIiz4Lu60wCJF3qbHQVYgetKx0Zx2FABUXApAECH8qLHQSOIDmk2NIM5AXHekMgCAcnmgRCVyR7D2oSBiMiK3PQVZLEbn92cp3q1kl4BQR73LPTbEkNQLhiF6VLKQ/DhRmoZSDlty1JWyvmdlk+K0SIZgEtyaBBFGR7UhkXIUYzk0AU1+hkfgVrEzaJ2cODiQUpMaQ1LbRnoKlMfUqrqEKxrRMhoSCgyiqEkFkkKjAooCMQ3OM0mM2WzH7tQo6VlI1Qxu9RGelSMS1SVfKwOtXBZM5suyAOtZGxAsFNAiEj8UxCknJqkIJEuDSYIKQKQz4PgdaAASTEZANVQrF2JbmmIykdDYB0j+MUrHQdE46VIzOygCJGGGKYhiNOmalspDG0BaQwRODQBkgsOKAJBcDmgAE5A600JldMxJ9NWiGDVQ/4uTTCibRApxSsKIplTimwQ5HwvPWpKIByG68UAAuJAapIlkoiDjNJjRKY44XihDYq6sckGqRADdlsGmAzbKHJBFSxobEY2Ee1TZVFJqSHeQBWsDKRSTiRJsgGrJTJ5yMmmFhrYbmGKljRslrIEhA71i0aolKQPUDyaSBiF5ZzTnKqcEVakkQ4tmwMeayNQUjYFMGBZiaYj4LmgQUEKKQwbSCnQAGct0piMBC1FioQ1nVYtLltoDEZp5wWVA4TCjjOT88YrLk5VD7NYcfYstNuYb2ASwNx0ZW6qfY0Q5I8itClBwdMe2r7iqsVEtyKPxCgCJmix+IUAAe6iBwDTyGhG98SWNjJ5chkdx+MRqCE+pJH6DJrHk5ocbqTyaw4pSVosPv6lQQdwIyCO4rWjMib1SelFAGS8bgKtIZNpXkPoHSgBOZnZsVSJZ9HCe9FhQRYAKVjSM+Wi8seKMgAeSNTmnTDBg3HpO0EgewooLRKKKecZCYB96LoVWTeyMYJc5p9rF1I2UPnOE7k0NgkX6aCrR89ay7l9SH/hzdn1YFH5BdAEnhbByrn9aa5hfjFn0h7TJLZqu9h1ozHZu4yoOKLHQC50aR1MgPPzTXJRLga/d2TAkMACK2UzNxEI7ctMEAzzVOWBKOTZ4tKjjiQ7fVisHNm3RIKlnn0qMUrCh620pI8M5LH5qXIaQ2EjVTgAUhlTnJ61ZJB8DqaAYPKjvTFREzAdKAIJKJG5pMEfTKAuUPNCY2hVvPI9I5qrJoYiEir6utIpGjeMHYeJElmP7qGNF/FgKDnnNcfK/+lfR0wX/O17C6fqUlhdyPHiWJhk7WOdoHJ6YyOO/NZO4PtHZSqS6yNmgvhdQpLDKWRhkGu/inHlj2ics4uD6sLud/4ia0pEWyQTjnNICn1bVFgb7tZkPct1ZeQg9/76fWub5HyOn6Q/r/AOG/Dw9v2loor+Nf8ElLJmZiMAnkZ7464rirFs6k/wBi/wDDn3i40WxLZwI9uT8HH+lej8Z3xKzj+Qq
5WkX0EYQkE7jWrMkxqFwCFI5NS0UmWltGijOeTUMojJahiStFhQq42EjqaoVETuPQHPtQAGe1mk+BTTSE0VhtpproRp+EHk1paSsjNm0abYxCLYVye5NYSkapDbxpEnHQUrsZXXMbTgFc4qk6JasxptuYr9AelOTtBFZN0TBUYrnZqTIFIZFsUAUmrRtI4C9K0iZshn7vbAkdBT2x6Ka+1NsEJxWsYGUpGs3s7SOcnOa2UaM7szpyA3Kn2NKWio7N4hhEsagdcVzN0bUKuohl5wMUwGjKPLyOlKgKu+vkiGBy3xVqLZLkkVmG/wA1OgIFWzyxoCzG0Z6k0xH20D+EmixEwhPbFKwokExQMlxQB8eOTQBpvjS3W5vvKjIMjWoeRWwAyhyowT/ED2ri+U1Hki36Oz4/7cbRqMN8X0+ELMxdX2ortljzjHA5z0xz7VE2CRQf4pfw6fNaRXH3MR3RZJopCqqm4hl56/TscVzKUuN9oM1pTVNGuHX/ABPatayWHia+mt5srCxfJI7A5GAcEjPwTXVHnl5eTGXGi/tNd8V6hIket63N5CzkSpZqiOYlwWG5QDnJCkjoM/Wp5PmSSqI48C2zbdAlSeC5uonKoZAFWMcKgGAOhH5HFcnHJ3k6pRVJIuNUnZo8LFh1QsFOAAvUscfrgda0lNVkXHx5N10qBLbQtOSJmZPuyMGYYLZGckds5r1eBdYJHmcz7Tk/szbyjzjuP51s1gyWwisXuB5fIHU0vAeS4gBcioZoiygjwhDd6hlIiLNd5JGTRYqJiBN4UAZosdEb9AkZVfxYoixMrbKMIjEj1E1TYkiwhcQW7MTyal5Hoq1u2nu9mfRV1SJu2XOEEe1Bk1BZKzs8SiWTr7UnLwCReQ9OKzZaC5x1pDISEbDQBUzE5JarRAtfjzLcgVccMUtGoXYbzCpBFdCZg0VkyHcQa0TJMW7mOQVMiom56FehkGT0rmmjeLB6uk0rmSFcrRHApCL30i25iKkPVqNktiEihVEkjZatEQ0MQxSXBJQcVm3RaJLYys2CaLBotbXSlVQW5NQ5FUMGziTqopWBUX7ASbIwKtIluiVpD/8AsoYIYS1R5M9qQyVzBGqYA5pDND+1Rm0rwvJeKilfwbiuSpOSOf1rl+XxufV+jp+LyLjbOI6NcQ/dBdy2/mTSFgHPAVO4GTgnPIHFRLVBfkubgJqMU5kSMgRGCXzGY+oglQOOpxjjklc+9YSRpF5K3SbeXT9Q0PTTGifd5HsZkB/dCdDv3Dd0DI2TnPK+xFRJ3bKS0i48P28e++1BLRES+uHu7fzDykTqCGOejHk4HYis3n/0af4bhpzQzWkVpc27LaM3mFIG25B6cD8Xt+VQnTK0sGuzTSRa+dOhb1tKV2YxlSeD8DHT2q2uyNPydMo7NfyLDHHCn/toE656DFe9xxpJM8OcrZVAu0mFJya2Mi+sYBFENw5NYydmsVRbWiDrUMpDxYBetSUYmuUhhJYjNACWm3qS3DeY2COlOSEmWE6ibO31UkNkH08LFuXrR2CimujIQUJIFWiWAs02TBiOKbdgkbHbTQ4BGKydlqg5lB9SdqQxuynVxjPNS0NDMrYANIbIO3p5PFAip1GZVZVX8VaRRLIopcDPPFNgIahZxHngGqjJkuKNZ1CGNWOGHFbxkZSRVAjcc9Ktkou9CR3xyQtZTo1gbeg/dBCOKwZoV19ZI4JC/nVJktGs39s4DYbPsK3i7MZJl7pu0WyqvXFYy2ax0WUMOcHFS2Oh8LiI4FTZQnOMoTTQjXXI+9Et0FarRm9jsQ3j09KllJljaQ+nmpbGhW6XMwXHemgZzr7e5mTwTHApPlS3cazYIwVAJ2nvgkL0rPlf6l8aycOsrmK3tI7qZnS3jXbMYF3uuDhVAHQ+31rlbvDNkvJtNjPpl0INInXUrKdb2G6H3uIJGzKSBHuUnaWPAz1Ix16c6t/yauks+SOpaPBr+pR3boVV5ZWuYiSM5jEZ3Y6
YA69azk3BtM0VSVo2zxHp62/3C4DeXA9g9vFAsRkZ8soDKg5KqAQM4GcVkuRRzIrq3hFfpckSK1rBJcrdW8irPZ3ELQuFf8LAMOVPbHek/DWmUs2Z1dCuqTNtQ3Vsf3Mg5dQMtj5BJPBqY8ji0U4Jo3nS9RXV7OG6jYHzFBbBHB7/AM6+mhNSVo8OcXF0XenWWZ1ZgQKcp4FGJa3rxW0YdnUKenPWsrNaK2TxVpVlHK13qFpDHFjc0koAHek2NJnP/Ef29eH7ItFosVxq9yOAIxsjHyWPb6A8VDmkUoNmnat9vl7J/wDh6HvQHAkaQ4c9yAOgznjrS/KP8ZRL9umurNu/we0EYHIEjZOe319+KPysOiNh0L9pa5s2A1Hw2s0f8Tw3JB+gBX+eaHOwUUdEsP2kPA93GqXg1SxkKgsXtd6AnsCDn8yBSTH1Rvmg6zo/iOBbrR72C8tnPDxODz9OtaXghxosxpytIcH00rFROSyit03lsChWwqiME6tIRH+HHWm4gmHsZQ8xCcUmhpltIoEfXmoKBMCUJzTBlLsaS8Oa00iPI+5SCIkkZxU7G8Gnarc3MkzbdwWuiEVRhKTNeu5W3EMSD81uoozshYRPNIepUVLwVHJumg2+0qPesJs3ijY5I9gB9qxssBcEGI4FNCZrXk+fdPv4VK1ukZ1YLS7jbNg9KJIUWbJFMrEBayo0ssVX089KkZX3vAIHSqQma9NAxmJI4rRPBm1ksrWPZEM1LZSQ7azqBjvUtFJn17EAnmUJgzl/2p28mq+GL6KGQJKgEsZIyu5Tnke1VONwYoSqRxzw/bWejwXGnatE0mm6lEYJ7hGAZHJDB1PZgQpGf8v5V5vIm8rZ2Qa8g/H2nasNKgv21bRbqEzorXFqkizzNkNuZCNqkY5wSMnisuBQjJ1dl8naSV6Nw8M3FvdzXl2QUimuGkdieMk8r+R69qz+TyrsacPG6GdZTWtZ8ZeIrq0tLa+0XZBYrDLdrbvGIQdpjL+kgszkrkcsDWMnDkildNFuLh9ph5rSS8tL3/H4oprvU3iRIYZ96W8cYO1RICC8jFmLEengDNJpRiop2EW+16FZ/DN/oeluBqSppgRp1DsTNH13Jv8A8vzjNO+zTaHlYOI6f4z1PTdWe806fbFECIopDlM/5se9erBuCSOKaUm2Xk/2meMLyAB9RManBykYVR26+3Wm+V+xKCrRU6jretagq3F9qN5NvGOZSFVPoOAen8qn8jb2NwRqt7+/mCs5kLHG52JJJPX/AL1omZ0WMREEBtYgQxBO4E8fP9+9ZPLs0WAtzLlYoU3BgMAL1A7/ANaaQN+haeJbSQKedoy3qyQfbNO7FoYMUbw7pAEXHAzyam2iqTEhYr5JcZRc9WPJFX3I6DWj3Wr+Gr9dR0O8uLO6To8eMkY5ypyCPqKa5EwcJI719lH7QVsiiz8eTXKTHhb7ZuQk/wCYKMgfODWvYikzur30Ws2UV3YXMU9nJhkkjbKuPg961jSMpJ+RxSn3bbCMNjk0vIeAaXH3YKE5bvTqw0WIvGEW6Q/lU9SrMx3nmqdvNHULF5pTGCwXmmkSVs0r7DIzk/FXQMErCXgj1fSnoWyl1XRZnJeE8ntWkeT2ZS434AaPY3kBcSIAuaJyTHBNG0WUnkkOeNo6Vi8mqY6uomVCMZqeo7FbjUPLjYvgCqUbE3RWaddedclnGFz0q3GkTF2B+7lQHTOaLJotNLkZQGfOfmokiosuxeKUweDWfU0sBJtk5zgU9CF51XyiAOaoQAsRb4x6qBGdMRg5Mnc0SBFjqRBtDj2qFst6NDukWTekgyrZBBFdNWjDRyjxN4XmsL2BDepDo0j52FMHHXYW+vTIrg5eLrL6OuHJ2X2JXXhpZdQ/w8G4aJx5kU0mPTnnp8cc4rN8dOkV3xbKDS59SsEXw59xlXWEmMHkYw7EjIbB6qeu7PSuLm+PfJk9Di5UuHvZ2m38HyWVvDp95I8l1JEgmZVLKJC
oG4t8Yx7HFc645QfUUuRTXYDq+g6lolzHJpl9Zyyhdr/eY2G9sclAuf5DJq5cfUiM09o0b7VdeOmaE6tOp1GeMxRrECoTPUjPT8/eq4oXJBOWDg8MXn3EEU4VpG/E+cE//L5/nxXc5YbOZR0jZNQgVYIYUJC5VckEMf8AjmueLt2dElgDrkam3WCHhcAMQcEADP55GKvjeSJrBqoVQscjE53GRhj8IHGPk11HMWSysbcMq7VJHPc49v8AtUqORtg7IiKYFxmRhuY/9PtVMSHDIrweZ5aksAAxXOAD7e9SkOwcsMb24klMhBG4ntS80PxZGSVVVdsY3AjOB/Sl1bKUkhuAfeoAxU89MKTWbfU0rsip1CyVDuwykdDjIP51rHksylx0bJ9n32g694Luh9yuZJLEt++s3YFJB8ccHp0rVS66Irthnr/wV4s0nxR4fTUdGuUmAAE0YyGhfHKsCOP9a3TUtGUouOx+3k82Z2BzVtUiFkO2+ZwD0pDLiwiVIiCOcVEmUirurzNyYUGatLBNmVtw+C36UWFBobcBzxSbAMYgAc0rHQvDGz5OOAadgZu4VMZxw2KExFNFOYSyfxVdWSmJ6pdAxlSfVnpVwjkmbBaYxUbz0FVMUTb47FRGpYVzdjWiLoqqdoAAoATeUtkDqKoRODe7DOcUmA+8WEzSsYBBufBximAWQpGuR1pAJPOzZDfhqqARS1Sa4yV4FVdIirZYz6Tb3Vm8MsYaNhyp9+1ZtlpHMtX0k+HrlYJXLQknynBJYDuST8np8Vl/JpsS07TFOvwa3b6sw1CC3a3QSIpHllt2D36/NZyp5KVm6xa7FdJmYo9yn4tgOOOuCevviueSjdmqbqjVfFPiC30/zZZnUXXAjkPKtu6ce2Pbn9K5ZtWdEE6OFeN7mXU9UJvztlB3SoTwoA4IPcY6VfHjI5o1y3hluL/zIozljkrjt2OO9aydRyZxVsevpFkCjfsQTBVJOQfn8+azii5MDq0v7q4RT+Ecs3O4nn++1awRnN4KG9tttpGoVSW2qm0+3UV0JnO0HClbq3iAA2rk4HBpgAuIijzSDgBcBemP9/rQB8wcQRxhiWYAscgkD2x+nzQIF98ciSJhlRgKp/l/P+lFeQssLFFEcpnO0KdrMD1PcD498f0qOTWDTjXsvdLhX7kJpUKJv2hcEcdOv+v9K5J3Z1Rqiv1RmFw0GQQTk5Pf61pB4siWcGv3UCxysICElGSBng57D3/Ot1L2YSj6Ng8EeJ77RNVW8024+7zYxIu4hXHHDDoc9qabhlBiSpnrvwJq0GvaTFqNpKrxvw4HVH7qR2/Ou1TUlaOVxcXRuKRBIw+OamxmLm68qNmzjihIG6K6yiPmNI/LNzmrfolIcDgEgnmpGPW6kpkDmpZSGHjUL80rHRXmTy2ZQfTVE2LPMJOFPNOgKSdXSd5GGABxWq1RkynZWmmLHlc1rpGZcWkIkQbfwrWUmapG2PKQMdq5zUTkfcCKskWijJkJxRYh62AzgUmMncyYwi0JDYvnac0xA2JdvigCEo5AoEM2UA37j0pNjSHZp44xheoFSlZV0al4xs3v9LmMcYkmj/eIv+bHaiUbjSCLp5Of6bPGpaYyqW53enGMDoPjr+tcTfs6Cqv/ABEn+KPbWpIDYQOcFPUMA/JBx+mPauScrdI6IwxbNU1O8+/TNBM4K2b7k3ndujbufocfI3Gs/DNVtGnzyxXk7ysR5SHywWPXAyFB/nWqTikiG7tgbOBY2JZfL85eSeMqDx+p6CnKVglQKRB5GwsPMR92GOOc9v76U1sGVV/LukVjtZmyDjOeB1+tdMVRzSdgbELKLaNgCqguMjGTxVkE4woklmOMseOM8Y68f6UAwUOwxSFhuDOWUEZzjp/r8UxCKMB5hcnDEKABk59l9v8AemIJDEEaSNtvmMAd3J2gg9uu7jA+tDYIsIbaGzEkczkxxvlyycs2eVUZ5/hzz/rWbdmkcF0oyiFtq4XAwDh
FOcAe/HT6VzyRvF+ANxFBawnIzcDsy7vnB/v3ojkcmUGpoXkWfaDt5RSeGHuO/XPb3+lbxeKMZLNi01rFJ6o1kV8cqxxz1789KpNoTSeUb99jPjOTwv4pgiuZQmn3ZEU27pnsTnvz1rXjl1ZEl2VM9kC7V4FAxyOOa3owsSuYzIVPO0GqToQzkJtUd6QApkMb7veiwLS0mCw89alotMhd3BEW5T+dCQmyr8wlSfxZ71dEg7eFlkZ15+Pam2CMajD58BUdaIumKStFJa2siSFHB2561q5Gaiy8toVSLatYt2apUh4SE8GpoYLH7zNMQYcdKQGQ208UAYY4bcaABsdxyOaYGVHIoAzOmAMUkwMrNhcDiigIE5JJpgDl3NhccUCOU+PYLfw9rUQjkWFL0F4iRwrA+ofQZzXJzwSydHE2zQJdVspJlVJI2ZJA/o4A28kk/kPn9a89xzZ2J4o1u4uZlsdQuio8u5LKDjBA3AgcdPw/rRGNsqTpFVHBJLboHDLFnexJyHPye3firdJkLKLeWyhjTe8mZVXK55GD0+n0rNOzR4Nb1wyQh2iY7WI9Pv8Al7V0caTZjySaRRJJv3MyqwGcAD4/r/tXTRz3ZYx7Y5pXwMJDkZHbv/zU7wMkIGS0WMfix6z3APOB8/7UWIjIoVmUBQkQ9ee3HSqQhSUBbdZGiG8/+mnGAPp70woYmg+6G3uy0atCqttJx1HGfnjpU3Y0EtUuby4gBICIC+CoIRc9AOmSP1pPCKRsVhbQvcPKXKhSWRWyA2MDHJHX88DHHOBzytm8aRWXkYvpUkMkaQAhi7EZzjcRjOM9sdeABVRVbJk7YW1i+8LG0EVvFFkjkcFSSeuOwbAYdjUylRUY2VU0OJ13ZbYFR++Mk8HHtjk1SlaCqYskcXmRiaAyRSBv3UcoRg5BAGSCOuDjHI44zmtYszkj079gPiKfXPDcdhqF0ZrvTsRs0h9bJ/CT746Zrr4p9onNyRp2dmMMbQ8AfFOyaKrUPMimi2qTngVaJY22EhMk/GBSBmvm7ubq62xHZDnr8VpSiiE22XL3KGD7tH6j/Eais2aX4BXCGOJQvpHehCGNOQzDEY496JYGshmtGMpDdBU2FGGtkQHcOTRYADEI1LKelMRCJT1agA20sMjigCYdEQ7qQCb3H734qqFZJmaU4Xp70hhVQxr9aACRlQOetAH0jhgQBzSoACRMzZY8e1MQcwrjrikMUluCJfKAzjvVUK/BzL7XLFb/AFyxiuEzD93IVs9CW5rn5VZrB0jncvhTTIEIgMsMcmQSpJduCWP8gPqa8/kdbOyGdGveJSrCDT4l8vYNpOegz+I/yohjI3qiutlMsi2ihkfdgFu4A56fGf1pSwuw45wT1R1ktwVBQwjB5OOPcduKnjWSpukatd3YfeJDwWwM/wBDx/fNdkY0cspWV1m4+8zKrYVlJ4HUd8j++laMhFzaRqY8yjBZUBAO7qeT9agocuifMKrklcHcOnHAB980kDAXMQKrbkKuB5kpB4Yjtn+/inYhTzSs+ZFXdI+5OM4x3pgNabF97s91yoWIOCqHOSBkhQe/Tp9MVLZSD6pGxszGIsMMZJU5fIyM8ntjv70IBqBUFje2aySTTNGpjw4cjJUDjrkjIHtUNFpgJ9v3C22H9zETkBcbpByQDjkH2+D2xT0F2yx+8zWdkzJJIvqEaqg3mNSOTk9s7j+WBWTVs0TwLSxBmLKhLllSPC7mkYdCe+CRzzjke1Kh2JXlmQgaG3YHjgsd4OTgA/JB69AKuONkSydB+wbWDZeJYreYHyblDANxUtGxORzxkZ4P1FdPBKpV7MuRXGz08sz27Ks/pB6AmurejmLKRRPAjKBuWp0x7BSJFcwOknToaMpg1Zq96q20hETEKDjFbRd7MngtLBolhBA9R7mobtmiQ3H5UtrKJcF+wpWG0Rs5Bbw+o7cdqHkA33sMNynJpUFg5riDyBJPMB8A069AyuXVklLQxwOd3Ry
OMUU7DwWKqFJ3dRSsQKScbtqnFMBa4k6AdKaEyDqGAJHAo0GxmBlEfXBpDJmQMvJoATknaOQgAkU0iWR+9tngH86KHZiO8kz7/SgD6S5kmbYmQaEqC7CLbyJF5h5J/WhsKrJr/i/SZNSsBOqt59t6kPx3/lUyVoqLOW6jLJboJI4wHHpUnkKvfJPfHPHxXncsHZ18cjn1xBcuJrySRWFw5cOBgAbsAY7cCs1jBo85JHbDdh4VbftPlg/p/pUvKoqOHZSX+pylQZU4LYXn3+lbcfHRlOdlLexL92kMBYyRjPBzkfH0rdGJU6bIv35QMeWcrk9Bn3pyWBLZfQekyxNhMAFGPHBOMf0qPBbH23BTMzMSWAGADk/1465oELXBN1ZSsFAZ2Kh2PUDv+n9aNAzEb27H7zlGUSeVGxGd2DgZH0GfyxToLAO1xdefDbHbGCEU7Op/07jAoryF+B+wuN8dwPMdXWRYfVjdICpx8YyBz2OKGsgtGbeLyZZJlRlnm8p9o9IEYzuPTPxn5PfFJjWSwtbVDY3lnb7cwRB1m9OQoH4QR05PzzWMpXk2iqClI553RwrJaoCY9jESck4B6kAbj0BwDQAKdblwkhZkYhSk3LYxgjntjpge546mpKEb2B3szcW8s9tPFJhk2kD3Oe3YHr9KcbTrwJ01Yx4anMerfepLX7sUx5Mg/eBGBHA6Hkd8nBx75q3+tUyUeuNO1l9U0+zubiItIyBiT3OOtejHKtHFLDDre3DSsC/lqP4RVUibYU3biILnaCfzpUMrLuAzXqsWOwdapOlRLVuyxlnRoxHEuFUdazUadstu9AorwRNtBOaqrEBvL6GwU3eqXMcVp7u2AKTaSBLJrT/aX4W82NF1WJVlbai4OW+fgfJpKaK6su7F7fVJfNguUe2XvG2Qa0vGCOrvJZeZHkJEMk8CkkAe5ebO0fiNQhmbaJiQJeD3psBnUZLVQgGAR1qY2N0J+YhTOcrVCPnnjdML1FFALzCTYGU8e1ABUEojMkm0KO1AC8ErXU+xUwpPLVTjRKlbG7qNIFxERnvSWStH1hCQvmz429qGJBnu0J2orbfelQ7IXe7yPR6if4RQgZoninwy13E91bKu8As8Z6N/ftWfJxqWioTo5d4nWG0sJ/OfYSMhQm1V+B844rhnCmdUJWc8uNUOUkSJxNGm1WUEhsUKA3LGBVZU1CJDKzCdfWo75Hz+n8q0qjO7KycX9tK0zQ7ldvUK0VEFdbDzL2RwuY2GcZx3x+XNN6BGyaeGknyjBoyMqdo4PHJB7dP5VBRbTQr5UokOEQFRtHq56ge5NJgVRgmur8oX2xxoF2qDsUEcjPdumew6/FO6DYtKbOAebFFPdPE4G4EABPYAcde/NJNsdE3uJor+2QiNI0JLscgnGSAQOnbPuR8VdkBHCtKEkVpLNpEkC/OO5xnJB6YqdZKLqWP96LtAJDHhSeq7h6lBJ75wDj3z9M26waJFmFL3dqEmha4uJFHlOCAckl1bPDcgnIPBPesdGiB6mw/xBoEeQGVwXYnaMKrcHHB5bGR/OhaDyHu4AlgqIWFvCGV07MQPyyOe3c0IGUrpKtwDCFSYbNwBIXgEdPYqOo9xzVr7E/orIbm6tL4SgI8eeHA3Ng9QV689c1dJqiU2mepPs41K31HwlYfdQMopDAe+fbt/3rs4ZXE5+VVI3FLeKIkyZZiPwitLZlgHBa/vgbjGzqE7079BRiSNQ+wLyT+gosCOoG3srYyzyLGoxkk4+P8AWlY6OQeO/tj0/SM2mjot1cyEhXGNqqDjP1ODj9ahz9DUTjnjbxFrfiiRZ9TvHECnKWseQkSk9h3PyaycrZfWjTrfd5nLBR1Ungn2I+KGxo9R/s7WkieGbiS7LCORwUBBxj4zx+lbQ0Lk9HU2SMO7xHCr0+tWZBYo5HmeY5VR0JpBtkI0nuJdyk7c8k09C2Ykga7k2qPQp5b3oToHnBM2LtN5cIIjH4mPejsFeEQktN0
vlxDpwW7Udga9Gb0CARwL6n7n2oi7yDxgmkIk2x7iT35pN1kKIusUN3HFGcJ/Ef8AShO1YPGENS2yTyAk7YB1PvSuhtWSZoZm27wlunH1oysgTaGKfb5XphXueM0Wwr0ElSOKLZAC0hPLe1Kx0DihgGc4du9O2FHPvH/ge28Q2s7KqCZf4Rxkd8Y71nOCkVGXU4lqWiRWEU1qsJRoPSVJ5BrjknFnRFpo0m40+GW6SaOQAcF17Ee4q1LAmic86xo6ttyuOh+Ohpogo7byfvsgibKOmFz/AAtu6VpQiy0jdG6xnjYwUE/yP9/FSxo3O6t/L0sPCTvPHPqwx7/NKWAWTUQsly06I4gs41ClsjJ+BnuT3NIYzHbpHYG3s5ZFCbgu0Zfkg4H1HxUt5spaE7ospWGRpZVQMA7sAo74xjg9CT1yfbFNAxzSHMcWzKLG6FnnYkFVxk4J6Ejj/wC3xRLLCKL6O3CxyrdJJ5Qk3CPLMn4cc49jhh35I4zWMmaRQ2S8kBBRLlo/3sOGJdTgsin8ieO2SKzbo0ooJEu73VriUvHlweG5APTBx2AAH0x71phRJV2bbHA0mj2kccqzzAFWKj1bwAdxHtnr/wA0rTdiarBrN+p3XrTKcKjJD6fSnJwVU8kY3HH0z0NWvBNlBrCW8iSG2hlDRtlZEDD4IP8AkJ4GDxWkbsmR1f8AZ58bWttqp0u5mfz7rEUXnZCs3YewPOBjriteNdZU9MmT7RtbR6GW4e0kciLzZXPU9BXTVnPdFjCES3eWQK8xGfzqG80VWLNL8U+OtE8KrF9+lEuoTybBbocsuBk59uw/MUpPwCRxbxf4t1HxPf26zgrazI06QxnCoMggnHcAY+euKxcrNFGtnIfEGjPZ6gpibc+0SSqqkLHnkKCeTxjn3OKaeBVQ9p9wl1bmF2ZznG0rgZ+e/wCZpMpZOn/Zn9kp1aYahrcrW2lqdw7mX4AI61pGF7FJqJ6J0jRLPTLJViEVpYqMIgPatbrCMnnLCS2a3R3RSeXbk4Vs9aLFQSTUd8oQpthHJ9zT6+RWGuL4NB+5AjTp80lH2O/Qt98JQCIhAOvuadCsanuDNaDy5RH7461KWR3aFh53khUIC+/c1QhdOSwdgHHU96YHzkgqImwT2FAArxWGA52KB1oQmN2l03kBNu9B3NJoEwaopuQzjdk4AHQU/AeT68a7F9HHFIDFjJCihVQndjr3Epj2qMDoc9ami7At5YX1OU9wDTEfQqpZiDtHb5oYHPPtL8PJJZXWpwYVo4yZFxy2O9Y8sOys0hKnR5Na6uDJIqxP5PmFQ+Dlc9visqVF3kzeWzWpDNdEBupLZPvgc007EyvmljEjm2PXn6fPxVoRcaHeRz3sC/h81thUnOGPAz8dKmSGjrsPhy/g0qY3seFByBj8IH9/pRKDoFJWcturW4uLuPTbVxEItzzMOBuPX88VC1ZTD/drWyhSGGaSW5l3AJFgvJ7tu+vGD7ce9J5BMakFpcW8nnTmKQeoxRpvQIOqk/Ujv2PxWeUabDwIYMsLqCWFkEqsigjcSV4zwQOPYZ9zScilEvrRGE9vFKVkhaQbpBnCsygkdgRk8Z9vyrBy2aqIaeC4aG1igkdYyXUeW2Cgzwp6gjG3nrj6mknY9AJYDDkqi/PPvVuyVQHw7fCC9uEn9RcbYyOijnJxjnn+eKqKwKWRa4tA12st1GI1tX4Rs4kYYyBn3x/U1qsIxspbqWfUGmngMhMjfvZckDLc+r3GSfV3wM06rYjWLwLC6i2lEz4J3pkEDjrzWyzszf0dq+z77f3stNj07xjZ3d7JbIFS/tcNI6jp5qkjJA/iBye4J5rZT9ktWWfiX7dpLnTb1/CunyxRrGcXd4ByThRtRT7+5544qJTEonnm81a+vtYkv76eSW7kkMjs553H+/5UqK0dR0+5Gp29gnm7jHgOiDBdiBtJb2AGMH3rB7Ni5sNJGty+UfMm3ZCq5wvyR2+mMY96EFHQ/Bf
2R6ZZXEd/qkUDTp6ooiMBB2z7/SuqEKyzGU6wjpLRboQcF0j4BA9IArUyYxBAl26rPIWXrk9BRoNjRWIyBY3ztOBjoKQCaMqM6lc/NMQtcqzKdjVSEyFhEYGLSncxpydiiqDtcrIphYYz7Uq8jswrPGAm47T0opMNAzHmTJUkimIO5yBHHw4GeO1IYWOPdb5mBdhzipe8AtZPovMETEKAp6ChjC29tIpdlPqIobBIzE/kjeFy/SkAM+Y0o3th27DoKYH0tqIZfUxlJ5oUhURSWaGYnZ5gPAHZaMNBmzSvtl11NJ0CO2aaNJ74kMSQAka9f1JA/WseWXVUjWCt2zhdlbafJpN3c22GlLGafHII6AiuV20brZz7VPu9/eNhiVUY2j+GtI2kRLZSyR+TPtjwvPDH+VaEmw/Z/avc+KdLjjCEy3KjLjcu4f8AcdD1zTA9oNp0NxbyWuVdyoQ+w4x+lbtYyY3k8y/at4fuvDGp3EcRKvcuyxN0DhsZ5+mRXI49XRunaNMWKPTWjjBEMhCiWZ2A6jk568fHfpRdj0WkmrabbNKryzyxl8GRAUJOAMlfkZ6HPfvWLhJmqkkXmnXUNsUjMb/d5HZjuIjPvlQx45I69ckdK55RbNoySL+302IQNLG/mWdxL5pyPcjj8vUMfSs4pydFyaSsGJEslAXkszMQ3fIHf8q6Ix6oycnJlNd3BuFRmbiR8nHGM/8ANIdCtlF5l2gBIVztJzxnuP8AX61ajZDkbb9oXheaxtoroBmtn8tpW255HBJ+P04Jrolx9TBSs5/qK/4jrkdlEfLtUQKzrgDaWPXPGOM5981K1Y/opdfcedHZWsSJI3AKuT6QTj6EjB+h7VUfZLKjVYHtxDarFsc/i4wWOehPfmrsRuen26QaRbozxqsSMW3MpwxGMHpyecdc5GKybyaVgp9R8KSRAvEzy5GIgq7c4IGOmcgduuOeaa5ET1oL4dTULPUkiljTazZ6KUU++B2+mKGlLRUW0em/AuhRx6bFdzpHLLIAcR8R+/51tx8SWWZz5PCOhxRw3EWLlDEcgKue1bN1oz3sdl+72eUkJ8g/hUd6m2weBi3ubRLPapRfak7Hiij1qX7nDHNA+XJ/CB1NaRzhmcnWUElkijZvMYD2ApFAdglG6N8k9Fp3RNWRjim3klN2O1Fjo+EG5uIiH+aLCkNQ6dcoFlbB56Gl2TCjFyZzMMqqGhDFzG25mUnd3agBmBJCMqSe3vmkBO8gliQFty8cAULIMHppllkPmkqo446mqaSJTbGZlRQW3YUdeaRVmIZEL5IyooaFdkzdQtkIoBFKgsgsiLGwjA5Oc0DOFfbs8N3rKW86rIkduow3QZJNcnyZVJHRwq0zhemyLY6vHbmRfukpMUgAYYVuOOcEjNEf2QpKmJavpN1Z39xHZxMsCHAYcEimpWsiaEVtbl32zegYzuP9f5/zq7Ebj9j1nKftI0JInRiJtzK3RgATn6+w+KcctCej2C5lLHIC564HWuijI1zxx4XtPFWnx22pAs0TeZDIvBRsdf6UpQUkNSaZ5m8UeF7mx8avaalbsscUe+MtzHKAcE5rjknBUdCalkqVgvNkS2qRtOymUnOwIu4kDJPXjjB5AqG15Gr8BbZLOKGL77sJ2eaWcByc/PX8vntUO3/JpGl/Ru+g3NmPD90LI4jiKk7VOPV0wOgrKKkpZLk1WCuvtQQwSMPWFTJIPBPXGf771TjYk6Kq+WW1jXEkWY2wpUnDp03KTyRlWz8cVURSbH/DIdtStDFAZYPNDFFbBfJxj69MVakk0ierZ6ouNKstS8NSQThN8icCTja2OOB84rtavBy6ycI8afZzcWGoyT6fH58EqgnaDhdpyAPf3IrGXG4lxkmc5vbI2+u4WJjNPCUjdjgRnPfv8dRxUJ4KrJnxH4fTUPMuLWTZLEod1GSFwoB47nOORxzn3qVOhuN5DeE7s3Ajs9Qt7aOeH0208ahTgjpkcH8/c0pryi4
u9m7ad4YZbjfGn49rKMFiBng8d+uT1+tRd4Ko6j4V+zGxtGW/uLdDNwd4cgZznIxjmuvjhWWYTl4R0NrSKAqB6Yjgknk1tZkLXKvPIZLdsIOAxpr7E/oLdSebHFHEAzgdX6mhIHklKsSW6xkDzG6vjofakBJtMhhjFxKw3AcAnv70dvA68lNNctPsSNM/OK0SohuyVraSeZveUg9gD0obEo+yxRRG2fPOfrUFhw5BB8wUgCPduy7TMMD2pUOwHnKHBLBj80xGTMOcqB8UAfRShPwLj6GgCcs7P1Ut9TQAN3KkeWBnHNAAXSRzlgCD2p2FH2yXcB0FFgGlt0XmFz85pWAIQNg+rFAHnv7brgW/ijUCZARGqKBnphR+tcfPmdHTxOo2cGkuUkuCZC6xu4O/3XPJH/FapUZtm3a8x0+GOe4vvvMc8aSwYHLKQcH+WPyrNr9qRSeLZqP32aaSSRQIoud21evwauqJOh/s92dzN9ptldQp50EEckkzDH7oFcZwfkgcfrVw2KWj1k78gnJroMgMkrgYCEjtRgWSu1jRrLV7Yw6hbJMCpXJHK59j2pSSkqZStHAfE3hoeD9Z+5BJJ7UW4lgdlB3YOMcnqMniuDng0dPFI554hn8+8lKD059GRyo6jPseaXGqQ5u2b39kmntrMPiCxEb4eySVT1y6OCR+eSKaj2lX0DdRv7KsLc6kx+5whLSBjGzMPxHIzj6YI/OsZNR2aQTlkFeaXE8j3TykxCcqg5y65wMDtgZ/XNQuSsRL6J5Za+FrZpNVsY7e+gZC2djgDaM9xxnpt9uacXclaG1SwendPtkSCEOD6VGQW3YNeusI815ZYTyxyJsMalcEYx8YpUBqmt+C9F1SUSTWyCRQQGXgkE5P86l8aZSk0a/H9mFpvVhcP5iPlW6ek4yDjvkde9YvgRa5aJ2v2R6D5gmuPMaUMWyDjOeoPuPrTjxVsb5PRvulaXpumW6xwxocfxba1UEtIhzb2PyXUZ24JwOnsKqiAEt4mCXJbPvQkFgLIGefbFGEibkux4pvAIvdO0u1+8eb5qvMBjg9KhydFJLZbLpFu6nem4nqajuPqiB0G0JywY9+WJo7h1RzhXmI9JC10mNBV3Yy7t+VLAwqS7TwuT7mkMOZsj1HFIZISx455pAFjki7Ak/SgAyevnac/SkBMwqeCMUASWAHgZoAkLUZ5BoAmIMng9PmgCYtRkZJP50ASWOEHBIJ+tAExFEWG4DHwaQHkj9o+EweOtRVmKwSMspDZyVKggD865n/AORm6/hHJl0nUJWsgluXe9z5Kq4JbB5yM+nr3xxz0rSiLOmeM9Dhk8O+DbmcBpTosfpB6kO3X3wCBU8lxa+yoZs0O4WJV8pwDgnCL6QP1/rSQM61+zvpsH+NS3cWVMULEOq5BJIHX2q+PMiZ/wAnoYSomOc10UZWSFyh425NFDsIsoJ4iNKgs539uUUcnheCVkfek2AqpuyCpzkDr/3rDn/k04tnmW8Zmcjq6kqzAnHuMn36/l9Kxias7h+zRZOLnWbtwFVIkgA98nJ+nQ1XDnkf0h8uONfbM+NbGOx1vVLOCMQxs29PYBueP51x/LVTNvju4nPNQjWzt5oHZ32tvyTjGQR/U1EH20azVbLLwLp8cviLR0SKYr95UbBLucnp1B9utaxbc0jNpdWeoo7SfzBGmHHXg9PivVs86jEsTxSlJFOfinYURRuc7RQB9v8ATx1oAiHJXkgjpigCG5y+1Bk4yc9qYENsjAkqQKAMiLzFG/8ABRdBRMWqM2WYsF7dgKOwqGLJIrZG2yupJyccUm7GlQ9HqWzkTTke26pcUOz6XXJS52NIMds0dBWVMsEECFnGW7AVoskt0ZtvMkg8tvRATyo6n86GkhJsfeG3SIDylIx1PWoLK+a39X7tBk+/aqQmx63toYlJnG4YqWBGRRI+2BNkf+duv6U69hd6CBNq4DEkdzSGTVX/AM3P0ooCRVwhLvgD2FFILPr
WPzizksyj3NDwJFtDFD5QCglj2CVDKFJ9LluAdpeJO/qpqSQqsPb6YsKeogn6UnIdDf3ONEyUycdalyHR4z/aP8y7+1HUraWeKG3aeOIOSWEaqigkgZPGaxSubZq8RRze3eQSK/3lJEgBniEzhXZcgAYycscfhz05zg1qQegvtN8O3GnfZF4W1S9jjQ2tvbx3SxpjyFlTjH/xO0Yo5P2ivoIYbPOt9EYJmaCQTyOwXcTkYP8AfWoQ2d2/ZzCS6nfWcgYyJaLIzKuFTL9PzyPir4sNsU8o9Ax2UZAVQuT71tZnRCS1EJ/hJ+KfYVGQrnhQAPegZzj7edsHhazWWQ/vLgkDdj8KHnr9K5vkv9UacX9Hm6/yLqRZGYRqcjuG6Ege5II5x7VhHRs9nfP2bLUReFdQnVDNPc3ARCpznHOSfqa2+OrlJ/4TzyqMV/prP2k6qJfGl8wljEUbGElG4IUYznucg1yfJfaTo2+OqVnNfFN+l9bJFEGDD1s4HyeCf75qeDjcXbL5p2qOn/szaEb3W73Wpy8i2KeTESuAZ5B6iT7qn/8AfxXdwwy5HNOVRr2eh5LUZ4dkI/ymtznFpLYk8yMT7d6YEDEc84GPcUAYwGIBkG48YXvTAyIAjbejdM0gMiPaCVwSO9AEfLkIxjApgSMBA5Ix1+lFgR4BIHTpQBHBI9hQB8y7fxfpQAKRgqjJBzxTEQKsXbMcjHdgkjgH2p2KgqmYsGC7Yx2xUjGBcQuArMu72HJNSMVfUrSIlC0zlc52xMapJsm0M6Zcw3aNLOJrePO2MOuC5+lKVrQ1T2T80tcOqK2FIyW6Y70h+QiSAquxd5PJfI29aLAYjEfl72njBJIG3+H60uwUKzyRPgLcxOTjKn04/wB6pNixobspnWHYmw98gcc9PpzUsaAx+I0hljA9TEhWXZ37jilVjsvYdbjklSBYQ0jjIVX5I9+ajqOwseo2jyMkhdGHQFCfy470UwslLqNuo4V3i7uBwB+dS0x2eTv2ofDtinii08Q208zadfuBdrGATFKBg4zx6gAeTjg0q6u35LvssFH9kXg6fx740tr7VLWO38K2RUXO44jYJykSsRmRmYZY+xPI9IrTLyRo9V+PLGy8T+C9c0uW4j23lrIqt1wQNwIHwVpNYBPJ4Hv7b7s5NsuxUKlgF/ET9TyBWcXZbVHav2XpTLqfiFUcvOYLf0oMKBvbjH6VrAiR6DAij5nuFVccguAM1paIJWzW7NuhlSUf9DbqE09ANGSKIhZGCuezcUAedP2rdaW2vdEtIDumktmkOeiqX4/XFYci7SRpB0cJj1CWRWZwI2cY3H+Iew98kH+xUdaLs9J+DvEKeF/srtk05ozf3ZIizyETaN8nHyeM9/pWXFyOMZe7NeaPZxX0cn1KOfU74w6fHLcXihikEZDSSsFLHC8EnAJwOTg8Vgk5S/01xGJrGj293r2q21jYwl7ueRYo4wp5YnAOPjvn2NdUY1hGF9nbPZXgDw5beDfDltpVlvkEeZJZnHqllb8T47fHwBXYkkqOeUrZtqKTDvyBu/iPagQswG5iv9aYiC+UdwLqzY6e1FgSt4lllZYNpKjJ5A/nQ2CRiaJ45cSKVI6ZFCYBABtCoBjvzmgDDRK2DyXPVs0ADmjDKFOFA9upoQC4tmDk7i0fPA4qgBta7s5Zs+4c8UCJm3LCNT6Qo7D/AFoAxDAkLSZIYuMerkj4A7UN2CNkmswofAYbhzjoR81imWUf3eFQEkBDluQTlVHatKJskunQgMY1ijaQcsCQ2KWQwfX1gPKCGLqQRI3Uf7Uk2NpUL3bXHnAOdmwD1FPjr37U0AvI/mRHzpY2DHHrYgqKSEAtbiysJFliQ+jjjLr9QDVNOWBYjkPaztqk5fDBWY7pQo4+g7fWjr1QdrC6pZRpDHHNGpySzSqDnGejGkm7G1aE02RALaukag7vKjLbD9fmhu9hVaHpMyDEDLHgg9AFY+5zS1sZP7w0Rc+e7yY
5ZcLx7CirAClxIZ/U8u1lJyuTtx2/4pNAZ2vcIyKXZCQpDkkfPAphRLUbSxYbJxDcwjACzwbunTjHvU234HoDfyOLLyXnW2t8ekMQFJ7gAirWXZLwUOv+LNO0LR7q5fVLY3v3V1gt3cCSVipAAXqMms5ySVMcc6PIPiJJI3ZojkY2knPbvjp0HxWMDWR0z9l6dLfxTrNvPAN8+nrMGfvskH8jkHn2FaReaRMliz0FPcxSFkZ0e4YgKuzBY+wJFa0vJnfouNLhgiRJLhmDFsshChQvdgByTnqahtL+Sv8ATUvGnjux8ExHzriKd7oyPChUGR1HBYscBFzx3+KJySWVkEn4Z5G+1DxLdeMPFE2szpiIosEezJRFXICg/wB5696hW8suq0axB6A55zjGKGB2Tw9dTJ4asoL+NoJ7eFkMbghk9WcsMZGeK4eX9W0jrj+1P6NF8UXcb3kaW8gEq/vGkVRuD9sMDwTx9MfNacMWssz5ZJukdx/Zc8LqFn8Yaqs+yQyWunhlyJT/AO7MCef+nPvu+a64xezFypV7O/LfWgcxbnjtgvCoBlz8nuat2Z4J7oJbj0XUVvEi8GYhm5/6en59aMhg+kS0uLZbfTZoLqVG9UjrhBzk7j/tRbWWGHhBl02Muwea0igUZMcABJP+lH5A6i9nprTl5gkzw/8AtbVXJ9+ppudYBR8k76ymtYo3MJlc5Cxphm6fNCkmJpoSW0v7vBkt5osDqwC7R9arskFNkUhv1McccJbPAkRWZRj++tFoKZFVnSVxc7w0eC25dgGew96LVYCgri48rzoYSYxx6hgA/JNFrTCmJS3V9Exb/DJZEXA37wA2f8vPP5Va6+yG5LwG0q4fVJZoHtbi0dMZWZSjOD3GcYFKS65sat7RsVvYxptBWOMYwMEc1m5F0VV7qE8xIiGz2B5qkktiZSTWkruXnuGxjnjFaqSWkZ9fbHbe/giJEZ3A4HPPA+TUOLY7SHYtQRyUiidoz/mOc1PVoqxsaq2NjWkeQMHLdKXUdiczpIy/+XQFehzz+tNIRhYISxY28ak9h0pgWFk9urqsq7F+Omal2NF2kCugKsrL271lZVFJeeHYixKJkHoAcY/3q1ITRXtoUkSqAuCT25/4ppoVAPuE8eRi3ePeSzvlHUewHIPPxTEA1ewvrez8/TZ47p0IMsWxt4UfiZAPxHHbGe/PSiLTdMbTrBrsup317O40YvfQoB5kc1v5ZHwTgH9K0UEv7wZuTf8AI81/qk1kmbW3srsFMlZJHJXPIIIO3Ax71PSKe7RVyayB1Cy1zUtHuYvvelxXUisscjrIxQkH1cL1Gfb564qZJV+o1fk5ZN9i+tNh0v8AT2lB3K370/r6c1z/AIpXho3U4lbqX2HeLLm4BXUdFht3YuwcTk4z1/8AT7jPGf8AenHirbE5rwbn9nH2T3HhXWL3U7/U4bm6mh+7xiNdiJHkHkEk7uABjgD3q4xURSl2OlRQSQxBYvu7oWyS0hLNj3z/AKUUTZhXaB55ttusj4BfJYqOwH/FOmFnN/tZ+zT/AMcXljfDVUs7mGDyUVE8xW9W4bh1GCT+VHWwTPvAv2Wz6PNex3d1BfaRdmNZrNdOEcJZUK4G+Rl9WRk4zwMEZNLrjY7zY1B9jnhO2Bkg0BjMvmfvpLh2EYZdvAY4G3qp7Nz8VLj9j7fRr+ufY3ol5NLJFreq200q7SseydRjg54U8jGSWyTlicmj8SH+Qt/Bf2b+FvD+iy2Oo6BZeILh3Zmv7q22SMrdBt3HZj3U/PBqnF+Bd/RudrJHZafb2FjpohtIYRBFFJOSsaDhQBjij9hOmOJcL5cUccaQyKAHJLNvx1J9s005eROjPmujyfdp0cSNtxMGOF65Jz/T9aeWLCPvNd7uIuw2gbNqgBB/1Y/l/vRkeBq3NxGpeS5S1WYFSYogSfb4Ix+YpZCgcs91DM6C7mlj24C7GQD2GM8fWnV+BCkkYa9BuVmleT1Mzs3pwBj
kH25xj4+apXWBNItlubxrVULb0K7d/mbGRR0GOhP15qKyU2BnubgQARzXgKAejzzg/H54p0IB9+1NpswWTl5CN8rTejj3zk88cimo+2H+F3Hb396haPf0AUXMu4LwMnaCQe9ThDoWutOWd4hPetC9s+VwqgH5zzgd8flVJixsObgvKkVzLFfW0eWDxpkrx1x1z/LmlXlBfsBfmOWQmBCJOAkaqV3KP4vamrWxMJOLhkIhjC56k8k1aryIqbywu58mV1jU9cnOa0jKKJcWxNbERygTTCRB0RBVd70T0otbWWUKQkYUHgZ7Cs2UhiGKXkk5J+KTGhmOCQ8k8/SpGFW0f6596LAkbSbtgUrGFhjuIMmNip+DSdMB2HUmHoulI/6lqOvoqxvNvPypD0ZAGLeIZO3gn45p2IG8cAUs64HuGxQBWTXFuf3MDygg8kDOPrVdWKxO5WP1PJGbgAerD4GPp1poQSLzGtVjtLWLyySQijgfPNH+jBGzmDSCSNUkf1BmGAvxgf1owGQVjZ3lzcSKzegcLuUhc59wemKHSQKwjWEsEZe4gVZA5LKr7l2/GeaVrwANJbfzSkUU1vcYyZgw6f5en8qbixWgE9nJdMAskkb4xlT/AL5xQsD2F+5wi7aaW2lYsArhpN2eCDgduvPei3QUHa+mQCK3jaK2BA2AAHA7Z/2/Wl1vLCyV1cwTQENA5Bbo5DcY/v3pdXY7K42czyrLBEVgBBaMRhg3/wBvaqX2LJKex3YbyYIsk5V59xH1osBY28Hmui+tgvJ83I/MY/1pgShsyxchcrGM7whwaAAJNFE8vIyPxexP0p0xWT3hgcRiYkdEAz/M0UAbRN1yZfMga1ii/CpIJJI7YOBSnFIcWOsHcZR8sBgArgdM4JpVQ7MRFmGHhVnxgFnx27Y6UUKxmO3jVlMgUIqkhVUEfnk80DEyIfvax2sqyXTcKr5AzjOD78c06YvOCamKLJugQqnaREgwD7cfX+dKn4HY47WCjYY51/zAhUqaY7RX3yRXCqsUUionKrJLyD71UU0S8icsV9I6mCeGJVUBWjGCCM8+5PNVjyLIrPaapO8iy3sxj/EYWkOw++4dD7+1NOK8EuLOhRxLgcVjZofS20bqdyg/lRYynm0+FJTtGM/FWpMTVE47VOKLENR26ZpWNIZjhUUrCgyxKKQwqxikB8YV70WAN7aN1IYD2osCruIjaShoXINVdiYLz5XJV3Y/Q4p0hMG+DlzuJ/CctnNOxEPKRXBI3KeNvQZ96dgQuG8rCADDAk8ULImxWDU5mv1jT0Z4yKbiqsFIflnlPDSEjpz+lTRRGSSW3LRrKdo5wvApUnkBC5mJIZtzHqMtwD1qkkIjbo10yhnKDGfSKbwAWW4WO5FuqHOAN+7mlV5Cy1SE4lDuW2bSO3BqCqI6hBGLBpUUBtuRnnn3oi8gxKwst9x92aVi2CxkI549varbrIki6i0mJohI7uzMMZNZORfUrpdJji8wrI2FOAABVqRLQCQRRhUaPcVG4tnGadiE7p97LGqIqOOvJYEnrknH8qaQgCP5cjSIqbugyMgY9vrVAV+pXbwKx2xmSUgbgoG0e3H9aqMUyW6LOJY1WCNIkVWIU4HPp7596zbyWsobubgxLtRF6ZyfgUkFgrBnuEYs+0g/wihug2MpEvnIMvnfjIPxSsYd4Y0jDIGBUnBzz+R7UKTChbyAtwj7iWOQaq/Aq8g76JINzct5eGIz+Ie30oTE8ZGLBGvtMW9DeUnOIQAwGD7mlJ9XQ0sWDtnLjzW5JwTmmwQWONbiVwcgEHvmlpCP/9k=\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/jpeg": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - 
"output_type": "display_data" - } - ], - "source": [ - "for image_features in parsed_image_dataset:\n", - " image_raw = image_features[\"image_raw\"].numpy()\n", - " display.display(display.Image(data=image_raw))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright 2020 Google Inc.\n", - "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at\n", - "http://www.apache.org/licenses/LICENSE-2.0\n", - "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "pL--_KGdYoBz" - ], - "name": "tfrecord.ipynb", - "private_outputs": true, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/introduction_to_tensorflow/labs/what_if_mortgage.ipynb b/notebooks/introduction_to_tensorflow/labs/what_if_mortgage.ipynb deleted file mode 100644 index f94860ea..00000000 --- a/notebooks/introduction_to_tensorflow/labs/what_if_mortgage.ipynb +++ /dev/null @@ -1,766 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# LABXX: What-if Tool: Model Interpretability Using Mortgage Data \n", - "\n", - "**Learning Objectives**\n", - "\n", - "1. Create a What-if Tool visualization\n", - "2. 
What-if Tool exploration using the XGBoost Model\n", - " \n", - " \n", - "## Introduction \n", - "\n", - "This notebook shows how to use the [What-if Tool (WIT)](https://pair-code.github.io/what-if-tool/) on a deployed [Cloud AI Platform](https://cloud.google.com/ai-platform/) model. The What-If Tool provides an easy-to-use interface for expanding understanding of black-box classification and regression ML models. With the plugin, you can perform inference on a large set of examples and immediately visualize the results in a variety of ways. Additionally, examples can be edited manually or programmatically and re-run through the model in order to see the results of the changes. It contains tooling for investigating model performance and fairness over subsets of a dataset. The purpose of the tool is to give people a simple, intuitive, and powerful way to explore and investigate trained ML models through a visual interface with absolutely no code required.\n", - "\n", - "[Extreme Gradient Boosting (XGBoost)](https://xgboost.ai/) is a decision-tree-based ensemble Machine Learning algorithm that uses a gradient boosting framework. In prediction problems involving unstructured data (images, text, etc.) artificial neural networks tend to outperform all other algorithms or frameworks. However, when it comes to small-to-medium structured/tabular data, decision tree based algorithms are considered best-in-class right now. Please see the chart below for the evolution of tree-based algorithms over the years.\n", - "\n", - "*You don't need your own cloud project* to run this notebook. \n", - "\n", - "** UPDATE LINK BEFORE PRODUCTION **: Each learning objective will correspond to a __#TODO__ in the [student lab notebook](https://github.com/GoogleCloudPlatform/training-data-analyst/blob/gwendolyn-dev/courses/machine_learning/deepdive2/ml_on_gc/what_if_mortgage.ipynb)) -- try to complete that notebook first before reviewing this solution notebook." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Set up environment variables and load necessary libraries \n", - "We will start by importing the necessary libraries for this lab." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Y93EHw56Vtid" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Python Version: 3\n" - ] - } - ], - "source": [ - "import sys\n", - "\n", - "python_version = sys.version_info[0]\n", - "print(\"Python Version: \", python_version)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip3 install witwidget" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "CosDxuLy7M4Q" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import witwidget\n", - "from witwidget.notebook.visualization import WitConfigBuilder, WitWidget" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "bFIxtguO1In_" - }, - "source": [ - "## Loading the mortgage test dataset\n", - "\n", - "The model we'll be exploring here is a binary classification model built with XGBoost and trained on a [mortgage dataset](https://www.ffiec.gov/hmda/hmdaflat.htm). It predicts whether or not a mortgage application will be approved. 
In this section we'll:\n", - "\n", - "* Download some test data from Cloud Storage and load it into a numpy array + Pandas DataFrame\n", - "* Preview the features for our model in Pandas" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "9BngZjdsO6Mr" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Copying gs://mortgage_dataset_files/data.pkl...\n", - "| [1 files][104.0 MiB/104.0 MiB] \n", - "Operation completed over 1 objects/104.0 MiB. \n", - "Copying gs://mortgage_dataset_files/x_test.npy...\n", - "/ [1 files][172.0 KiB/172.0 KiB] \n", - "Operation completed over 1 objects/172.0 KiB. \n", - "Copying gs://mortgage_dataset_files/y_test.npy...\n", - "/ [1 files][ 628.0 B/ 628.0 B] \n", - "Operation completed over 1 objects/628.0 B. \n" - ] - } - ], - "source": [ - "# Download our Pandas dataframe and our test features and labels\n", - "!gsutil cp gs://mortgage_dataset_files/data.pkl .\n", - "!gsutil cp gs://mortgage_dataset_files/x_test.npy .\n", - "!gsutil cp gs://mortgage_dataset_files/y_test.npy ." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Preview the Features \n", - "\n", - "Preview the features from our model as a pandas DataFrame" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "GkHavVlmGYlk" - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
as_of_yearoccupancyloan_amt_thousandscounty_codeapplicant_income_thousandspopulationffiec_median_fam_incometract_to_msa_income_pctnum_owner_occupied_unitsnum_1_to_4_family_units...purchaser_type_Life insurance company, credit union, mortgage bank, or finance companypurchaser_type_Loan was not originated or was not sold in calendar year covered by registerpurchaser_type_Other type of purchaserpurchaser_type_Private securitizationhoepa_status_HOEPA loanhoepa_status_Not a HOEPA loanlien_status_Not applicable (purchased loans)lien_status_Not secured by a lienlien_status_Secured by a first lienlien_status_Secured by a subordinate lien
31065020161110.0119.055.05930.064100.098.811305.01631.0...0000010010
63012920161480.033.0270.04791.090300.0144.061420.01450.0...0100010010
71548420162240.059.096.03439.0105700.0104.62853.01076.0...0000010010
8877082016176.065.085.03952.061300.090.931272.01666.0...0100010001
71959820161100.0127.070.02422.046400.088.37650.01006.0...0100010010
\n", - "

5 rows × 44 columns

\n", - "
" - ], - "text/plain": [ - " as_of_year occupancy loan_amt_thousands county_code \\\n", - "310650 2016 1 110.0 119.0 \n", - "630129 2016 1 480.0 33.0 \n", - "715484 2016 2 240.0 59.0 \n", - "887708 2016 1 76.0 65.0 \n", - "719598 2016 1 100.0 127.0 \n", - "\n", - " applicant_income_thousands population ffiec_median_fam_income \\\n", - "310650 55.0 5930.0 64100.0 \n", - "630129 270.0 4791.0 90300.0 \n", - "715484 96.0 3439.0 105700.0 \n", - "887708 85.0 3952.0 61300.0 \n", - "719598 70.0 2422.0 46400.0 \n", - "\n", - " tract_to_msa_income_pct num_owner_occupied_units \\\n", - "310650 98.81 1305.0 \n", - "630129 144.06 1420.0 \n", - "715484 104.62 853.0 \n", - "887708 90.93 1272.0 \n", - "719598 88.37 650.0 \n", - "\n", - " num_1_to_4_family_units ... \\\n", - "310650 1631.0 ... \n", - "630129 1450.0 ... \n", - "715484 1076.0 ... \n", - "887708 1666.0 ... \n", - "719598 1006.0 ... \n", - "\n", - " purchaser_type_Life insurance company, credit union, mortgage bank, or finance company \\\n", - "310650 0 \n", - "630129 0 \n", - "715484 0 \n", - "887708 0 \n", - "719598 0 \n", - "\n", - " purchaser_type_Loan was not originated or was not sold in calendar year covered by register \\\n", - "310650 0 \n", - "630129 1 \n", - "715484 0 \n", - "887708 1 \n", - "719598 1 \n", - "\n", - " purchaser_type_Other type of purchaser \\\n", - "310650 0 \n", - "630129 0 \n", - "715484 0 \n", - "887708 0 \n", - "719598 0 \n", - "\n", - " purchaser_type_Private securitization hoepa_status_HOEPA loan \\\n", - "310650 0 0 \n", - "630129 0 0 \n", - "715484 0 0 \n", - "887708 0 0 \n", - "719598 0 0 \n", - "\n", - " hoepa_status_Not a HOEPA loan \\\n", - "310650 1 \n", - "630129 1 \n", - "715484 1 \n", - "887708 1 \n", - "719598 1 \n", - "\n", - " lien_status_Not applicable (purchased loans) \\\n", - "310650 0 \n", - "630129 0 \n", - "715484 0 \n", - "887708 0 \n", - "719598 0 \n", - "\n", - " lien_status_Not secured by a lien \\\n", - "310650 0 \n", - "630129 0 \n", - "715484 0 \n", - "887708 
0 \n", - "719598 0 \n", - "\n", - " lien_status_Secured by a first lien \\\n", - "310650 1 \n", - "630129 1 \n", - "715484 1 \n", - "887708 0 \n", - "719598 1 \n", - "\n", - " lien_status_Secured by a subordinate lien \n", - "310650 0 \n", - "630129 0 \n", - "715484 0 \n", - "887708 1 \n", - "719598 0 \n", - "\n", - "[5 rows x 44 columns]" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "features = pd.read_pickle(\"data.pkl\")\n", - "features.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Int64Index: 999999 entries, 310650 to 875688\n", - "Data columns (total 44 columns):\n", - "as_of_year 999999 non-null int16\n", - "occupancy 999999 non-null int8\n", - "loan_amt_thousands 999999 non-null float64\n", - "county_code 999999 non-null float64\n", - "applicant_income_thousands 999999 non-null float64\n", - "population 999999 non-null float64\n", - "ffiec_median_fam_income 999999 non-null float64\n", - "tract_to_msa_income_pct 999999 non-null float64\n", - "num_owner_occupied_units 999999 non-null float64\n", - "num_1_to_4_family_units 999999 non-null float64\n", - "agency_code_Consumer Financial Protection Bureau (CFPB) 999999 non-null uint8\n", - "agency_code_Department of Housing and Urban Development (HUD) 999999 non-null uint8\n", - "agency_code_Federal Deposit Insurance Corporation (FDIC) 999999 non-null uint8\n", - "agency_code_Federal Reserve System (FRS) 999999 non-null uint8\n", - "agency_code_National Credit Union Administration (NCUA) 999999 non-null uint8\n", - "agency_code_Office of the Comptroller of the Currency (OCC) 999999 non-null uint8\n", - "loan_type_Conventional (any loan other than FHA, VA, FSA, or RHS loans) 999999 non-null uint8\n", - "loan_type_FHA-insured (Federal Housing Administration) 999999 non-null uint8\n", - "loan_type_FSA/RHS (Farm Service Agency 
or Rural Housing Service) 999999 non-null uint8\n", - "loan_type_VA-guaranteed (Veterans Administration) 999999 non-null uint8\n", - "property_type_Manufactured housing 999999 non-null uint8\n", - "property_type_One to four-family (other than manufactured housing) 999999 non-null uint8\n", - "loan_purpose_Home improvement 999999 non-null uint8\n", - "loan_purpose_Home purchase 999999 non-null uint8\n", - "loan_purpose_Refinancing 999999 non-null uint8\n", - "preapproval_Not applicable 999999 non-null uint8\n", - "preapproval_Preapproval was not requested 999999 non-null uint8\n", - "preapproval_Preapproval was requested 999999 non-null uint8\n", - "purchaser_type_Affiliate institution 999999 non-null uint8\n", - "purchaser_type_Commercial bank, savings bank or savings association 999999 non-null uint8\n", - "purchaser_type_Fannie Mae (FNMA) 999999 non-null uint8\n", - "purchaser_type_Farmer Mac (FAMC) 999999 non-null uint8\n", - "purchaser_type_Freddie Mac (FHLMC) 999999 non-null uint8\n", - "purchaser_type_Ginnie Mae (GNMA) 999999 non-null uint8\n", - "purchaser_type_Life insurance company, credit union, mortgage bank, or finance company 999999 non-null uint8\n", - "purchaser_type_Loan was not originated or was not sold in calendar year covered by register 999999 non-null uint8\n", - "purchaser_type_Other type of purchaser 999999 non-null uint8\n", - "purchaser_type_Private securitization 999999 non-null uint8\n", - "hoepa_status_HOEPA loan 999999 non-null uint8\n", - "hoepa_status_Not a HOEPA loan 999999 non-null uint8\n", - "lien_status_Not applicable (purchased loans) 999999 non-null uint8\n", - "lien_status_Not secured by a lien 999999 non-null uint8\n", - "lien_status_Secured by a first lien 999999 non-null uint8\n", - "lien_status_Secured by a subordinate lien 999999 non-null uint8\n", - "dtypes: float64(8), int16(1), int8(1), uint8(34)\n", - "memory usage: 104.0 MB\n" - ] - } - ], - "source": [ - "features.info()" - ] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "## Load the test features and labels into numpy arrays" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Developing machine learning models in Python often requires the use of NumPy arrays. Recall that NumPy, which stands for Numerical Python, is a library consisting of multidimensional array objects and a collection of routines for processing those arrays. NumPy arrays are efficient data structures for working with data in Python, and machine learning models like those in the scikit-learn library, and deep learning models like those in the Keras library, expect input data in the format of NumPy arrays and make predictions in the format of NumPy arrays. As such, it is common to need to save NumPy arrays to file. Note that the data info reveals the following datatypes dtypes: float64(8), int16(1), int8(1), uint8(34) -- and no strings or \"objects\". So, let's now load the features and labels into numpy arrays. " - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "57KQ_XX2FEdl" - }, - "outputs": [], - "source": [ - "x_test = np.load(\"x_test.npy\")\n", - "y_test = np.load(\"y_test.npy\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's take a look at the contents of the 'x_test.npy' file. You can see the \"array\" structure." - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[2.016e+03 1.000e+00 4.170e+02 ... 0.000e+00 1.000e+00 0.000e+00]\n", - " [2.016e+03 1.000e+00 2.760e+02 ... 0.000e+00 1.000e+00 0.000e+00]\n", - " [2.016e+03 1.000e+00 6.000e+01 ... 0.000e+00 1.000e+00 0.000e+00]\n", - " ...\n", - " [2.016e+03 1.000e+00 5.000e+02 ... 0.000e+00 0.000e+00 0.000e+00]\n", - " [2.016e+03 1.000e+00 1.100e+02 ... 0.000e+00 1.000e+00 0.000e+00]\n", - " [2.016e+03 1.000e+00 3.680e+02 ... 
0.000e+00 1.000e+00 0.000e+00]]\n" - ] - } - ], - "source": [ - "print(x_test)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Combine the features and labels into one array for the What-if Tool\n", - "\n", - "Note that the numpy.hstack() function is used to stack the sequence of input arrays horizontally (i.e. column wise) to make a single array. In the following example, the numpy matrix is reshaped into a vector using the reshape function with .reshape((-1, 1) to convert the array into a single column matrix." - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "hoFIrCQfFgvm" - }, - "outputs": [], - "source": [ - "test_examples = np.hstack((x_test, y_test.reshape(-1, 1)))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "-8xNn8EhgUi7" - }, - "source": [ - "## Using the What-if Tool to interpret our model\n", - "With our test examples ready, we can now connect our model to the What-if Tool using the `WitWidget`. To use the What-if Tool with Cloud AI Platform, we need to send it:\n", - "* A Python list of our test features + ground truth labels\n", - "* Optionally, the names of our columns\n", - "* Our Cloud project, model, and version name (we've created a public one for you to play around with)\n", - "\n", - "See the next cell for some exploration ideas in the What-if Tool." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Create a What-if Tool visualization\n", - "\n", - "This prediction adjustment function is needed as this xgboost model's prediction returns just a score for the positive class of the binary classification, whereas the What-If Tool expects a list of scores for each class (in this case, both the negative class and the positive class). \n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**NOTE:** The WIT may take a minute to load. 
While it is loading, review the parameters that are defined in the next cell, BUT NOT RUN IT, it is simply for reference." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# ******** DO NOT RUN THIS CELL ********\n", - "\n", - "# TODO 1\n", - "\n", - "PROJECT_ID = \"YOUR_PROJECT_ID\"\n", - "MODEL_NAME = \"YOUR_MODEL_NAME\"\n", - "VERSION_NAME = \"YOUR_VERSION_NAME\"\n", - "TARGET_FEATURE = \"mortgage_status\"\n", - "LABEL_VOCAB = [\"denied\", \"approved\"]\n", - "\n", - "# TODO 1a\n", - "\n", - "config_builder = (\n", - " WitConfigBuilder(\n", - " test_examples.tolist(), features.columns.tolist() + [\"mortgage_status\"]\n", - " )\n", - " .set_ai_platform_model(\n", - " PROJECT_ID,\n", - " MODEL_NAME,\n", - " VERSION_NAME,\n", - " adjust_prediction=adjust_prediction,\n", - " )\n", - " .set_target_feature(TARGET_FEATURE)\n", - " .set_label_vocab(LABEL_VOCAB)\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Run this cell to load the WIT config builder. 
**NOTE:** The WIT may take a minute to load" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "dqAbAmxkgW4p" - }, - "outputs": [ - { - "data": { - "text/html": [ - "" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "654bf83aca0642c78308122d31fc000f", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "WitWidget(config={'use_aip': True, 'model_name': 'xgb_mortgage', 'uses_json_list': True, 'get_explanations': T…" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# TODO 1b\n", - "\n", - "\n", - "def adjust_prediction(pred):\n", - " return [1 - pred, pred]\n", - "\n", - "\n", - "config_builder = (\n", - " WitConfigBuilder(\n", - " test_examples.tolist(), features.columns.tolist() + [\"mortgage_status\"]\n", - " )\n", - " .set_ai_platform_model(\n", - " \"wit-caip-demos\",\n", - " \"xgb_mortgage\",\n", - " \"v1\",\n", - " adjust_prediction=adjust_prediction,\n", - " )\n", - " .set_target_feature(\"mortgage_status\")\n", - " .set_label_vocab([\"denied\", \"approved\"])\n", - ")\n", - "WitWidget(config_builder, height=800)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "_B2BskDk55rk" - }, - "source": [ - "## What-if Tool exploration using the XGBoost Model\n", - "\n", - "#### TODO 2\n", - "\n", - "* **Individual data points**: The default graph shows all data points from the test set, colored by their ground truth label (approved or denied)\n", - " * Try selecting data points close to the middle and tweaking some of their feature values. Then run inference again to see if the model prediction changes\n", - " * Select a data point and then move the \"Show nearest counterfactual datapoint\" slider to the right. 
This will highlight a data point with feature values closest to your original one, but with a different prediction\n", - "\n", - "#### TODO 2a\n", - "\n", - "* **Binning data**: Create separate graphs for individual features\n", - " * From the \"Binning - X axis\" dropdown, try selecting one of the agency codes, for example \"Department of Housing and Urban Development (HUD)\". This will create 2 separate graphs, one for loan applications from the HUD (graph labeled 1), and one for all other agencies (graph labeled 0). This shows us that loans from this agency are more likely to be denied\n", - "\n", - "#### TODO 2b\n", - "\n", - "* **Exploring overall performance**: Click on the \"Performance & Fairness\" tab to view overall performance statistics on the model's results on the provided dataset, including confusion matrices, PR curves, and ROC curves.\n", - " * Experiment with the threshold slider, raising and lowering the positive classification score the model needs to return before it decides to predict \"approved\" for the loan, and see how it changes accuracy, false positives, and false negatives.\n", - " * On the left side \"Slice by\" menu, select \"loan_purpose_Home purchase\". You'll now see performance on the two subsets of your data: the \"0\" slice shows when the loan is not for a home purchase, and the \"1\" slice is for when the loan is for a home purchase. Notice that the model's false positive rate is much higher on loans for home purchases. If you expand the rows to look at the confusion matrices, you can see that the model predicts \"approved\" more often for home purchase loans.\n", - " * You can use the optimization buttons on the left side to have the tool auto-select different positive classification thresholds for each slice in order to achieve different goals. If you select the \"Demographic parity\" button, then the two thresholds will be adjusted so that the model predicts \"approved\" for a similar percentage of applicants in both slices. 
What does this do to the accuracy, false positives and false negatives for each slice?\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright 2020 Google Inc.\n", - "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at\n", - "http://www.apache.org/licenses/LICENSE-2.0\n", - "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "What-If Tool with XGBoost Cloud AI Platform Model", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/introduction_to_tensorflow/labs/write_low_level_code.ipynb b/notebooks/introduction_to_tensorflow/labs/write_low_level_code.ipynb deleted file mode 100644 index 7c791adb..00000000 --- a/notebooks/introduction_to_tensorflow/labs/write_low_level_code.ipynb +++ /dev/null @@ -1,760 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Writing Low-Level TensorFlow Code\n", - "\n", - "\n", - "**Learning Objectives**\n", - "\n", - " 1. Practice defining and performing basic operations on constant Tensors\n", - " 2. Use Tensorflow's automatic differentiation capability\n", - " 3. 
Learn how to train a linear regression from scratch with TensorFLow\n", - "\n", - "\n", - "## Introduction \n", - "\n", - "In this notebook, we will start by reviewing the main operations on Tensors in TensorFlow and understand how to manipulate TensorFlow Variables. We explain how these are compatible with python built-in list and numpy arrays. \n", - "\n", - "Then we will jump to the problem of training a linear regression from scratch with gradient descent. The first order of business will be to understand how to compute the gradients of a function (the loss here) with respect to some of its arguments (the model weights here). The TensorFlow construct allowing us to do that is `tf.GradientTape`, which we will describe. \n", - "\n", - "At last we will create a simple training loop to learn the weights of a 1-dim linear regression using synthetic data generated from a linear model. \n", - "\n", - "As a bonus exercise, we will do the same for data generated from a non linear model, forcing us to manual engineer non-linear features to improve our linear model performance.\n", - "\n", - "Each learning objective will correspond to a #TODO in the [student lab notebook](https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive2/introduction_to_tensorflow/labs/write_low_level_code.ipynb) -- try to complete that notebook first before reviewing this solution notebook." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "!sudo chown -R jupyter:jupyter /home/jupyter/training-data-analyst" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Ensure the right version of Tensorflow is installed.\n", - "!pip freeze | grep tensorflow==2.1 || pip install tensorflow==2.1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import tensorflow as tf\n", - "from matplotlib import pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(tf.__version__)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Operations on Tensors" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Variables and Constants" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Tensors in TensorFlow are either contant (`tf.constant`) or variables (`tf.Variable`).\n", - "Constant values can not be changed, while variables values can be.\n", - "\n", - "The main difference is that instances of `tf.Variable` have methods allowing us to change \n", - "their values while tensors constructed with `tf.constant` don't have these methods, and\n", - "therefore their values can not be changed. 
When you want to change the value of a `tf.Variable`\n", - "`x` use one of the following method: \n", - "\n", - "* `x.assign(new_value)`\n", - "* `x.assign_add(value_to_be_added)`\n", - "* `x.assign_sub(value_to_be_subtracted`\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "x = tf.constant([2, 3, 4])\n", - "x" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "x = tf.Variable(2.0, dtype=tf.float32, name=\"my_variable\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "x.assign(45.8)\n", - "x" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "x.assign_add(4)\n", - "x" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "x.assign_sub(3)\n", - "x" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Point-wise operations" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Tensorflow offers similar point-wise tensor operations as numpy does:\n", - " \n", - "* `tf.add` allows to add the components of a tensor \n", - "* `tf.multiply` allows us to multiply the components of a tensor\n", - "* `tf.subtract` allow us to substract the components of a tensor\n", - "* `tf.math.*` contains the usual math operations to be applied on the components of a tensor\n", - "* and many more...\n", - "\n", - "Most of the standard aritmetic operations (`tf.add`, `tf.substrac`, etc.) are overloaded by the usual corresponding arithmetic symbols (`+`, `-`, etc.)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Lab Task #1:** Performing basic operations on Tensors \n", - "1. 
Compute the sum of the constants `a` and `b` below using `tf.add` and `+` and verify both operations produce the same values.\n", - "2. Compute the product of the constants `a` and `b` below using `tf.multiply` and `*` and verify both operations produce the same values.\n", - "3. Compute the exponential of the constant `a` using `tf.math.exp`. Note, you'll need to specify the type for this operation.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO 1a\n", - "a = # TODO -- Your code here.\n", - "b = # TODO -- Your code here.\n", - "c = # TODO -- Your code here.\n", - "d = # TODO -- Your code here.\n", - "\n", - "print(\"c:\", c)\n", - "print(\"d:\", d)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO 1b\n", - "a = # TODO -- Your code here.\n", - "b = # TODO -- Your code here.\n", - "c = # TODO -- Your code here.\n", - "d = # TODO -- Your code here.\n", - "\n", - "print(\"c:\", c)\n", - "print(\"d:\", d)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO 1c\n", - "# tf.math.exp expects floats so we need to explicitly give the type\n", - "a = # TODO -- Your code here.\n", - "b = # TODO -- Your code here.\n", - "\n", - "print(\"b:\", b)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### NumPy Interoperability\n", - "\n", - "In addition to native TF tensors, tensorflow operations can take native python types and NumPy arrays as operands. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# native python list\n", - "a_py = [1, 2]\n", - "b_py = [3, 4]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tf.add(a_py, b_py)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# numpy arrays\n", - "a_np = np.array([1, 2])\n", - "b_np = np.array([3, 4])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tf.add(a_np, b_np)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# native TF tensor\n", - "a_tf = tf.constant([1, 2])\n", - "b_tf = tf.constant([3, 4])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "tf.add(a_tf, b_tf)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can convert a native TF tensor to a NumPy array using .numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "a_tf.numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Linear Regression\n", - "\n", - "Now let's use low level tensorflow operations to implement linear regression.\n", - "\n", - "Later in the course you'll see abstracted ways to do this using high level TensorFlow." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Toy Dataset\n", - "\n", - "We'll model the following function:\n", - "\n", - "\\begin{equation}\n", - "y= 2x + 10\n", - "\\end{equation}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X = tf.constant(range(10), dtype=tf.float32)\n", - "Y = 2 * X + 10\n", - "\n", - "print(f\"X:{X}\")\n", - "print(f\"Y:{Y}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's also create a test dataset to evaluate our models:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X_test = tf.constant(range(10, 20), dtype=tf.float32)\n", - "Y_test = 2 * X_test + 10\n", - "\n", - "print(f\"X_test:{X_test}\")\n", - "print(f\"Y_test:{Y_test}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Loss Function" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The simplest model we can build is a model that for each value of x returns the sample mean of the training set:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "y_mean = Y.numpy().mean()\n", - "\n", - "\n", - "def predict_mean(X):\n", - " y_hat = [y_mean] * len(X)\n", - " return y_hat\n", - "\n", - "\n", - "Y_hat = predict_mean(X_test)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using mean squared error, our loss is:\n", - "\\begin{equation}\n", - "MSE = \\frac{1}{m}\\sum_{i=1}^{m}(\\hat{Y}_i-Y_i)^2\n", - "\\end{equation}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For this simple model the loss is then:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "errors = (Y_hat - Y) ** 2\n", - "loss = tf.reduce_mean(errors)\n", - "loss.numpy()" - ] - }, - { - 
"cell_type": "markdown", - "metadata": {}, - "source": [ - "This values for the MSE loss above will give us a baseline to compare how a more complex model is doing." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, if $\\hat{Y}$ represents the vector containing our model's predictions when we use a linear regression model\n", - "\\begin{equation}\n", - "\\hat{Y} = w_0X + w_1\n", - "\\end{equation}\n", - "\n", - "we can write a loss function taking as arguments the coefficients of the model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def loss_mse(X, Y, w0, w1):\n", - " Y_hat = w0 * X + w1\n", - " errors = (Y_hat - Y) ** 2\n", - " return tf.reduce_mean(errors)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Gradient Function\n", - "\n", - "To use gradient descent we need to take the partial derivatives of the loss function with respect to each of the weights. We could manually compute the derivatives, but with Tensorflow's automatic differentiation capabilities we don't have to!\n", - "\n", - "During gradient descent we think of the loss as a function of the parameters $w_0$ and $w_1$. Thus, we want to compute the partial derivative with respect to these variables. 
\n", - "\n", - "For that we need to wrap our loss computation within the context of `tf.GradientTape` instance which will reccord gradient information:\n", - "\n", - "```python\n", - "with tf.GradientTape() as tape:\n", - " loss = # computation \n", - "```\n", - "\n", - "This will allow us to later compute the gradients of any tensor computed within the `tf.GradientTape` context with respect to instances of `tf.Variable`:\n", - "\n", - "```python\n", - "gradients = tape.gradient(loss, [w0, w1])\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We illustrate this procedure with by computing the loss gradients with respect to the model weights:" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Lab Task #2:** Complete the function below to compute the loss gradients with respect to the model weights `w0` and `w1`. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO 2\n", - "def compute_gradients(X, Y, w0, w1):\n", - " # TODO -- Your code here." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "w0 = tf.Variable(0.0)\n", - "w1 = tf.Variable(0.0)\n", - "\n", - "dw0, dw1 = compute_gradients(X, Y, w0, w1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"dw0:\", dw0.numpy())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"dw1\", dw1.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Training Loop\n", - "\n", - "Here we have a very simple training loop that converges. Note we are ignoring best practices like batching, creating a separate test set, and random weight initialization for the sake of simplicity." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Lab Task #3:** Complete the `for` loop below to train a linear regression. \n", - "1. Use `compute_gradients` to compute `dw0` and `dw1`.\n", - "2. Then, re-assign the value of `w0` and `w1` using the `.assign_sub(...)` method with the computed gradient values and the `LEARNING_RATE`.\n", - "3. Finally, for every 100th step , we'll compute and print the `loss`. Use the `loss_mse` function we created above to compute the `loss`. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO 3\n", - "STEPS = 1000\n", - "LEARNING_RATE = .02\n", - "MSG = \"STEP {step} - loss: {loss}, w0: {w0}, w1: {w1}\\n\"\n", - "\n", - "\n", - "w0 = tf.Variable(0.0)\n", - "w1 = tf.Variable(0.0)\n", - "\n", - "\n", - "for step in range(0, STEPS + 1):\n", - "\n", - " dw0, dw1 = # TODO -- Your code here.\n", - "\n", - " if step % 100 == 0:\n", - " loss = # TODO -- Your code here.\n", - " print(MSG.format(step=step, loss=loss, w0=w0.numpy(), w1=w1.numpy()))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now let's compare the test loss for this linear regression to the test loss from the baseline model that outputs always the mean of the training set:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "loss = loss_mse(X_test, Y_test, w0, w1)\n", - "loss.numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This is indeed much better!" 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Bonus" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Try modelling a non-linear function such as: $y=xe^{-x^2}$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X = tf.constant(np.linspace(0, 2, 1000), dtype=tf.float32)\n", - "Y = X * tf.exp(-(X**2))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline\n", - "\n", - "plt.plot(X, Y)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def make_features(X):\n", - " f1 = tf.ones_like(X) # Bias.\n", - " f2 = X\n", - " f3 = tf.square(X)\n", - " f4 = tf.sqrt(X)\n", - " f5 = tf.exp(X)\n", - " return tf.stack([f1, f2, f3, f4, f5], axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def predict(X, W):\n", - " return tf.squeeze(X @ W, -1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def loss_mse(X, Y, W):\n", - " Y_hat = predict(X, W)\n", - " errors = (Y_hat - Y) ** 2\n", - " return tf.reduce_mean(errors)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def compute_gradients(X, Y, W):\n", - " with tf.GradientTape() as tape:\n", - " loss = loss_mse(Xf, Y, W)\n", - " return tape.gradient(loss, W)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "STEPS = 2000\n", - "LEARNING_RATE = 0.02\n", - "\n", - "\n", - "Xf = make_features(X)\n", - "n_weights = Xf.shape[1]\n", - "\n", - "W = tf.Variable(np.zeros((n_weights, 1)), dtype=tf.float32)\n", - "\n", - "# For plotting\n", - "steps, losses = [], []\n", - "plt.figure()\n", - "\n", - "\n", - "for step 
in range(1, STEPS + 1):\n", - " dW = compute_gradients(X, Y, W)\n", - " W.assign_sub(dW * LEARNING_RATE)\n", - "\n", - " if step % 100 == 0:\n", - " loss = loss_mse(Xf, Y, W)\n", - " steps.append(step)\n", - " losses.append(loss)\n", - " plt.clf()\n", - " plt.plot(steps, losses)\n", - "\n", - "\n", - "print(f\"STEP: {STEPS} MSE: {loss_mse(Xf, Y, W)}\")\n", - "\n", - "plt.figure()\n", - "plt.plot(X, Y, label=\"actual\")\n", - "plt.plot(X, predict(Xf, W), label=\"predicted\")\n", - "plt.legend()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright 2020 Google Inc. Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/introduction_to_tensorflow/solutions/2a_dataset_api.ipynb b/notebooks/introduction_to_tensorflow/solutions/2_dataset_api.ipynb similarity index 100% rename from notebooks/introduction_to_tensorflow/solutions/2a_dataset_api.ipynb rename to notebooks/introduction_to_tensorflow/solutions/2_dataset_api.ipynb diff --git a/notebooks/introduction_to_tensorflow/solutions/2b_loading_filedata.ipynb b/notebooks/introduction_to_tensorflow/solutions/2b_loading_filedata.ipynb deleted file mode 100644 index 6497df97..00000000 --- a/notebooks/introduction_to_tensorflow/solutions/2b_loading_filedata.ipynb +++ /dev/null @@ -1,992 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "sUtoed20cRJJ" - }, - "source": [ - "# How to Load CSV and Numpy File Types in TensorFlow 2.0\n", - "\n", - "\n", - "\n", - "## Learning Objectives\n", - "\n", - "1. Load a CSV file into a `tf.data.Dataset`. \n", - "2. Load Numpy data\n", - "\n", - "\n", - "\n", - "\n", - "## Introduction \n", - "\n", - "In this lab, you load CSV data from a file into a `tf.data.Dataset`. 
This tutorial provides an example of loading data from NumPy arrays into a `tf.data.Dataset` you also load text data.\n", - "\n", - "Each learning objective will correspond to a __#TODO__ in the [student lab notebook](https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive2/ml_on_gcloud_v2/labs/03_load_diff_filedata.ipynb) -- try to complete that notebook first before reviewing this solution notebook." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "fgZ9gjmPfSnK" - }, - "source": [ - "## Load necessary libraries \n", - "We will start by importing the necessary libraries for this lab." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "baYFZMW_bJHh" - }, - "outputs": [], - "source": [ - "import functools\n", - "import os\n", - "\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "\n", - "os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"2\"\n", - "\n", - "\n", - "print(\"TensorFlow version: \", tf.version.VERSION)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Ncf5t6tgL5ZI" - }, - "outputs": [], - "source": [ - "TRAIN_DATA_URL = \"https://storage.googleapis.com/tf-datasets/titanic/train.csv\"\n", - "TEST_DATA_URL = \"https://storage.googleapis.com/tf-datasets/titanic/eval.csv\"\n", - "\n", - "train_file_path = tf.keras.utils.get_file(\"train.csv\", TRAIN_DATA_URL)\n", - "test_file_path = tf.keras.utils.get_file(\"eval.csv\", TEST_DATA_URL)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "4ONE94qulk6S" - }, - "outputs": [], - "source": [ - "# Make numpy values easier to read.\n", - "np.set_printoptions(precision=3, suppress=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Wuqj601Qw0Ml" - }, - 
"source": [ - "## Load data\n", - "\n", - "This section provides an example of how to load CSV data from a file into a `tf.data.Dataset`. The data used in this tutorial are taken from the Titanic passenger list. The model will predict the likelihood a passenger survived based on characteristics like age, gender, ticket class, and whether the person was traveling alone.\n", - "\n", - "To start, let's look at the top of the CSV file to see how it is formatted." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "54Dv7mCrf9Yw" - }, - "outputs": [], - "source": [ - "!head {train_file_path}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "jC9lRhV-q_R3" - }, - "source": [ - "You can [load this using pandas](pandas_dataframe.ipynb), and pass the NumPy arrays to TensorFlow. If you need to scale up to a large set of files, or need a loader that integrates with [TensorFlow and tf.data](../../guide/data.ipynb) then use the `tf.data.experimental.make_csv_dataset` function:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "67mfwr4v-mN_" - }, - "source": [ - "The only column you need to identify explicitly is the one with the value that the model is intended to predict. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "iXROZm5f3V4E" - }, - "outputs": [], - "source": [ - "# TODO 1\n", - "LABEL_COLUMN = \"survived\"\n", - "LABELS = [0, 1]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "t4N-plO4tDXd" - }, - "source": [ - "Now read the CSV data from the file and create a dataset. 
\n", - "\n", - "(For the full documentation, see `tf.data.experimental.make_csv_dataset`)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "yIbUscB9sqha" - }, - "outputs": [], - "source": [ - "def get_dataset(file_path, **kwargs):\n", - " # TODO 2\n", - " dataset = tf.data.experimental.make_csv_dataset(\n", - " file_path,\n", - " batch_size=5, # Artificially small to make examples easier to show.\n", - " label_name=LABEL_COLUMN,\n", - " na_value=\"?\",\n", - " num_epochs=1,\n", - " ignore_errors=True,\n", - " **kwargs,\n", - " )\n", - " return dataset\n", - "\n", - "\n", - "raw_train_data = get_dataset(train_file_path)\n", - "raw_test_data = get_dataset(test_file_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "v4oMO9MIxgTG" - }, - "outputs": [], - "source": [ - "def show_batch(dataset):\n", - " for batch, label in dataset.take(1):\n", - " for key, value in batch.items():\n", - " print(f\"{key:20s}: {value.numpy()}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "vHUQFKoQI6G7" - }, - "source": [ - "Each item in the dataset is a batch, represented as a tuple of (*many examples*, *many labels*). The data from the examples is organized in column-based tensors (rather than row-based tensors), each with as many elements as the batch size (5 in this case).\n", - "\n", - "It might help to see this yourself." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "HjrkJROoxoll" - }, - "outputs": [], - "source": [ - "show_batch(raw_train_data)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "YOYKQKmMj3D6" - }, - "source": [ - "As you can see, the columns in the CSV are named. The dataset constructor will pick these names up automatically. 
If the file you are working with does not contain the column names in the first line, pass them in a list of strings to the `column_names` argument in the `make_csv_dataset` function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "2Av8_9L3tUg1" - }, - "outputs": [], - "source": [ - "CSV_COLUMNS = [\n", - " \"survived\",\n", - " \"sex\",\n", - " \"age\",\n", - " \"n_siblings_spouses\",\n", - " \"parch\",\n", - " \"fare\",\n", - " \"class\",\n", - " \"deck\",\n", - " \"embark_town\",\n", - " \"alone\",\n", - "]\n", - "\n", - "temp_dataset = get_dataset(train_file_path, column_names=CSV_COLUMNS)\n", - "\n", - "show_batch(temp_dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "gZfhoX7bR9u4" - }, - "source": [ - "This example is going to use all the available columns. If you need to omit some columns from the dataset, create a list of just the columns you plan to use, and pass it into the (optional) `select_columns` argument of the constructor.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "S1TzSkUKwsNP" - }, - "outputs": [], - "source": [ - "SELECT_COLUMNS = [\n", - " \"survived\",\n", - " \"age\",\n", - " \"n_siblings_spouses\",\n", - " \"class\",\n", - " \"deck\",\n", - " \"alone\",\n", - "]\n", - "\n", - "temp_dataset = get_dataset(train_file_path, select_columns=SELECT_COLUMNS)\n", - "\n", - "show_batch(temp_dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "9cryz31lxs3e" - }, - "source": [ - "## Data preprocessing\n", - "\n", - "A CSV file can contain a variety of data types. 
Typically you want to convert from those mixed types to a fixed length vector before feeding the data into your model.\n", - "\n", - "TensorFlow has a built-in system for describing common input conversions: `tf.feature_column`, see [this tutorial](../keras/feature_columns) for details.\n", - "\n", - "\n", - "You can preprocess your data using any tool you like (like [nltk](https://www.nltk.org/) or [sklearn](https://scikit-learn.org/stable/)), and just pass the processed output to TensorFlow. \n", - "\n", - "\n", - "The primary advantage of doing the preprocessing inside your model is that when you export the model it includes the preprocessing. This way you can pass the raw data directly to your model." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "9AsbaFmCeJtF" - }, - "source": [ - "### Continuous data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Xl0Q0DcfA_rt" - }, - "source": [ - "If your data is already in an appropriate numeric format, you can pack the data into a vector before passing it off to the model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "4Yfji3J5BMxz" - }, - "outputs": [], - "source": [ - "SELECT_COLUMNS = [\"survived\", \"age\", \"n_siblings_spouses\", \"parch\", \"fare\"]\n", - "DEFAULTS = [0, 0.0, 0.0, 0.0, 0.0]\n", - "temp_dataset = get_dataset(\n", - " train_file_path, select_columns=SELECT_COLUMNS, column_defaults=DEFAULTS\n", - ")\n", - "\n", - "show_batch(temp_dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "zEUhI8kZCfq8" - }, - "outputs": [], - "source": [ - "example_batch, labels_batch = next(iter(temp_dataset))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "IP45_2FbEKzn" - }, - "source": [ - "Here's a simple function that will pack 
together all the columns:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "JQ0hNSL8CC3a" - }, - "outputs": [], - "source": [ - "def pack(features, label):\n", - " return tf.stack(list(features.values()), axis=-1), label" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "75LA9DisEIoE" - }, - "source": [ - "Apply this to each element of the dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "VnP2Z2lwCTRl" - }, - "outputs": [], - "source": [ - "packed_dataset = temp_dataset.map(pack)\n", - "\n", - "for features, labels in packed_dataset.take(1):\n", - " print(features.numpy())\n", - " print()\n", - " print(labels.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "1VBvmaFrFU6J" - }, - "source": [ - "If you have mixed datatypes you may want to separate out these simple-numeric fields. The `tf.feature_column` api can handle them, but this incurs some overhead and should be avoided unless really necessary. 
Switch back to the mixed dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "ad-IQ_JPFQge" - }, - "outputs": [], - "source": [ - "show_batch(raw_train_data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "HSrYNKKcIdav" - }, - "outputs": [], - "source": [ - "example_batch, labels_batch = next(iter(temp_dataset))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "p5VtThKfGPaQ" - }, - "source": [ - "So define a more general preprocessor that selects a list of numeric features and packs them into a single column:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "5DRishYYGS-m" - }, - "outputs": [], - "source": [ - "class PackNumericFeatures:\n", - " def __init__(self, names):\n", - " self.names = names\n", - "\n", - " def __call__(self, features, labels):\n", - " numeric_features = [features.pop(name) for name in self.names]\n", - " numeric_features = [\n", - " tf.cast(feat, tf.float32) for feat in numeric_features\n", - " ]\n", - " numeric_features = tf.stack(numeric_features, axis=-1)\n", - " features[\"numeric\"] = numeric_features\n", - "\n", - " return features, labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "1SeZka9AHfqD" - }, - "outputs": [], - "source": [ - "NUMERIC_FEATURES = [\"age\", \"n_siblings_spouses\", \"parch\", \"fare\"]\n", - "\n", - "packed_train_data = raw_train_data.map(PackNumericFeatures(NUMERIC_FEATURES))\n", - "\n", - "packed_test_data = raw_test_data.map(PackNumericFeatures(NUMERIC_FEATURES))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "wFrw0YobIbUB" - }, - "outputs": [], - "source": [ - 
"show_batch(packed_train_data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "_EPUS8fPLUb1" - }, - "outputs": [], - "source": [ - "example_batch, labels_batch = next(iter(packed_train_data))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "o2maE8d2ijsq" - }, - "source": [ - "#### Data Normalization\n", - "\n", - "Continuous data should always be normalized." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "WKT1ASWpwH46" - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "desc = pd.read_csv(train_file_path)[NUMERIC_FEATURES].describe()\n", - "desc" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "cHHstcKPsMXM" - }, - "outputs": [], - "source": [ - "# TODO 1\n", - "MEAN = np.array(desc.T[\"mean\"])\n", - "STD = np.array(desc.T[\"std\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "REKqO_xHPNx0" - }, - "outputs": [], - "source": [ - "def normalize_numeric_data(data, mean, std):\n", - " # TODO 2\n", - " # Center the data\n", - " return (data - mean) / std" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(MEAN, STD)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "VPsoMUgRCpUM" - }, - "source": [ - "Now create a numeric column. The `tf.feature_columns.numeric_column` API accepts a `normalizer_fn` argument, which will be run on each batch.\n", - "\n", - "Bind the `MEAN` and `STD` to the normalizer fn using [`functools.partial`](https://docs.python.org/3/library/functools.html#functools.partial)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Bw0I35xRS57V" - }, - "outputs": [], - "source": [ - "# See what you just created.\n", - "normalizer = functools.partial(normalize_numeric_data, mean=MEAN, std=STD)\n", - "\n", - "numeric_column = tf.feature_column.numeric_column(\n", - " \"numeric\", normalizer_fn=normalizer, shape=[len(NUMERIC_FEATURES)]\n", - ")\n", - "numeric_columns = [numeric_column]\n", - "numeric_column" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "HZxcHXc6LCa7" - }, - "source": [ - "When you train the model, include this feature column to select and center this block of numeric data:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "b61NM76Ot_kb" - }, - "outputs": [], - "source": [ - "example_batch[\"numeric\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "j-r_4EAJAZoI" - }, - "outputs": [], - "source": [ - "numeric_layer = tf.keras.layers.DenseFeatures(numeric_columns)\n", - "numeric_layer(example_batch).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "M37oD2VcCO4R" - }, - "source": [ - "The mean based normalization used here requires knowing the means of each column ahead of time." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "tSyrkSQwYHKi" - }, - "source": [ - "### Categorical data\n", - "\n", - "Some of the columns in the CSV data are categorical columns. 
That is, the content should be one of a limited set of options.\n", - "\n", - "Use the `tf.feature_column` API to create a collection with a `tf.feature_column.indicator_column` for each categorical column.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "mWDniduKMw-C" - }, - "outputs": [], - "source": [ - "CATEGORIES = {\n", - " \"sex\": [\"male\", \"female\"],\n", - " \"class\": [\"First\", \"Second\", \"Third\"],\n", - " \"deck\": [\"A\", \"B\", \"C\", \"D\", \"E\", \"F\", \"G\", \"H\", \"I\", \"J\"],\n", - " \"embark_town\": [\"Cherbourg\", \"Southhampton\", \"Queenstown\"],\n", - " \"alone\": [\"y\", \"n\"],\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "kkxLdrsLwHPT" - }, - "outputs": [], - "source": [ - "categorical_columns = []\n", - "for feature, vocab in CATEGORIES.items():\n", - " cat_col = tf.feature_column.categorical_column_with_vocabulary_list(\n", - " key=feature, vocabulary_list=vocab\n", - " )\n", - " categorical_columns.append(tf.feature_column.indicator_column(cat_col))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "H18CxpHY_Nma" - }, - "outputs": [], - "source": [ - "# See what you just created.\n", - "categorical_columns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "p7mACuOsArUH" - }, - "outputs": [], - "source": [ - "categorical_layer = tf.keras.layers.DenseFeatures(categorical_columns)\n", - "print(categorical_layer(example_batch).numpy()[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "R7-1QG99_1sN" - }, - "source": [ - "This will be become part of a data processing input later when you build the model." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "kPWkC4_1l3IG" - }, - "source": [ - "### Combined preprocessing layer" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "R3QAjo1qD4p9" - }, - "source": [ - "Add the two feature column collections and pass them to a `tf.keras.layers.DenseFeatures` to create an input layer that will extract and preprocess both input types:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "3-OYK7GnaH0r" - }, - "outputs": [], - "source": [ - "# TODO 1\n", - "preprocessing_layer = tf.keras.layers.DenseFeatures(\n", - " categorical_columns + numeric_columns\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "m7_U_K0UMSVS" - }, - "outputs": [], - "source": [ - "print(preprocessing_layer(example_batch).numpy()[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "DlF_omQqtnOP" - }, - "source": [ - "### Next Step\n", - "\n", - "A next step would be to build a build a `tf.keras.Sequential`, starting with the `preprocessing_layer`, which is beyond the scope of this lab. We will cover the Keras Sequential API in the next Lesson." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Load NumPy data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load necessary libraries \n", - "First, restart the Kernel. Then, we will start by importing the necessary libraries for this lab." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import tensorflow as tf\n", - "\n", - "print(\"TensorFlow version: \", tf.version.VERSION)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Load data from `.npz` file\n", - "\n", - "We use the MNIST dataset in Keras." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "DATA_URL = (\n", - " \"https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz\"\n", - ")\n", - "\n", - "path = tf.keras.utils.get_file(\"mnist.npz\", DATA_URL)\n", - "with np.load(path) as data:\n", - " # TODO 1\n", - " train_examples = data[\"x_train\"]\n", - " train_labels = data[\"y_train\"]\n", - " test_examples = data[\"x_test\"]\n", - " test_labels = data[\"y_test\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load NumPy arrays with `tf.data.Dataset`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Assuming you have an array of examples and a corresponding array of labels, pass the two arrays as a tuple into `tf.data.Dataset.from_tensor_slices` to create a `tf.data.Dataset`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# TODO 2\n", - "train_dataset = tf.data.Dataset.from_tensor_slices(\n", - " (train_examples, train_labels)\n", - ")\n", - "test_dataset = tf.data.Dataset.from_tensor_slices((test_examples, test_labels))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Next Step\n", - "\n", - "A next step would be to build a build a `tf.keras.Sequential`, starting with the `preprocessing_layer`, which is beyond the scope of this lab. We will cover the Keras Sequential API in the next Lesson." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Resources \n", - "1. 
Load text data - this link: https://www.tensorflow.org/tutorials/load_data/text\n", - "2. TF.text - this link: https://www.tensorflow.org/tutorials/tensorflow_text/intro\n", - "3. Load image daeta - https://www.tensorflow.org/tutorials/load_data/images\n", - "4. Read data into a Pandas DataFrame - https://www.tensorflow.org/tutorials/load_data/pandas_dataframe\n", - "5. How to represent Unicode strings in TensorFlow - https://www.tensorflow.org/tutorials/load_data/unicode\n", - "6. TFRecord and tf.Example - https://www.tensorflow.org/tutorials/load_data/tfrecord " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright 2020 Google Inc.\n", - "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at\n", - "http://www.apache.org/licenses/LICENSE-2.0\n", - "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." 
- ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "csv.ipynb", - "private_outputs": true, - "provenance": [], - "toc_visible": true - }, - "environment": { - "kernel": "python3", - "name": "tf2-gpu.2-12.m109", - "type": "gcloud", - "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-12:m109" - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/introduction_to_tensorflow/solutions/2c_loading_images.ipynb b/notebooks/introduction_to_tensorflow/solutions/2c_loading_images.ipynb deleted file mode 100644 index 7abfd4e5..00000000 --- a/notebooks/introduction_to_tensorflow/solutions/2c_loading_images.ipynb +++ /dev/null @@ -1,608 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ucMoYase6URl" - }, - "source": [ - "# Loading Images Using tf.Data.Dataset\n", - "\n", - "**Learning Objectives**\n", - "\n", - "1. Retrieve Images using tf.keras.utils.get_file\n", - "2. Load Images using Keras Pre-Processing\n", - "3. Load Images using tf.Data.Dataset\n", - "4. Understand basic Methods for Training\n", - "\n", - "## Introduction \n", - "\n", - "In this notebook, we load an image dataset using tf.data. 
The dataset used in this example is distributed as directories of images, with one class of image per directory.\n", - "\n", - "\n", - "Each learning objective will correspond to a **#TODO** in the [student lab notebook](https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/courses/machine_learning/deepdive2/introduction_to_tensorflow/solutions/load_images_tf.data.ipynb) -- try to complete that notebook first before reviewing this solution notebook." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "hoQQiZDB6URn" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "3vhAMaIOBIee" - }, - "source": [ - "## Load necessary libraries \n", - "We will start by importing the necessary libraries for this lab." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "gIksPgtT8B6B" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import IPython.display as display\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "from PIL import Image\n", - "\n", - "os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"2\"\n", - "\n", - "print(\"TensorFlow version: \", tf.version.VERSION)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "KT6CcaqgQewg" - }, - "outputs": [], - "source": [ - "AUTOTUNE = tf.data.experimental.AUTOTUNE" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "wO0InzL66URu" - }, - "source": [ - "### Retrieve the images\n", - "\n", - "Before you start any training, you will need a set of images to teach the network about the new classes you want to recognize. 
You can use an archive of creative-commons licensed flower photos from Google.\n", - "\n", - "Note: all images are licensed CC-BY, creators are listed in the `LICENSE.txt` file." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "rN-Pc6Zd6awg" - }, - "outputs": [], - "source": [ - "import pathlib\n", - "\n", - "data_dir = tf.keras.utils.get_file(\n", - " origin=\"https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz\",\n", - " fname=\"flower_photos\",\n", - " untar=True,\n", - ")\n", - "data_dir = pathlib.Path(data_dir)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "rFkFK74oO--g" - }, - "source": [ - "After downloading (218MB), you should now have a copy of the flower photos available.\n", - "\n", - "The directory contains 5 sub-directories, one per class:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "QhewYCxhXQBX" - }, - "outputs": [], - "source": [ - "image_count = len(list(data_dir.glob(\"*/*.jpg\")))\n", - "image_count" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "sJ1HKKdR4A7c" - }, - "outputs": [], - "source": [ - "CLASS_NAMES = np.array(\n", - " [item.name for item in data_dir.glob(\"*\") if item.name != \"LICENSE.txt\"]\n", - ")\n", - "CLASS_NAMES" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "IVxsk4OW61TY" - }, - "source": [ - "Each directory contains images of that type of flower. 
Here are some roses:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "crs7ZjEp60Ot" - }, - "outputs": [], - "source": [ - "roses = list(data_dir.glob(\"roses/*\"))\n", - "\n", - "for image_path in roses[:3]:\n", - " display.display(Image.open(str(image_path)))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "6jobDTUs8Wxu" - }, - "source": [ - "## Load using `keras.preprocessing`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "ehhW308g8soJ" - }, - "source": [ - "A simple way to load images is to use `tf.keras.preprocessing`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "syDdF_LWVrWE" - }, - "outputs": [], - "source": [ - "# The 1./255 is to convert from uint8 to float32 in range [0,1].\n", - "image_generator = tf.keras.preprocessing.image.ImageDataGenerator(\n", - " rescale=1.0 / 255\n", - ") # TODO 1a" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "lAmtzsnjDNhB" - }, - "source": [ - "Define some parameters for the loader:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "1zf695or-Flq" - }, - "outputs": [], - "source": [ - "BATCH_SIZE = 32\n", - "IMG_HEIGHT = 224\n", - "IMG_WIDTH = 224\n", - "STEPS_PER_EPOCH = np.ceil(image_count / BATCH_SIZE)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Pw94ajOOVrWI" - }, - "outputs": [], - "source": [ - "train_data_gen = image_generator.flow_from_directory(\n", - " directory=str(data_dir),\n", - " batch_size=BATCH_SIZE,\n", - " shuffle=True,\n", - " target_size=(IMG_HEIGHT, IMG_WIDTH),\n", - " classes=list(CLASS_NAMES),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - 
"colab_type": "text", - "id": "2ZgIZeXaDUsF" - }, - "source": [ - "Inspect a batch:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "nLp0XVG_Vgi2" - }, - "outputs": [], - "source": [ - "def show_batch(image_batch, label_batch):\n", - " plt.figure(figsize=(10, 10))\n", - " for n in range(25):\n", - " ax = plt.subplot(5, 5, n + 1) # TODO 1b\n", - " plt.imshow(image_batch[n]) # TODO 1b\n", - " plt.title(CLASS_NAMES[label_batch[n] == 1][0].title())\n", - " plt.axis(\"off\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "suh6Sjv68rY3" - }, - "outputs": [], - "source": [ - "image_batch, label_batch = next(train_data_gen)\n", - "show_batch(image_batch, label_batch)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "AxS1cLzM8mEp" - }, - "source": [ - "## Load using `tf.data`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Ylj9fgkamgWZ" - }, - "source": [ - "The above `keras.preprocessing` method is convienient, but has three downsides: \n", - "\n", - "1. It's slow. See the performance section below.\n", - "1. It lacks fine-grained control.\n", - "1. It is not well integrated with the rest of TensorFlow." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "IIG5CPaULegg" - }, - "source": [ - "To load the files as a `tf.data.Dataset` first create a dataset of the file paths:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "lAkQp5uxoINu" - }, - "outputs": [], - "source": [ - "list_ds = tf.data.Dataset.list_files(str(data_dir / \"*/*\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "coORvEH-NGwc" - }, - "outputs": [], - "source": [ - "for f in list_ds.take(5):\n", - " print(f.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "91CPfUUJ_8SZ" - }, - "source": [ - "Write a short pure-tensorflow function that converts a file path to an `(img, label)` pair:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "arSQzIey-4D4" - }, - "outputs": [], - "source": [ - "def get_label(file_path):\n", - " # convert the path to a list of path components\n", - " parts = tf.strings.split(file_path, os.path.sep) # TODO 2a\n", - " # The second to last is the class-directory\n", - " return parts[-2] == CLASS_NAMES # TODO 2a" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "MGlq4IP4Aktb" - }, - "outputs": [], - "source": [ - "def decode_img(img):\n", - " # convert the compressed string to a 3D uint8 tensor\n", - " img = tf.image.decode_jpeg(img, channels=3) # TODO 2b\n", - " # Use `convert_image_dtype` to convert to floats in the [0,1] range.\n", - " img = tf.image.convert_image_dtype(img, tf.float32) # TODO 2b\n", - " # resize the image to the desired size.\n", - " return tf.image.resize(img, [IMG_WIDTH, IMG_HEIGHT])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - 
"colab": {}, - "colab_type": "code", - "id": "-xhBRgvNqRRe" - }, - "outputs": [], - "source": [ - "def process_path(file_path):\n", - " label = get_label(file_path)\n", - " # load the raw data from the file as a string\n", - " img = tf.io.read_file(file_path) # TODO 2c\n", - " img = decode_img(img)\n", - " return img, label" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "S9a5GpsUOBx8" - }, - "source": [ - "Use `Dataset.map` to create a dataset of `image, label` pairs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "3SDhbo8lOBQv" - }, - "outputs": [], - "source": [ - "# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.\n", - "labeled_ds = list_ds.map(process_path, num_parallel_calls=AUTOTUNE)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "kxrl0lGdnpRz" - }, - "outputs": [], - "source": [ - "for image, label in labeled_ds.take(1):\n", - " print(\"Image shape: \", image.numpy().shape)\n", - " print(\"Label: \", label.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "vYGCgJuR_9Qp" - }, - "source": [ - "### Next Steps: Basic methods for training" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "wwZavzgsIytz" - }, - "source": [ - "To train a model with this dataset you will want the data:\n", - "\n", - "* To be well shuffled.\n", - "* To be batched.\n", - "* Batches to be available as soon as possible.\n", - "\n", - "These features can be easily added using the `tf.data` api." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "uZmZJx8ePw_5" - }, - "outputs": [], - "source": [ - "def prepare_for_training(ds, cache=True, shuffle_buffer_size=1000):\n", - " # This is a small dataset, only load it once, and keep it in memory.\n", - " # use `.cache(filename)` to cache preprocessing work for datasets that don't\n", - " # fit in memory.\n", - " if cache:\n", - " if isinstance(cache, str):\n", - " ds = ds.cache(cache)\n", - " else:\n", - " ds = ds.cache()\n", - "\n", - " ds = ds.shuffle(buffer_size=shuffle_buffer_size) # TODO 3a\n", - "\n", - " # Repeat forever\n", - " ds = ds.repeat()\n", - "\n", - " ds = ds.batch(BATCH_SIZE)\n", - "\n", - " # `prefetch` lets the dataset fetch batches in the background while the model\n", - " # is training.\n", - " ds = ds.prefetch(buffer_size=AUTOTUNE)\n", - "\n", - " return ds" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "-YKnrfAeZV10" - }, - "outputs": [], - "source": [ - "train_ds = prepare_for_training(labeled_ds)\n", - "\n", - "image_batch, label_batch = next(iter(train_ds))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "UN_Dnl72YNIj" - }, - "outputs": [], - "source": [ - "show_batch(image_batch.numpy(), label_batch.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright 2020 Google Inc.\n", - "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at\n", - "http://www.apache.org/licenses/LICENSE-2.0\n", - "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License." - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "images.ipynb", - "private_outputs": true, - "provenance": [], - "toc_visible": true - }, - "environment": { - "kernel": "python3", - "name": "tf2-gpu.2-12.m109", - "type": "gcloud", - "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-12:m109" - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/introduction_to_tensorflow/solutions/2d_loading_tfrecords.ipynb b/notebooks/introduction_to_tensorflow/solutions/2d_loading_tfrecords.ipynb deleted file mode 100644 index 91ad2125..00000000 --- a/notebooks/introduction_to_tensorflow/solutions/2d_loading_tfrecords.ipynb +++ /dev/null @@ -1,1259 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "3pkUd_9IZCFO" - }, - "source": [ - "# TFRecord and tf.Example\n", - "\n", - "**Learning Objectives**\n", - "\n", - "1. Understand the TFRecord format for storing data\n", - "2. Understand the tf.Example message type\n", - "3. Read and Write a TFRecord file\n", - "\n", - "\n", - "## Introduction \n", - "\n", - "In this notebook, you create, parse, and use the `tf.Example` message, and then serialize, write, and read `tf.Example` messages to and from `.tfrecord` files. To read data efficiently it can be helpful to serialize your data and store it in a set of files (100-200MB each) that can each be read linearly. This is especially true if the data is being streamed over a network. 
This can also be useful for caching any data-preprocessing.\n", - "\n", - "\n", - "Each learning objective will correspond to a __#TODO__ in the [student lab notebook](../labs/tfrecord-tf.example.ipynb) -- try to complete that notebook first before reviewing this solution notebook. \n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Ac83J0QxjhFt" - }, - "source": [ - "### The TFRecord format \n", - "\n", - "The TFRecord format is a simple format for storing a sequence of binary records. [Protocol buffers](https://developers.google.com/protocol-buffers/) are a cross-platform, cross-language library for efficient serialization of structured data. Protocol messages are defined by `.proto` files, these are often the easiest way to understand a message type.\n", - "\n", - "The `tf.Example` message (or protobuf) is a flexible message type that represents a `{\"string\": value}` mapping. It is designed for use with TensorFlow and is used throughout the higher-level APIs such as [TFX](https://www.tensorflow.org/tfx/).\n", - "Note: While useful, these structures are optional. There is no need to convert existing code to use TFRecords, unless you are using [`tf.data`](https://www.tensorflow.org/guide/datasets) and reading data is still the bottleneck to training. See [Data Input Pipeline Performance](https://www.tensorflow.org/guide/performance/datasets) for dataset performance tips." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "WkRreBf1eDVc" - }, - "source": [ - "## Load necessary libraries \n", - "We will start by importing the necessary libraries for this lab." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Ja7sezsmnXph" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import IPython.display as display\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "\n", - "os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"2\"\n", - "\n", - "print(\"TensorFlow version: \", tf.version.VERSION)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "e5Kq88ccUWQV" - }, - "source": [ - "## `tf.Example`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "VrdQHgvNijTi" - }, - "source": [ - "### Data types for `tf.Example`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "lZw57Qrn4CTE" - }, - "source": [ - "Fundamentally, a `tf.Example` is a `{\"string\": tf.train.Feature}` mapping.\n", - "\n", - "The `tf.train.Feature` message type can accept one of the following three types (See the [`.proto` file](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto) for reference). Most other generic types can be coerced into one of these:\n", - "\n", - "1. `tf.train.BytesList` (the following types can be coerced)\n", - "\n", - " - `string`\n", - " - `byte`\n", - "\n", - "1. `tf.train.FloatList` (the following types can be coerced)\n", - "\n", - " - `float` (`float32`)\n", - " - `double` (`float64`)\n", - "\n", - "1. `tf.train.Int64List` (the following types can be coerced)\n", - "\n", - " - `bool`\n", - " - `enum`\n", - " - `int32`\n", - " - `uint32`\n", - " - `int64`\n", - " - `uint64`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "_e3g9ExathXP" - }, - "source": [ - "In order to convert a standard TensorFlow type to a `tf.Example`-compatible `tf.train.Feature`, you can use the shortcut functions below. 
Note that each function takes a scalar input value and returns a `tf.train.Feature` containing one of the three `list` types above:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "mbsPOUpVtYxA" - }, - "outputs": [], - "source": [ - "# TODO 1a\n", - "# The following functions can be used to convert a value to a type compatible\n", - "# with tf.Example.\n", - "\n", - "\n", - "def _bytes_feature(value):\n", - " \"\"\"Returns a bytes_list from a string / byte.\"\"\"\n", - " if isinstance(value, type(tf.constant(0))):\n", - " value = (\n", - " value.numpy()\n", - " ) # BytesList won't unpack a string from an EagerTensor.\n", - " return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))\n", - "\n", - "\n", - "def _float_feature(value):\n", - " \"\"\"Returns a float_list from a float / double.\"\"\"\n", - " return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))\n", - "\n", - "\n", - "def _int64_feature(value):\n", - " \"\"\"Returns an int64_list from a bool / enum / int / uint.\"\"\"\n", - " return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Wst0v9O8hgzy" - }, - "source": [ - "Note: To stay simple, this example only uses scalar inputs. The simplest way to handle non-scalar features is to use `tf.serialize_tensor` to convert tensors to binary-strings. Strings are scalars in tensorflow. Use `tf.parse_tensor` to convert the binary-string back to a tensor." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "vsMbkkC8xxtB" - }, - "source": [ - "Below are some examples of how these functions work. Note the varying input types and the standardized output types. If the input type for a function does not match one of the coercible types stated above, the function will raise an exception (e.g. 
`_int64_feature(1.0)` will error out, since `1.0` is a float, so should be used with the `_float_feature` function instead):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "hZzyLGr0u73y" - }, - "outputs": [], - "source": [ - "print(_bytes_feature(b\"test_string\"))\n", - "\n", - "print(_float_feature(np.exp(1)))\n", - "\n", - "print(_int64_feature(True))\n", - "print(_int64_feature(1))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "nj1qpfQU5qmi" - }, - "source": [ - "All proto messages can be serialized to a binary-string using the `.SerializeToString` method:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "5afZkORT5pjm" - }, - "outputs": [], - "source": [ - "# TODO 1b\n", - "feature = _float_feature(np.exp(1))\n", - "\n", - "feature.SerializeToString()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "laKnw9F3hL-W" - }, - "source": [ - "### Creating a `tf.Example` message" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "b_MEnhxchQPC" - }, - "source": [ - "Suppose you want to create a `tf.Example` message from existing data. In practice, the dataset may come from anywhere, but the procedure of creating the `tf.Example` message from a single observation will be the same:\n", - "\n", - "1. Within each observation, each value needs to be converted to a `tf.train.Feature` containing one of the 3 compatible types, using one of the functions above.\n", - "\n", - "1. You create a map (dictionary) from the feature name string to the encoded feature value produced in #1.\n", - "\n", - "1. The map produced in step 2 is converted to a [`Features` message](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto#L85)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "4EgFQ2uHtchc" - }, - "source": [ - "In this notebook, you will create a dataset using NumPy.\n", - "\n", - "This dataset will have 4 features:\n", - "\n", - "* a boolean feature, `False` or `True` with equal probability\n", - "* an integer feature uniformly randomly chosen from `[0, 5]`\n", - "* a string feature generated from a string table by using the integer feature as an index\n", - "* a float feature from a standard normal distribution\n", - "\n", - "Consider a sample consisting of 10,000 independently and identically distributed observations from each of the above distributions:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "CnrguFAy3YQv" - }, - "outputs": [], - "source": [ - "# The number of observations in the dataset.\n", - "n_observations = int(1e4)\n", - "\n", - "# Boolean feature, encoded as False or True.\n", - "feature0 = np.random.choice([False, True], n_observations)\n", - "\n", - "# Integer feature, random from 0 to 4.\n", - "feature1 = np.random.randint(0, 5, n_observations)\n", - "\n", - "# String feature\n", - "strings = np.array([b\"cat\", b\"dog\", b\"chicken\", b\"horse\", b\"goat\"])\n", - "feature2 = strings[feature1]\n", - "\n", - "# Float feature, from a standard normal distribution\n", - "feature3 = np.random.randn(n_observations)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "aGrscehJr7Jd" - }, - "source": [ - "Each of these features can be coerced into a `tf.Example`-compatible type using one of `_bytes_feature`, `_float_feature`, `_int64_feature`. 
You can then create a `tf.Example` message from these encoded features:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "RTCS49Ij_kUw" - }, - "outputs": [], - "source": [ - "def serialize_example(feature0, feature1, feature2, feature3):\n", - " \"\"\"\n", - " Creates a tf.Example message ready to be written to a file.\n", - " \"\"\"\n", - " # Create a dictionary mapping the feature name to the tf.Example-compatible\n", - " # data type.\n", - " feature = {\n", - " \"feature0\": _int64_feature(feature0),\n", - " \"feature1\": _int64_feature(feature1),\n", - " \"feature2\": _bytes_feature(feature2),\n", - " \"feature3\": _float_feature(feature3),\n", - " }\n", - "\n", - " # Create a Features message using tf.train.Example.\n", - "\n", - " example_proto = tf.train.Example(\n", - " features=tf.train.Features(feature=feature)\n", - " )\n", - " return example_proto.SerializeToString()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "XftzX9CN_uGT" - }, - "source": [ - "For example, suppose you have a single observation from the dataset, `[False, 4, bytes('goat'), 0.9876]`. You can create and print the `tf.Example` message for this observation using `create_message()`. Each single observation will be written as a `Features` message as per the above. 
Note that the `tf.Example` [message](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/example.proto#L88) is just a wrapper around the `Features` message:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "N8BtSx2RjYcb" - }, - "outputs": [], - "source": [ - "# This is an example observation from the dataset.\n", - "\n", - "example_observation = []\n", - "\n", - "serialized_example = serialize_example(False, 4, b\"goat\", 0.9876)\n", - "serialized_example" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "_pbGATlG6u-4" - }, - "source": [ - "To decode the message use the `tf.train.Example.FromString` method." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "dGim-mEm6vit" - }, - "outputs": [], - "source": [ - "# TODO 1c\n", - "example_proto = tf.train.Example.FromString(serialized_example)\n", - "example_proto" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "o6qxofy89obI" - }, - "source": [ - "## TFRecords format details\n", - "\n", - "A TFRecord file contains a sequence of records. The file can only be read sequentially.\n", - "\n", - "Each record contains a byte-string, for the data-payload, plus the data-length, and CRC32C (32-bit CRC using the Castagnoli polynomial) hashes for integrity checking.\n", - "\n", - "Each record is stored in the following formats:\n", - "\n", - " uint64 length\n", - " uint32 masked_crc32_of_length\n", - " byte data[length]\n", - " uint32 masked_crc32_of_data\n", - "\n", - "The records are concatenated together to produce the file. 
CRCs are\n", - "[described here](https://en.wikipedia.org/wiki/Cyclic_redundancy_check), and\n", - "the mask of a CRC is:\n", - "\n", - " masked_crc = ((crc >> 15) | (crc << 17)) + 0xa282ead8ul\n", - "\n", - "Note: There is no requirement to use `tf.Example` in TFRecord files. `tf.Example` is just a method of serializing dictionaries to byte-strings. Lines of text, encoded image data, or serialized tensors (using `tf.io.serialize_tensor`, and\n", - "`tf.io.parse_tensor` when loading). See the `tf.io` module for more options." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "y-Hjmee-fbLH" - }, - "source": [ - "## TFRecord files using `tf.data`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "GmehkCCT81Ez" - }, - "source": [ - "The `tf.data` module also provides tools for reading and writing data in TensorFlow." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "1FISEuz8ubu3" - }, - "source": [ - "### Writing a TFRecord file\n", - "\n", - "The easiest way to get the data into a dataset is to use the `from_tensor_slices` method.\n", - "\n", - "Applied to an array, it returns a dataset of scalars:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "mXeaukvwu5_-" - }, - "outputs": [], - "source": [ - "tf.data.Dataset.from_tensor_slices(feature1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "f-q0VKyZvcad" - }, - "source": [ - "Applied to a tuple of arrays, it returns a dataset of tuples:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "H5sWyu1kxnvg" - }, - "outputs": [], - "source": [ - "features_dataset = tf.data.Dataset.from_tensor_slices(\n", - " (feature0, feature1, feature2, feature3)\n", - ")\n", - "features_dataset" - ] - }, - { - "cell_type": "code", 
- "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "m1C-t71Nywze" - }, - "outputs": [], - "source": [ - "# Use `take(1)` to only pull one example from the dataset.\n", - "for f0, f1, f2, f3 in features_dataset.take(1):\n", - " print(f0)\n", - " print(f1)\n", - " print(f2)\n", - " print(f3)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "mhIe63awyZYd" - }, - "source": [ - "Use the `tf.data.Dataset.map` method to apply a function to each element of a `Dataset`.\n", - "\n", - "The mapped function must operate in TensorFlow graph mode—it must operate on and return `tf.Tensors`. A non-tensor function, like `serialize_example`, can be wrapped with `tf.py_function` to make it compatible.\n", - "\n", - "Using `tf.py_function` requires to specify the shape and type information that is otherwise unavailable:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "apB5KYrJzjPI" - }, - "outputs": [], - "source": [ - "# TODO 2a\n", - "def tf_serialize_example(f0, f1, f2, f3):\n", - " tf_string = tf.py_function(\n", - " serialize_example,\n", - " (f0, f1, f2, f3), # pass these args to the above function.\n", - " tf.string,\n", - " ) # the return type is `tf.string`.\n", - " return tf.reshape(tf_string, ()) # The result is a scalar" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "lHFjW4u4Npz9" - }, - "outputs": [], - "source": [ - "tf_serialize_example(f0, f1, f2, f3)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "CrFZ9avE3HUF" - }, - "source": [ - "Apply this function to each element in the dataset:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "VDeqYVbW3ww9" - }, - "outputs": [], - "source": [ - "# TODO 2b\n", - 
"serialized_features_dataset = features_dataset.map(tf_serialize_example)\n", - "serialized_features_dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "DlDfuh46bRf6" - }, - "outputs": [], - "source": [ - "def generator():\n", - " for features in features_dataset:\n", - " yield serialize_example(*features)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "iv9oXKrcbhvX" - }, - "outputs": [], - "source": [ - "serialized_features_dataset = tf.data.Dataset.from_generator(\n", - " generator, output_types=tf.string, output_shapes=()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "Dqz8C4D5cIj9" - }, - "outputs": [], - "source": [ - "serialized_features_dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "p6lw5VYpjZZC" - }, - "source": [ - "And write them to a TFRecord file:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "vP1VgTO44UIE" - }, - "outputs": [], - "source": [ - "filename = \"test.tfrecord\"\n", - "writer = tf.data.experimental.TFRecordWriter(filename)\n", - "writer.write(serialized_features_dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "6aV0GQhV8tmp" - }, - "source": [ - "### Reading a TFRecord file" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "o3J5D4gcSy8N" - }, - "source": [ - "You can also read the TFRecord file using the `tf.data.TFRecordDataset` class.\n", - "\n", - "More information on consuming TFRecord files using `tf.data` can be found [here](https://www.tensorflow.org/guide/datasets#consuming_tfrecord_data).\n", - "\n", - "Using `TFRecordDataset`s can be useful for standardizing input 
data and optimizing performance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "6OjX6UZl-bHC" - }, - "outputs": [], - "source": [ - "# TODO 2c\n", - "filenames = [filename]\n", - "raw_dataset = tf.data.TFRecordDataset(filenames)\n", - "raw_dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "6_EQ9i2E_-Fz" - }, - "source": [ - "At this point the dataset contains serialized `tf.train.Example` messages. When iterated over it returns these as scalar string tensors.\n", - "\n", - "Use the `.take` method to only show the first 10 records.\n", - "\n", - "Note: iterating over a `tf.data.Dataset` only works with eager execution enabled." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "hxVXpLz_AJlm" - }, - "outputs": [], - "source": [ - "for raw_record in raw_dataset.take(10):\n", - " print(repr(raw_record))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "W-6oNzM4luFQ" - }, - "source": [ - "These tensors can be parsed using the function below. 
Note that the `feature_description` is necessary here because datasets use graph-execution, and need this description to build their shape and type signature:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "zQjbIR1nleiy" - }, - "outputs": [], - "source": [ - "# Create a description of the features.\n", - "feature_description = {\n", - " \"feature0\": tf.io.FixedLenFeature([], tf.int64, default_value=0),\n", - " \"feature1\": tf.io.FixedLenFeature([], tf.int64, default_value=0),\n", - " \"feature2\": tf.io.FixedLenFeature([], tf.string, default_value=\"\"),\n", - " \"feature3\": tf.io.FixedLenFeature([], tf.float32, default_value=0.0),\n", - "}\n", - "\n", - "\n", - "def _parse_function(example_proto):\n", - " # Parse the input `tf.Example` proto using the dictionary above.\n", - " return tf.io.parse_single_example(example_proto, feature_description)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "gWETjUqhEQZf" - }, - "source": [ - "Alternatively, use `tf.parse example` to parse the whole batch at once. Apply this function to each item in the dataset using the `tf.data.Dataset.map` method:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "6Ob7D-zmBm1w" - }, - "outputs": [], - "source": [ - "parsed_dataset = raw_dataset.map(_parse_function)\n", - "parsed_dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "sNV-XclGnOvn" - }, - "source": [ - "Use eager execution to display the observations in the dataset. There are 10,000 observations in this dataset, but you will only display the first 10. The data is displayed as a dictionary of features. 
Each item is a `tf.Tensor`, and the `numpy` element of this tensor displays the value of the feature:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "x2LT2JCqhoD_" - }, - "outputs": [], - "source": [ - "for parsed_record in parsed_dataset.take(10):\n", - " print(repr(parsed_record))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Cig9EodTlDmg" - }, - "source": [ - "Here, the `tf.parse_example` function unpacks the `tf.Example` fields into standard tensors." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "jyg1g3gU7DNn" - }, - "source": [ - "## TFRecord files in Python" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "3FXG3miA7Kf1" - }, - "source": [ - "The `tf.io` module also contains pure-Python functions for reading and writing TFRecord files." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "CKn5uql2lAaN" - }, - "source": [ - "### Writing a TFRecord file" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "LNW_FA-GQWXs" - }, - "source": [ - "Next, write the 10,000 observations to the file `test.tfrecord`. Each observation is converted to a `tf.Example` message, then written to file. 
You can then verify that the file `test.tfrecord` has been created:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "MKPHzoGv7q44" - }, - "outputs": [], - "source": [ - "# Write the `tf.Example` observations to the file.\n", - "with tf.io.TFRecordWriter(filename) as writer:\n", - " for i in range(n_observations):\n", - " example = serialize_example(\n", - " feature0[i], feature1[i], feature2[i], feature3[i]\n", - " )\n", - " writer.write(example)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "EjdFHHJMpUUo" - }, - "outputs": [], - "source": [ - "!du -sh {filename}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "2osVRnYNni-E" - }, - "source": [ - "### Reading a TFRecord file\n", - "\n", - "These serialized tensors can be easily parsed using `tf.train.Example.ParseFromString`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "U3tnd3LerOtV" - }, - "outputs": [], - "source": [ - "filenames = [filename]\n", - "raw_dataset = tf.data.TFRecordDataset(filenames)\n", - "raw_dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "nsEAACHcnm3f" - }, - "outputs": [], - "source": [ - "for raw_record in raw_dataset.take(1):\n", - " example = tf.train.Example()\n", - " example.ParseFromString(raw_record.numpy())\n", - " print(example)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "S0tFDrwdoj3q" - }, - "source": [ - "## Walkthrough: Reading and writing image data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "rjN2LFxFpcR9" - }, - "source": [ - "This is an end-to-end example of how to read and write image data using TFRecords. 
Using an image as input data, you will write the data as a TFRecord file, then read the file back and display the image.\n", - "\n", - "This can be useful if, for example, you want to use several models on the same input dataset. Instead of storing the image data raw, it can be preprocessed into the TFRecords format, and that can be used in all further processing and modelling.\n", - "\n", - "First, let's download [this image](https://commons.wikimedia.org/wiki/File:Felis_catus-cat_on_snow.jpg) of a cat in the snow and [this photo](https://upload.wikimedia.org/wikipedia/commons/f/fe/New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg) of the Williamsburg Bridge, NYC under construction." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "5Lk2qrKvN0yu" - }, - "source": [ - "### Fetch the images" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "3a0fmwg8lHdF" - }, - "outputs": [], - "source": [ - "cat_in_snow = tf.keras.utils.get_file(\n", - " \"320px-Felis_catus-cat_on_snow.jpg\",\n", - " \"https://storage.googleapis.com/download.tensorflow.org/example_images/320px-Felis_catus-cat_on_snow.jpg\",\n", - ")\n", - "williamsburg_bridge = tf.keras.utils.get_file(\n", - " \"194px-New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg\",\n", - " \"https://storage.googleapis.com/download.tensorflow.org/example_images/194px-New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "7aJJh7vENeE4" - }, - "outputs": [], - "source": [ - "display.display(display.Image(filename=cat_in_snow))\n", - "display.display(\n", - " display.HTML(\n", - " 'Image cc-by: Von.grzanka'\n", - " )\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "KkW0uuhcXZqA" - }, - 
"outputs": [], - "source": [ - "display.display(display.Image(filename=williamsburg_bridge))\n", - "display.display(\n", - " display.HTML(\n", - " 'From Wikimedia'\n", - " )\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "VSOgJSwoN5TQ" - }, - "source": [ - "### Write the TFRecord file" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "Azx83ryQEU6T" - }, - "source": [ - "As before, encode the features as types compatible with `tf.Example`. This stores the raw image string feature, as well as the height, width, depth, and arbitrary `label` feature. The latter is used when you write the file to distinguish between the cat image and the bridge image. Use `0` for the cat image, and `1` for the bridge image:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "kC4TS1ZEONHr" - }, - "outputs": [], - "source": [ - "image_labels = {\n", - " cat_in_snow: 0,\n", - " williamsburg_bridge: 1,\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "c5njMSYNEhNZ" - }, - "outputs": [], - "source": [ - "# This is an example, just using the cat image.\n", - "image_string = open(cat_in_snow, \"rb\").read()\n", - "\n", - "label = image_labels[cat_in_snow]\n", - "\n", - "\n", - "# Create a dictionary with features that may be relevant.\n", - "def image_example(image_string, label):\n", - " image_shape = tf.image.decode_jpeg(image_string).shape\n", - "\n", - " feature = {\n", - " \"height\": _int64_feature(image_shape[0]),\n", - " \"width\": _int64_feature(image_shape[1]),\n", - " \"depth\": _int64_feature(image_shape[2]),\n", - " \"label\": _int64_feature(label),\n", - " \"image_raw\": _bytes_feature(image_string),\n", - " }\n", - "\n", - " return tf.train.Example(features=tf.train.Features(feature=feature))\n", - "\n", - "\n", - "for line in 
str(image_example(image_string, label)).split(\"\\n\")[:15]:\n", - " print(line)\n", - "print(\"...\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "2G_o3O9MN0Qx" - }, - "source": [ - "Notice that all of the features are now stored in the `tf.Example` message. Next, functionalize the code above and write the example messages to a file named `images.tfrecords`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "qcw06lQCOCZU" - }, - "outputs": [], - "source": [ - "# Write the raw image files to `images.tfrecords`.\n", - "# First, process the two images into `tf.Example` messages.\n", - "# Then, write to a `.tfrecords` file.\n", - "record_file = \"images.tfrecords\"\n", - "with tf.io.TFRecordWriter(record_file) as writer:\n", - " for filename, label in image_labels.items():\n", - " image_string = open(filename, \"rb\").read()\n", - " tf_example = image_example(image_string, label)\n", - " writer.write(tf_example.SerializeToString())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "yJrTe6tHPCfs" - }, - "outputs": [], - "source": [ - "!du -sh {record_file}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "jJSsCkZLPH6K" - }, - "source": [ - "### Read the TFRecord file\n", - "\n", - "You now have the file—`images.tfrecords`—and can now iterate over the records in it to read back what you wrote. Given that in this example you will only reproduce the image, the only feature you will need is the raw image string. Extract it using the getters described above, namely `example.features.feature['image_raw'].bytes_list.value[0]`. 
You can also use the labels to determine which record is the cat and which one is the bridge:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "M6Cnfd3cTKHN" - }, - "outputs": [], - "source": [ - "raw_image_dataset = tf.data.TFRecordDataset(\"images.tfrecords\")\n", - "\n", - "# Create a dictionary describing the features.\n", - "image_feature_description = {\n", - " \"height\": tf.io.FixedLenFeature([], tf.int64),\n", - " \"width\": tf.io.FixedLenFeature([], tf.int64),\n", - " \"depth\": tf.io.FixedLenFeature([], tf.int64),\n", - " \"label\": tf.io.FixedLenFeature([], tf.int64),\n", - " \"image_raw\": tf.io.FixedLenFeature([], tf.string),\n", - "}\n", - "\n", - "\n", - "def _parse_image_function(example_proto):\n", - " # Parse the input tf.Example proto using the dictionary above.\n", - " return tf.io.parse_single_example(example_proto, image_feature_description)\n", - "\n", - "\n", - "parsed_image_dataset = raw_image_dataset.map(_parse_image_function)\n", - "parsed_image_dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "0PEEFPk4NEg1" - }, - "source": [ - "Recover the images from the TFRecord file:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": {}, - "colab_type": "code", - "id": "yZf8jOyEIjSF" - }, - "outputs": [], - "source": [ - "for image_features in parsed_image_dataset:\n", - " image_raw = image_features[\"image_raw\"].numpy()\n", - " display.display(display.Image(data=image_raw))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright 2020 Google Inc.\n", - "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at\n", - "http://www.apache.org/licenses/LICENSE-2.0\n", - "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License." - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "pL--_KGdYoBz" - ], - "name": "tfrecord.ipynb", - "private_outputs": true, - "provenance": [], - "toc_visible": true - }, - "environment": { - "kernel": "python3", - "name": "tf2-gpu.2-12.m109", - "type": "gcloud", - "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-12:m109" - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From 9a03703b602c509ab94d00db9409be50d1ea3c58 Mon Sep 17 00:00:00 2001 From: BenoitDherin Date: Thu, 18 Jan 2024 20:25:35 +0000 Subject: [PATCH 2/2] pre-commit --- notebooks/text_models/labs/load_text.ipynb | 2 +- notebooks/text_models/solutions/load_text.ipynb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/text_models/labs/load_text.ipynb b/notebooks/text_models/labs/load_text.ipynb index d172581a..955b5514 100644 --- a/notebooks/text_models/labs/load_text.ipynb +++ b/notebooks/text_models/labs/load_text.ipynb @@ -1103,7 +1103,7 @@ "source": [ "tokenized_ds = configure_dataset(tokenized_ds)\n", "\n", - "vocab_dict = collections.defaultdict(lambda: 0)\n", + "vocab_dict = collections.defaultdict(int)\n", "for toks in tokenized_ds.as_numpy_iterator():\n", " for tok in toks:\n", " vocab_dict[tok] 
+= 1\n", diff --git a/notebooks/text_models/solutions/load_text.ipynb b/notebooks/text_models/solutions/load_text.ipynb index 414c918c..64aaa0e9 100644 --- a/notebooks/text_models/solutions/load_text.ipynb +++ b/notebooks/text_models/solutions/load_text.ipynb @@ -1105,7 +1105,7 @@ "source": [ "tokenized_ds = configure_dataset(tokenized_ds)\n", "\n", - "vocab_dict = collections.defaultdict(lambda: 0)\n", + "vocab_dict = collections.defaultdict(int)\n", "for toks in tokenized_ds.as_numpy_iterator():\n", " for tok in toks:\n", " vocab_dict[tok] += 1\n",