Commit eea789a (1 parent: fd63966): 18 changed files with 21,646 additions and 235 deletions.
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook contains code that evaluates the model by supplying images to it through a batch transform.\n",
    "You will need three things for this notebook to run:<br>\n",
    "1. **The training job name (the name of the job with which you trained the model).**\n",
    "2. **The S3 URL of the location where the images are uploaded.**\n",
    "3. **The S3 URL where the output is to be stored.**\n",
    "\n",
    "You can then copy the outputs to your computer and follow analyze.results.ipynb to analyze the results.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import time\n",
    "from time import gmtime, strftime\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "import matplotlib.pyplot as plt\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sagemaker\n",
    "from sagemaker import get_execution_role\n",
    "from sagemaker.amazon.amazon_estimator import get_image_uri\n",
    "\n",
    "# This is an object that represents the SageMaker session that we are currently operating in. This\n",
    "# object contains some useful information that we will need to access later, such as our region.\n",
    "session = sagemaker.Session()\n",
    "\n",
    "# This is an object that represents the IAM role that we are currently assigned. When we create the\n",
    "# model and launch the batch transform job later we will need to tell SageMaker what IAM role they\n",
    "# should have. Since our use case is relatively simple we will simply assign them the role we\n",
    "# currently have.\n",
    "role = get_execution_role()"
   ]
  },
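  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sanity check we can print the region the session is operating in and the IAM role that will be handed to the jobs below.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Confirm which region and role the rest of the notebook will use.\n",
    "print(session.boto_region_name)\n",
    "print(role)"
   ]
  },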
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Look up the container image for the built-in image-classification algorithm in our region.\n",
    "container = get_image_uri(session.boto_region_name, 'image-classification', repo_version=\"latest\")"
   ]
  },
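  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`get_image_uri` resolves the ECR URI of the built-in algorithm image for our region, so printing it shows exactly which container the model and the transform job will run in.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The ECR image URI that will be used for both the model and the batch transform.\n",
    "print(container)"
   ]
  },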
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Look up the training job that produced the model and retrieve the S3 location of its artifacts.\n",
    "job_name = 'image-classification-2019-09-12-17-02-05-917'\n",
    "training_job_info = session.sagemaker_client.describe_training_job(TrainingJobName=job_name)\n",
    "\n",
    "model_artifacts = training_job_info['ModelArtifacts']['S3ModelArtifacts']"
   ]
  },
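  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The artifacts are only usable if the training job actually finished, so it is worth checking the `TrainingJobStatus` field of the description before going any further.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make sure the training job completed before we try to build a model from its artifacts.\n",
    "assert training_job_info['TrainingJobStatus'] == 'Completed', \\\n",
    "    training_job_info.get('FailureReason', 'training job did not complete')\n",
    "print(model_artifacts)"
   ]
  },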
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_name = job_name + \"-model\"\n",
    "\n",
    "# We also need to tell SageMaker which container should be used for inference and where it should\n",
    "# retrieve the model artifacts from. In our case, the image-classification container that we used\n",
    "# for training can also be used for inference.\n",
    "primary_container = {\n",
    "    \"Image\": container,\n",
    "    \"ModelDataUrl\": model_artifacts\n",
    "}\n",
    "\n",
    "# And lastly we construct the SageMaker model\n",
    "model_info = session.sagemaker_client.create_model(\n",
    "    ModelName=model_name,\n",
    "    ExecutionRoleArn=role,\n",
    "    PrimaryContainer=primary_container)"
   ]
  },
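  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`create_model` returns the ARN of the model it registered; printing it confirms the model exists before we reference it by name in the transform job.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The ARN of the newly created model.\n",
    "print(model_info['ModelArn'])"
   ]
  },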
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "transform_job_name = 'nsfwbatchtransform' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n",
    "\n",
    "# Now we construct the data structure which will describe the batch transform job.\n",
    "transform_request = \\\n",
    "{\n",
    "    \"TransformJobName\": transform_job_name,\n",
    "\n",
    "    # This is the name of the model that we created earlier.\n",
    "    \"ModelName\": model_name,\n",
    "\n",
    "    # This describes how many compute instances should be used at once. If you happen to be doing a very large\n",
    "    # batch transform job it may be worth running multiple compute instances at once.\n",
    "    \"MaxConcurrentTransforms\": 1,\n",
    "\n",
    "    # This says how big each individual request sent to the model should be, at most. One of the things that\n",
    "    # SageMaker does in the background is to split our data up into chunks so that each chunk stays under\n",
    "    # this size limit.\n",
    "    \"MaxPayloadInMB\": 6,\n",
    "\n",
    "    # This controls how many records are packed into each request. Since the split type below is 'None',\n",
    "    # each image file is treated as a single record, so each request will contain one image either way.\n",
    "    \"BatchStrategy\": \"MultiRecord\",\n",
    "\n",
    "    # This next object describes where the output data should be stored. Some of the more advanced options which\n",
    "    # we don't cover here also describe how SageMaker should collect output from various batches.\n",
    "    \"TransformOutput\": {\n",
    "        \"S3OutputPath\": \"s3://project-completion-udacity/nsfw_dataset/batch-transform/\"\n",
    "    },\n",
    "\n",
    "    # Here we describe our input data. We need to tell SageMaker where on S3 the input is stored and what the\n",
    "    # individual samples look like. In our case each S3 object is a single image, so we set the split type to\n",
    "    # 'None' and the content type to 'application/x-image' so that each image is sent to the model unmodified.\n",
    "    \"TransformInput\": {\n",
    "        \"ContentType\": \"application/x-image\",\n",
    "        \"SplitType\": \"None\",\n",
    "        \"DataSource\": {\n",
    "            \"S3DataSource\": {\n",
    "                \"S3DataType\": \"S3Prefix\",\n",
    "                \"S3Uri\": \"s3://project-completion-udacity/evaluation/testing/\"\n",
    "            }\n",
    "        }\n",
    "    },\n",
    "\n",
    "    # And lastly we tell SageMaker what sort of compute instance we would like it to use.\n",
    "    \"TransformResources\": {\n",
    "        \"InstanceType\": \"ml.m4.xlarge\",\n",
    "        \"InstanceCount\": 1\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "transform_response = session.sagemaker_client.create_transform_job(**transform_request)"
   ]
  },
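  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`create_transform_job` returns the ARN of the job it started. We can also ask `describe_transform_job` for the job's current status to confirm it was accepted.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Confirm the job was accepted and see its initial status (usually 'InProgress').\n",
    "print(transform_response['TransformJobArn'])\n",
    "print(session.sagemaker_client.describe_transform_job(\n",
    "    TransformJobName=transform_job_name)['TransformJobStatus'])"
   ]
  },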
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "........................................"
     ]
    }
   ],
   "source": [
    "transform_desc = session.wait_for_transform_job(transform_job_name)"
   ]
  },
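  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Once the wait returns we can confirm the job completed and list the output files it wrote to S3. The bucket and prefix below are a sketch that mirrors the `S3OutputPath` configured above; adjust them if you changed that path. Batch transform writes one `.out` file per input image, which you can copy to your computer and analyze with analyze.results.ipynb.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Verify that the transform job finished successfully before looking for output.\n",
    "final_desc = session.sagemaker_client.describe_transform_job(TransformJobName=transform_job_name)\n",
    "assert final_desc['TransformJobStatus'] == 'Completed', \\\n",
    "    final_desc.get('FailureReason', 'transform job did not complete')\n",
    "\n",
    "# List the output objects. The bucket and prefix mirror the S3OutputPath used in the request above.\n",
    "s3 = session.boto_session.client('s3')\n",
    "listing = s3.list_objects_v2(Bucket='project-completion-udacity', Prefix='nsfw_dataset/batch-transform/')\n",
    "for obj in listing.get('Contents', []):\n",
    "    print(obj['Key'])"
   ]
  },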
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}