Commit eea789a (1 parent: fd63966): 18 changed files with 21,646 additions and 235 deletions.
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook contains code that evaluates the model by supplying images to it through a batch transform.\n",
    "You will need three things for this notebook to run:<br>\n",
    "1. **The training job name (the name of the job with which you trained the model).**\n",
    "2. **The S3 URL of the location where the images are uploaded.**\n",
    "3. **The S3 URL where the output is to be stored.**\n",
    "\n",
    "You can then copy the outputs to your computer and follow analyze.results.ipynb to analyze the results.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import time\n",
    "from time import gmtime, strftime\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "import matplotlib.pyplot as plt\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sagemaker\n",
    "from sagemaker import get_execution_role\n",
    "from sagemaker.amazon.amazon_estimator import get_image_uri\n",
    "\n",
    "# This is an object that represents the SageMaker session that we are currently operating in. This\n",
    "# object contains some useful information that we will need to access later, such as our region.\n",
    "session = sagemaker.Session()\n",
    "\n",
    "# This is an object that represents the IAM role that we are currently assigned. When we create the\n",
    "# model and launch the batch transform job later we will need to tell SageMaker what IAM role they\n",
    "# should have. Since our use case is relatively simple we will simply assign them the role we\n",
    "# currently have.\n",
    "role = get_execution_role()"
   ]
  },
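  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sanity check we can print the region the session is operating in and the IAM role that will be handed to the jobs below.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Confirm which region and role the rest of the notebook will use.\n",
    "print(session.boto_region_name)\n",
    "print(role)"
   ]
  },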
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Look up the container image for the built-in image-classification algorithm in our region.\n",
    "container = get_image_uri(session.boto_region_name, 'image-classification', repo_version=\"latest\")"
   ]
  },
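  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`get_image_uri` resolves the ECR URI of the built-in algorithm image for our region, so printing it shows exactly which container the model and the transform job will run in.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The ECR image URI that will be used for both the model and the batch transform.\n",
    "print(container)"
   ]
  },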
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Look up the training job that produced the model and retrieve the S3 location of its artifacts.\n",
    "job_name = 'image-classification-2019-09-12-17-02-05-917'\n",
    "training_job_info = session.sagemaker_client.describe_training_job(TrainingJobName=job_name)\n",
    "\n",
    "model_artifacts = training_job_info['ModelArtifacts']['S3ModelArtifacts']"
   ]
  },
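  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The artifacts are only usable if the training job actually finished, so it is worth checking the `TrainingJobStatus` field of the description before going any further.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make sure the training job completed before we try to build a model from its artifacts.\n",
    "assert training_job_info['TrainingJobStatus'] == 'Completed', \\\n",
    "    training_job_info.get('FailureReason', 'training job did not complete')\n",
    "print(model_artifacts)"
   ]
  },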
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_name = job_name + \"-model\"\n",
    "\n",
    "# We also need to tell SageMaker which container should be used for inference and where it should\n",
    "# retrieve the model artifacts from. In our case, the image-classification container that we used\n",
    "# for training can also be used for inference.\n",
    "primary_container = {\n",
    "    \"Image\": container,\n",
    "    \"ModelDataUrl\": model_artifacts\n",
    "}\n",
    "\n",
    "# And lastly we construct the SageMaker model\n",
    "model_info = session.sagemaker_client.create_model(\n",
    "    ModelName=model_name,\n",
    "    ExecutionRoleArn=role,\n",
    "    PrimaryContainer=primary_container)"
   ]
  },
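  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`create_model` returns the ARN of the model it registered; printing it confirms the model exists before we reference it by name in the transform job.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The ARN of the newly created model.\n",
    "print(model_info['ModelArn'])"
   ]
  },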
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "transform_job_name = 'nsfwbatchtransform' + strftime(\"%Y-%m-%d-%H-%M-%S\", gmtime())\n",
    "\n",
    "# Now we construct the data structure which will describe the batch transform job.\n",
    "transform_request = \\\n",
    "{\n",
    "    \"TransformJobName\": transform_job_name,\n",
    "\n",
    "    # This is the name of the model that we created earlier.\n",
    "    \"ModelName\": model_name,\n",
    "\n",
    "    # This describes how many compute instances should be used at once. If you happen to be doing a very large\n",
    "    # batch transform job it may be worth running multiple compute instances at once.\n",
    "    \"MaxConcurrentTransforms\": 1,\n",
    "\n",
    "    # This says how big each individual request sent to the model should be, at most. One of the things that\n",
    "    # SageMaker does in the background is to split our data up into chunks so that each chunk stays under\n",
    "    # this size limit.\n",
    "    \"MaxPayloadInMB\": 6,\n",
    "\n",
    "    # This controls how many records are packed into each request. Since the split type below is 'None',\n",
    "    # each image file is treated as a single record, so each request will contain one image either way.\n",
    "    \"BatchStrategy\": \"MultiRecord\",\n",
    "\n",
    "    # This next object describes where the output data should be stored. Some of the more advanced options which\n",
    "    # we don't cover here also describe how SageMaker should collect output from various batches.\n",
    "    \"TransformOutput\": {\n",
    "        \"S3OutputPath\": \"s3://project-completion-udacity/nsfw_dataset/batch-transform/\"\n",
    "    },\n",
    "\n",
    "    # Here we describe our input data. We need to tell SageMaker where on S3 the input is stored and what the\n",
    "    # individual samples look like. In our case each S3 object is a single image, so we set the split type to\n",
    "    # 'None' and the content type to 'application/x-image' so that each image is sent to the model unmodified.\n",
    "    \"TransformInput\": {\n",
    "        \"ContentType\": \"application/x-image\",\n",
    "        \"SplitType\": \"None\",\n",
    "        \"DataSource\": {\n",
    "            \"S3DataSource\": {\n",
    "                \"S3DataType\": \"S3Prefix\",\n",
    "                \"S3Uri\": \"s3://project-completion-udacity/evaluation/testing/\"\n",
    "            }\n",
    "        }\n",
    "    },\n",
    "\n",
    "    # And lastly we tell SageMaker what sort of compute instance we would like it to use.\n",
    "    \"TransformResources\": {\n",
    "        \"InstanceType\": \"ml.m4.xlarge\",\n",
    "        \"InstanceCount\": 1\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "transform_response = session.sagemaker_client.create_transform_job(**transform_request)"
   ]
  },
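  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`create_transform_job` returns the ARN of the job it started. We can also ask `describe_transform_job` for the job's current status to confirm it was accepted.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Confirm the job was accepted and see its initial status (usually 'InProgress').\n",
    "print(transform_response['TransformJobArn'])\n",
    "print(session.sagemaker_client.describe_transform_job(\n",
    "    TransformJobName=transform_job_name)['TransformJobStatus'])"
   ]
  },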
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "........................................"
     ]
    }
   ],
   "source": [
    "transform_desc = session.wait_for_transform_job(transform_job_name)"
   ]
  },
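  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Once the wait returns we can confirm the job completed and list the output files it wrote to S3. The bucket and prefix below are a sketch that mirrors the `S3OutputPath` configured above; adjust them if you changed that path. Batch transform writes one `.out` file per input image, which you can copy to your computer and analyze with analyze.results.ipynb.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Verify that the transform job finished successfully before looking for output.\n",
    "final_desc = session.sagemaker_client.describe_transform_job(TransformJobName=transform_job_name)\n",
    "assert final_desc['TransformJobStatus'] == 'Completed', \\\n",
    "    final_desc.get('FailureReason', 'transform job did not complete')\n",
    "\n",
    "# List the output objects. The bucket and prefix mirror the S3OutputPath used in the request above.\n",
    "s3 = session.boto_session.client('s3')\n",
    "listing = s3.list_objects_v2(Bucket='project-completion-udacity', Prefix='nsfw_dataset/batch-transform/')\n",
    "for obj in listing.get('Contents', []):\n",
    "    print(obj['Key'])"
   ]
  },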
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}