'''
automate_supply_chain.py

A partial talk-through of how I used this script is in this video:
https://youtu.be/x01N1kIQhUs

Description:
AWS SageMaker script I used in automating a supply chain at my previous employer.
'''
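# A minimal sketch of the spreadsheet layout this script assumes (column names
# and numbers below are hypothetical, not from the original data):
#
#   Date         Copper   PVC     Steel
#   2017-01-31    120.0   310.5    42.0
#   2017-02-28    131.0   295.0    40.5
#   ...
#
# Dates in the first column (one row per month), one column per raw material.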
%matplotlib inline
!conda install -y s3fs  # s3fs isn't preinstalled on every SageMaker kernel

from __future__ import print_function  # Must come before all other imports

import datetime
import json
import os
import random
import sys
import zipfile
from random import shuffle
from urllib.request import urlretrieve

import boto3
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import s3fs
import sagemaker
from dateutil.parser import parse
from sagemaker import get_execution_role

import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import IntSlider, FloatSlider, Checkbox

np.random.seed(1)
"""Input your own file path
in the below lines
bucket='{{{bucket_name}}}7'
prefix = 'prysmian-forecasting' #Used for denoting storage location
sagemaker_session = sagemaker.Session()
role = get_execution_role()
folder1='sagemaker' #First Directory within Bucket
folder2='data' # Second Directory within bucket
folder3='train' # Third Directory within Bucket
data_key = 'RawMaterialItems.xlsx' #File name
s3_data_path = 's3://{}/{}/{}/{}/{}'.format(bucket, folder1, folder2, folder3, data_key)
s3_output_path = "{}/{}/output".format(bucket, prefix)
"""
from sagemaker.amazon.amazon_estimator import get_image_uri
image_name = get_image_uri(boto3.Session().region_name, 'forecasting-deepar')
freq = 'M'  # Monthly frequency. The spreadsheet's raw YY-MMM months don't parse in pandas, so dates were converted to month end
prediction_length = 5  # How many months into the future we want to predict
context_length = 12  # How much history to supply to the algorithm - a year is more than enough
data = pd.read_excel(s3_input_path, parse_dates=True, index_col=0)  # Sets the first spreadsheet column (the dates) as the index
num_timeseries = data.shape[1]  # Number of columns = number of time series (one per material)
data_length = data.index.size  # Length of each time series (grows month to month as data accumulates)
print("This is the number of time series you're running through the algorithm (This many materials):")
print(num_timeseries)
print("This is the # of months of data that your spreadsheet has:")
print(data_length)
t0 = data.index[0]
print("This is the beginning date:")
print(t0)
time_series = []  # Holds one pandas Series per material
index = pd.date_range(start=t0, freq=freq, periods=data_length)  # Same index for every series
for i in range(num_timeseries):
    time_series.append(pd.Series(data=data.iloc[:, i], index=index))  # Must treat this dynamically - the length changes monthly
print(time_series[2])  # Visual check that the data loaded correctly. No graph = no bueno amigo
time_series[2].plot()
plt.show()
time_series_training = []  # Training set: each series minus its final prediction_length months
for ts in time_series:
    time_series_training.append(ts[:-prediction_length])  # Hold out the last five months - that's what we're predicting
time_series[2].plot(label='test')
time_series_training[2].plot(label='train', ls=':')  # Visual check of the train/test split the model will be scored on
plt.legend()
plt.show()
def series_to_obj(ts, cat=None):
    obj = {"start": str(ts.index[0]), "target": list(ts)}
    if cat is not None:
        obj["cat"] = cat
    return obj

def series_to_jsonline(ts, cat=None):
    return json.dumps(series_to_obj(ts, cat))  # The most annoying step is the conversion to JSON - this makes dumping the Excel data easier
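# Each series becomes one JSON line in the format DeepAR expects, e.g.
# (values hypothetical):
#   {"start": "2017-01-31 00:00:00", "target": [120.0, 131.0, 118.5]}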
encoding = "utf-8"  # JSON strings must be encoded to bytes before writing to S3
s3filesystem = s3fs.S3FileSystem()  # Data flow: Excel -> DataFrame -> Series -> JSON -> UTF-8 bytes -> S3 bucket. Brutal, dude
with s3filesystem.open(s3_data_path + "/train/train.json", 'wb') as fp:
    for ts in time_series_training:
        fp.write(series_to_jsonline(ts).encode(encoding))
        fp.write('\n'.encode(encoding))
with s3filesystem.open(s3_data_path + "/test/test.json", 'wb') as fp:
    for ts in time_series:
        fp.write(series_to_jsonline(ts).encode(encoding))
        fp.write('\n'.encode(encoding))
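# Optional sanity check, not part of the original script: read back the first
# training record to confirm the JSON Lines file landed in S3 intact.
with s3filesystem.open(s3_data_path + "/train/train.json", 'rb') as fp:
    print(fp.readline().decode(encoding))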
estimator = sagemaker.estimator.Estimator(
    sagemaker_session=sagemaker_session,
    image_name=image_name,
    role=role,
    train_instance_count=1,
    train_instance_type='ml.c4.xlarge',
    base_job_name='DEMO-deepar',
    output_path="s3://" + s3_output_path
)
hyperparameters = {
    "time_freq": freq,
    "context_length": str(context_length),
    "prediction_length": str(prediction_length),
    "num_cells": "40",
    "num_layers": "3",
    "likelihood": "gaussian",
    "epochs": "80",
    "mini_batch_size": "32",
    "learning_rate": "0.001",
    "dropout_rate": "0.05",
    "early_stopping_patience": "10"
}
estimator.set_hyperparameters(**hyperparameters)
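# Rough intuition for the settings above (general DeepAR guidance, not values
# tuned for the original project): context_length is how much history each
# training example sees, prediction_length must match what you request at
# inference time, and epochs/early_stopping_patience trade training time for fit.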
%%time
data_channels = {
    "train": "{}/train/".format(s3_data_path),
    "test": "{}/test/".format(s3_data_path)
}
estimator.fit(inputs=data_channels, wait=True)
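# DeepAR fits on the "train" channel and, when a "test" channel is supplied,
# reports accuracy metrics on it in the training logs once fitting completes.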
job_name = estimator.latest_training_job.name
endpoint_name = sagemaker_session.endpoint_from_job(
    job_name=job_name,
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
    deployment_image=image_name,
    role=role
)
class DeepARPredictor(sagemaker.predictor.RealTimePredictor):

    def set_prediction_parameters(self, freq, prediction_length):
        """Set the time frequency and prediction length parameters. This method **must** be called
        before `predict` can be used.
        Parameters:
        freq -- string indicating the time frequency
        prediction_length -- integer, number of predicted time points
        Return value: none.
        """
        self.freq = freq
        self.prediction_length = prediction_length

    def predict(self, ts, cat=None, encoding="utf-8", num_samples=100, quantiles=["0.1", "0.75", "0.9"]):
        """Requests predictions for the time series listed in `ts`, each with the (optional)
        corresponding category listed in `cat`.
        Parameters:
        ts -- list of `pandas.Series` objects, the time series to predict
        cat -- list of integers (default: None)
        encoding -- string, encoding to use for the request (default: "utf-8")
        num_samples -- integer, number of samples to compute at prediction time (default: 100)
        quantiles -- list of strings specifying the quantiles to compute (default: ["0.1", "0.75", "0.9"])
        Return value: list of `pandas.DataFrame` objects, each containing the predictions.
        """
        prediction_times = [x.index[-1] + x.index.freq for x in ts]  # First timestamp after each series ends
        req = self.__encode_request(ts, cat, encoding, num_samples, quantiles)
        res = super(DeepARPredictor, self).predict(req)
        return self.__decode_response(res, prediction_times, encoding)

    def __encode_request(self, ts, cat, encoding, num_samples, quantiles):
        instances = [series_to_obj(ts[k], cat[k] if cat else None) for k in range(len(ts))]
        configuration = {"num_samples": num_samples, "output_types": ["quantiles"], "quantiles": quantiles}
        http_request_data = {"instances": instances, "configuration": configuration}
        return json.dumps(http_request_data).encode(encoding)

    def __decode_response(self, response, prediction_times, encoding):
        response_data = json.loads(response.decode(encoding))
        list_of_df = []
        for k in range(len(prediction_times)):
            prediction_index = pd.date_range(start=prediction_times[k], freq=self.freq, periods=self.prediction_length)
            list_of_df.append(pd.DataFrame(data=response_data['predictions'][k]['quantiles'], index=prediction_index))
        return list_of_df
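# The decoded endpoint response has roughly this shape (values hypothetical):
#   {"predictions": [{"quantiles": {"0.1": [...], "0.75": [...], "0.9": [...]}}]}
# Each quantile list holds prediction_length values, one per forecasted month.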
predictor = DeepARPredictor(
    endpoint=endpoint_name,
    sagemaker_session=sagemaker_session,
    content_type="application/json"
)
predictor.set_prediction_parameters(freq, prediction_length)
list_of_df = predictor.predict(time_series_training[:5])  # Forecast the first five materials
actual_data = time_series[:5]  # Must match the number of series predicted above
for k in range(len(list_of_df)):
    plt.figure(figsize=(12, 6))
    actual_data[k][-prediction_length - context_length:].plot(label='target')
    p10 = list_of_df[k]['0.1']
    p90 = list_of_df[k]['0.9']
    plt.fill_between(p10.index, p10, p90, color='y', alpha=0.5, label='80% confidence interval')
    list_of_df[k]['0.75'].plot(label='prediction (p75 quantile)')
    plt.legend()
    plt.show()
print(predictor.predict(time_series[:4]))
sagemaker_session.delete_endpoint(endpoint_name) #Save my bank account and run this command whenever you're done