diff --git a/Regression_AE5_DSFT.csv b/Regression_AE5_DSFT.csv new file mode 100644 index 0000000..f019f1f --- /dev/null +++ b/Regression_AE5_DSFT.csv @@ -0,0 +1,2 @@ +Index,Container,Size_Grade,Weight_Kg,Low_Price,High_Price,Sales_Total,Total_Qty_Sold,Total_Kg_Sold,Stock_On_Hand,Day,Month,Year,Province_EASTERN CAPE,Province_NATAL,Province_ORANGE FREE STATE,Province_TRANSVAAL,Province_W.CAPE-BERGRIVER ETC,Province_WEST COAST +2.0,0,0,18.3,220.0,220.0,1760.0,8.0,146.4,2.0,20.0,1.0,2020.0,0.0,0.0,0.0,0.0,1.0,0.0 diff --git a/__pycache__/model.cpython-38.pyc b/__pycache__/model.cpython-38.pyc new file mode 100644 index 0000000..490ed1e Binary files /dev/null and b/__pycache__/model.cpython-38.pyc differ diff --git a/__pycache__/model.cpython-39.pyc b/__pycache__/model.cpython-39.pyc new file mode 100644 index 0000000..15aae88 Binary files /dev/null and b/__pycache__/model.cpython-39.pyc differ diff --git a/api.py b/api.py index a206495..c84b59e 100644 --- a/api.py +++ b/api.py @@ -29,8 +29,7 @@ # Load our model into memory. # Please update this path to reflect your own trained model. -static_model = load_model( - path_to_model='assets/trained-models/apples_simple_lm_regression.pkl') +static_model = load_model(path_to_model='assets/trained-models/best_random.pkl') print ('-'*40) print ('Model succesfully loaded') diff --git a/assets/trained-models/best_random.pkl b/assets/trained-models/best_random.pkl new file mode 100644 index 0000000..a977a05 Binary files /dev/null and b/assets/trained-models/best_random.pkl differ diff --git a/model.py b/model.py index d72c14b..f73f03f 100644 --- a/model.py +++ b/model.py @@ -26,6 +26,7 @@ import pandas as pd import pickle import json +from sklearn import preprocessing def _preprocess_data(data): """Private helper function to preprocess data for model prediction. @@ -49,6 +50,7 @@ def _preprocess_data(data): feature_vector_dict = json.loads(data) # Load the dictionary as a Pandas DataFrame. feature_vector_df = pd.DataFrame.from_dict([feature_vector_dict]) + # --------------------------------------------------------------- # NOTE: You will need to swap the lines below for your own data @@ -60,10 +62,19 @@ def _preprocess_data(data): # ----------- Replace this code with your own preprocessing steps -------- - - feature_vector_df = feature_vector_df[(feature_vector_df['Commodities'] == 'APPLE GOLDEN DELICIOUS')] - predict_vector = feature_vector_df[['Total_Qty_Sold','Stock_On_Hand']] - + #feature_vector_df = feature_vector_df[feature_vector_df['Commodities'] == "APPLE GOLDEN DELICIOUS"] + predict_vector = feature_vector_df + predict_vector.index.rename('Index',inplace=True) + predict_vector.index +=1 + predict_vector.set_index('Index', inplace=True) + label_encoder = preprocessing.LabelEncoder() + + # Encode labels in column 'Size_Grade' in training data. + predict_vector['Size_Grade']= label_encoder.fit_transform(predict_vector['Size_Grade']) + + # Encode labels in column 'Container' in training data + predict_vector['Container']= label_encoder.fit_transform(predict_vector['Container']) + predict_vector = pd.get_dummies(predict_vector, drop_first=True) # ------------------------------------------------------------------------ return predict_vector @@ -104,6 +115,7 @@ def make_prediction(data, model): """ # Data preprocessing. prep_data = _preprocess_data(data) + prep_data.to_csv("Regression_AE5_DSFT.csv") # Perform prediction with model and preprocessed data. prediction = model.predict(prep_data) # Format as list for output standerdisation. diff --git a/utils/request.py b/utils/request.py index 79cfe84..bdb1a15 100644 --- a/utils/request.py +++ b/utils/request.py @@ -21,11 +21,29 @@ import requests import pandas as pd import numpy as np +from sklearn import preprocessing # Load data from file to send as an API POST request. # We prepare a DataFrame with the public test set # from the Kaggle challenge. -test = pd.read_csv('data/test_data.csv') +feature_vector_df = pd.read_csv('data/test_data.csv') +feature_vector_df = feature_vector_df[(feature_vector_df['Commodities'] == 'APPLE GOLDEN DELICIOUS')] +predict_vector = feature_vector_df +predict_vector = predict_vector.reset_index(drop=True) +predict_vector.drop('Commodities',axis = 1, inplace = True) +predict_vector['Date'] = predict_vector['Date'].apply(lambda x: pd.to_datetime(x)) +predict_vector['Day'] = predict_vector['Date'].dt.day +predict_vector['Month'] = predict_vector['Date'].dt.month +predict_vector['Year'] = predict_vector['Date'].dt.year +predict_vector.drop('Date', axis = 1, inplace = True) +label_encoder = preprocessing.LabelEncoder() + +# Encode labels in column 'Size_Grade' in training data. +predict_vector['Size_Grade']= label_encoder.fit_transform(predict_vector['Size_Grade']) + +# Encode labels in column 'Container' in training data +predict_vector['Container']= label_encoder.fit_transform(predict_vector['Container']) +test = pd.get_dummies(predict_vector, drop_first=True) # Convert our DataFrame to a JSON string. # This step is necessary in order to transmit our data via HTTP/S