Skip to content
This repository has been archived by the owner on Nov 23, 2023. It is now read-only.

added our best model #6

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Regression_AE5_DSFT.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Index,Container,Size_Grade,Weight_Kg,Low_Price,High_Price,Sales_Total,Total_Qty_Sold,Total_Kg_Sold,Stock_On_Hand,Day,Month,Year,Province_EASTERN CAPE,Province_NATAL,Province_ORANGE FREE STATE,Province_TRANSVAAL,Province_W.CAPE-BERGRIVER ETC,Province_WEST COAST
2.0,0,0,18.3,220.0,220.0,1760.0,8.0,146.4,2.0,20.0,1.0,2020.0,0.0,0.0,0.0,0.0,1.0,0.0
Binary file added __pycache__/model.cpython-38.pyc
Binary file not shown.
Binary file added __pycache__/model.cpython-39.pyc
Binary file not shown.
3 changes: 1 addition & 2 deletions api.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@

# Load our model into memory.
# Please update this path to reflect your own trained model.
static_model = load_model(
path_to_model='assets/trained-models/apples_simple_lm_regression.pkl')
static_model = load_model(path_to_model='assets/trained-models/best_random.pkl')

print ('-'*40)
print ('Model succesfully loaded')
Expand Down
Binary file added assets/trained-models/best_random.pkl
Binary file not shown.
20 changes: 16 additions & 4 deletions model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import pandas as pd
import pickle
import json
from sklearn import preprocessing

def _preprocess_data(data):
"""Private helper function to preprocess data for model prediction.
Expand All @@ -49,6 +50,7 @@ def _preprocess_data(data):
feature_vector_dict = json.loads(data)
# Load the dictionary as a Pandas DataFrame.
feature_vector_df = pd.DataFrame.from_dict([feature_vector_dict])


# ---------------------------------------------------------------
# NOTE: You will need to swap the lines below for your own data
Expand All @@ -60,10 +62,19 @@ def _preprocess_data(data):

# ----------- Replace this code with your own preprocessing steps --------


feature_vector_df = feature_vector_df[(feature_vector_df['Commodities'] == 'APPLE GOLDEN DELICIOUS')]
predict_vector = feature_vector_df[['Total_Qty_Sold','Stock_On_Hand']]

#feature_vector_df = feature_vector_df[feature_vector_df['Commodities'] == "APPLE GOLDEN DELICIOUS"]
predict_vector = feature_vector_df
predict_vector.index.rename('Index',inplace=True)
predict_vector.index +=1
predict_vector.set_index('Index', inplace=True)
label_encoder = preprocessing.LabelEncoder()

# Encode labels in column 'Size_Grade' of the incoming request data.
predict_vector['Size_Grade']= label_encoder.fit_transform(predict_vector['Size_Grade'])

# Encode labels in column 'Container' of the incoming request data.
predict_vector['Container']= label_encoder.fit_transform(predict_vector['Container'])
predict_vector = pd.get_dummies(predict_vector, drop_first=True)
# ------------------------------------------------------------------------

return predict_vector
Expand Down Expand Up @@ -104,6 +115,7 @@ def make_prediction(data, model):
"""
# Data preprocessing.
prep_data = _preprocess_data(data)
prep_data.to_csv("Regression_AE5_DSFT.csv")
# Perform prediction with model and preprocessed data.
prediction = model.predict(prep_data)
# Format as list for output standardisation.
Expand Down
20 changes: 19 additions & 1 deletion utils/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,29 @@
import requests
import pandas as pd
import numpy as np
from sklearn import preprocessing

# Load data from file to send as an API POST request.
# We prepare a DataFrame with the public test set
# from the Kaggle challenge.
test = pd.read_csv('data/test_data.csv')
feature_vector_df = pd.read_csv('data/test_data.csv')
feature_vector_df = feature_vector_df[(feature_vector_df['Commodities'] == 'APPLE GOLDEN DELICIOUS')]
predict_vector = feature_vector_df
predict_vector = predict_vector.reset_index(drop=True)
predict_vector.drop('Commodities',axis = 1, inplace = True)
predict_vector['Date'] = predict_vector['Date'].apply(lambda x: pd.to_datetime(x))
predict_vector['Day'] = predict_vector['Date'].dt.day
predict_vector['Month'] = predict_vector['Date'].dt.month
predict_vector['Year'] = predict_vector['Date'].dt.year
predict_vector.drop('Date', axis = 1, inplace = True)
label_encoder = preprocessing.LabelEncoder()

# Encode labels in column 'Size_Grade' of the test data.
predict_vector['Size_Grade']= label_encoder.fit_transform(predict_vector['Size_Grade'])

# Encode labels in column 'Container' of the test data.
predict_vector['Container']= label_encoder.fit_transform(predict_vector['Container'])
test = pd.get_dummies(predict_vector, drop_first=True)

# Convert our DataFrame to a JSON string.
# This step is necessary in order to transmit our data via HTTP/S
Expand Down