From 90c5a07661d196c424d2bc104099531c5a5f291f Mon Sep 17 00:00:00 2001 From: akshatcx Date: Thu, 1 Jul 2021 01:34:19 +0530 Subject: [PATCH 1/6] [update] pipeline --- .github/workflows/cicd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 57c7e0c1..a485adbd 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -1,6 +1,6 @@ name: ci-cd -on: pull_request, push +on: [pull_request, push] jobs: build: From 8d3a7976c0b27d57a55fde2d92f7b46c040d5687 Mon Sep 17 00:00:00 2001 From: akshatcx Date: Thu, 1 Jul 2021 01:59:22 +0530 Subject: [PATCH 2/6] [add] README --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 00000000..d0aeeccc --- /dev/null +++ b/README.md @@ -0,0 +1,20 @@ +# ML-Ops Demo/Assignment + +This repository contains code which demonstrates ML-Ops using a `FastAPI` application which predicts the flower class using the IRIS dataset (https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html) + +## Running Instructions +- Create a fork of the repo using the `fork` button. +- Clone your fork using `git clone https://www.github.com//mlops-iris.git` +- Install dependencies using `pip3 install requirements.txt` +- Run application using `python3 main.py` +- Run tests using `pytest` + +## CI/CD +- `build` (test) for all the pull requests +- `build` (test) and `upload_zip` for all pushes + +## Assignment Tasks +1. Change this README to add your name here: . Add and commit changes to a new branch and create a pull request ONLY TO YOUR OWN FORK to see the CI/CD build happening. If the build succeeds, merge the pull request with master and see the CI/CD `upload_zip` take place. +2. Add 2 more unit tests of your choice to `test_app.py` and make sure they are passing. +3. Add one more classifier to startup and use only the one with better accuracy. +4. Add the attribute `timestamp` to the response and return the current time with it. From 956b24485f910de2b219720b1850e741efb23f0c Mon Sep 17 00:00:00 2001 From: akshatcx Date: Thu, 1 Jul 2021 02:11:12 +0530 Subject: [PATCH 3/6] [add] comments --- .github/workflows/cicd.yml | 7 ++++++- main.py | 21 ++++++++++++--------- ml_utils.py | 31 +++++++++++++------------------ test_app.py | 18 ++++++++++-------- 4 files changed, 41 insertions(+), 36 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index a485adbd..6720c644 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -1,8 +1,10 @@ name: ci-cd +# run the action on pull_requests and pushes on: [pull_request, push] jobs: + # first job to test the application using pytest build: runs-on: ubuntu-latest steps: @@ -18,10 +20,13 @@ jobs: - name: Run pytest run: | pytest - + + # second job to zip the codebase and upload it as an artifact when build succeeds upload_zip: runs-on: ubuntu-latest needs: build + + # only run this action for pushes if: ${{ github.event_name == 'push' }} steps: - name: Checkout the repository diff --git a/main.py b/main.py index 28cb98dd..7351584f 100644 --- a/main.py +++ b/main.py @@ -3,34 +3,37 @@ from pydantic import BaseModel from ml_utils import load_model, predict -app = FastAPI( - title="Iris Predictor", - docs_url="/" -) +# defining the main app +app = FastAPI(title="Iris Predictor", docs_url="/") +# calling the load_model during startup app.add_event_handler("startup", load_model) +# class which is expected in the payload class QueryIn(BaseModel): sepal_length: float sepal_width: float petal_length: float petal_width: float + +# class which is returned in the response class QueryOut(BaseModel): flower_class: str +# Route definitions @app.get("/ping") def ping(): return {"ping": "pong"} @app.post("/predict_flower", response_model=QueryOut, status_code=200) -def predict_flower( - query_data: QueryIn -): - output = {'flower_class': predict(query_data)} +def predict_flower(query_data: QueryIn): + output = {"flower_class": predict(query_data)} return output + +# Main function to start the app when main.py is called if __name__ == "__main__": - uvicorn.run("main:app", host='0.0.0.0', port=8888, reload=True) + uvicorn.run("main:app", host="0.0.0.0", port=8888, reload=True) diff --git a/ml_utils.py b/ml_utils.py index b11cdb56..75a358b8 100644 --- a/ml_utils.py +++ b/ml_utils.py @@ -5,27 +5,22 @@ clf = GaussianNB() -classes = { - 0: "Iris Setosa", - 1: "Iris Versicolour", - 2: "Iris Virginica" -} +classes = {0: "Iris Setosa", 1: "Iris Versicolour", 2: "Iris Virginica"} +# function to train and load the model during startup def load_model(): - X, y = datasets.load_iris(return_X_y=True) - - X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2) - clf.fit(X_train, y_train) - - acc = accuracy_score(y_test, clf.predict(X_test)) - print(f"Model trained with accuracy: {round(acc, 3)}") - -def predict(query_data): - x = list(query_data.dict().values()) - prediction = clf.predict([x])[0] - print(f"Model prediction: {classes[prediction]}") - return classes[prediction] + X, y = datasets.load_iris(return_X_y=True) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + clf.fit(X_train, y_train) + acc = accuracy_score(y_test, clf.predict(X_test)) + print(f"Model trained with accuracy: {round(acc, 3)}") +# function to predict the flower using the model +def predict(query_data): + x = list(query_data.dict().values()) + prediction = clf.predict([x])[0] + print(f"Model prediction: {classes[prediction]}") + return classes[prediction] diff --git a/test_app.py b/test_app.py index 44cb822b..0541cce9 100644 --- a/test_app.py +++ b/test_app.py @@ -1,21 +1,23 @@ from fastapi.testclient import TestClient from main import app - +# test to check the correct functioning of the /ping route def test_ping(): with TestClient(app) as client: response = client.get("/ping") assert response.status_code == 200 - assert response.json() == {"ping":"pong"} + assert response.json() == {"ping": "pong"} + +# test to check if Iris Virginica is classified correctly def test_pred_virginica(): payload = { - "sepal_length": 3, - "sepal_width": 5, - "petal_length": 3.2, - "petal_width": 4.4 + "sepal_length": 3, + "sepal_width": 5, + "petal_length": 3.2, + "petal_width": 4.4, } with TestClient(app) as client: - response = client.post('/predict_flower', json=payload) + response = client.post("/predict_flower", json=payload) assert response.status_code == 200 - assert response.json() == {'flower_class': "Iris Virginica"} \ No newline at end of file + assert response.json() == {"flower_class": "Iris Virginica"} From 1bc156c5fde41b20c6c4143cdc25f24102e23ee3 Mon Sep 17 00:00:00 2001 From: akshatcx Date: Thu, 1 Jul 2021 21:41:25 +0530 Subject: [PATCH 4/6] [add] feedback look as a post request --- main.py | 15 ++++++++++++++- ml_utils.py | 6 ++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/main.py b/main.py index 7351584f..99cf35de 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,8 @@ import uvicorn from fastapi import FastAPI from pydantic import BaseModel -from ml_utils import load_model, predict +from ml_utils import load_model, predict, retrain +from typing import List # defining the main app app = FastAPI(title="Iris Predictor", docs_url="/") @@ -21,6 +22,13 @@ class QueryIn(BaseModel): class QueryOut(BaseModel): flower_class: str +# class which is expected in the payload while re-training +class FeedbackIn(BaseModel): + sepal_length: float + sepal_width: float + petal_length: float + petal_width: float + flower_class: str # Route definitions @app.get("/ping") @@ -33,6 +41,11 @@ def predict_flower(query_data: QueryIn): output = {"flower_class": predict(query_data)} return output +@app.post("/feedback_loop", status_code=200) +def feedback_loop(data: List[FeedbackIn]): + retrain(data) + return {"detail": "Feedback loop successful"} + # Main function to start the app when main.py is called if __name__ == "__main__": diff --git a/ml_utils.py b/ml_utils.py index 75a358b8..24703d04 100644 --- a/ml_utils.py +++ b/ml_utils.py @@ -6,6 +6,7 @@ clf = GaussianNB() classes = {0: "Iris Setosa", 1: "Iris Versicolour", 2: "Iris Virginica"} +r_classes = {y: x for x, y in classes.items()} # function to train and load the model during startup def load_model(): @@ -24,3 +25,8 @@ def predict(query_data): prediction = clf.predict([x])[0] print(f"Model prediction: {classes[prediction]}") return classes[prediction] + +def retrain(data): + X = [list(d.dict().values())[:-1] for d in data] + y = [r_classes[d.flower_class] for d in data] + clf.fit(X, y) From c074254d49087e813b076d24799229de434c3830 Mon Sep 17 00:00:00 2001 From: akshatcx Date: Thu, 1 Jul 2021 22:15:55 +0530 Subject: [PATCH 5/6] [add] elaborate comments --- .github/workflows/cicd.yml | 7 +++++-- main.py | 11 ++++++++++- ml_utils.py | 9 +++++++++ test_app.py | 3 +++ 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 6720c644..94bfae50 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -6,7 +6,8 @@ on: [pull_request, push] jobs: # first job to test the application using pytest build: - runs-on: ubuntu-latest + runs-on: ubuntu-latest # choose the OS for running the action + # define the individual sequential steps to be run steps: - name: Checkout the repository uses: actions/checkout@v2 @@ -23,11 +24,13 @@ jobs: # second job to zip the codebase and upload it as an artifact when build succeeds upload_zip: - runs-on: ubuntu-latest + runs-on: ubuntu-latest # choose the OS for running the action needs: build # only run this action for pushes if: ${{ github.event_name == 'push' }} + + # define the individual sequential steps to be run steps: - name: Checkout the repository uses: actions/checkout@v2 diff --git a/main.py b/main.py index 99cf35de..48d45d79 100644 --- a/main.py +++ b/main.py @@ -7,7 +7,8 @@ # defining the main app app = FastAPI(title="Iris Predictor", docs_url="/") -# calling the load_model during startup +# calling the load_model during startup. +# this will train the model and keep it loaded for prediction. app.add_event_handler("startup", load_model) # class which is expected in the payload @@ -32,16 +33,23 @@ class FeedbackIn(BaseModel): # Route definitions @app.get("/ping") +# Healthcheck route to ensure that the API is up and running def ping(): return {"ping": "pong"} @app.post("/predict_flower", response_model=QueryOut, status_code=200) +# Route to do the prediction using the ML model defined. +# Payload: QueryIn containing the parameters +# Response: QueryOut containing the flower_class predicted (200) def predict_flower(query_data: QueryIn): output = {"flower_class": predict(query_data)} return output @app.post("/feedback_loop", status_code=200) +# Route to further train the model based on user input in form of feedback loop +# Payload: FeedbackIn containing the parameters and correct flower class +# Response: Dict with detail confirming success (200) def feedback_loop(data: List[FeedbackIn]): retrain(data) return {"detail": "Feedback loop successful"} @@ -49,4 +57,5 @@ def feedback_loop(data: List[FeedbackIn]): # Main function to start the app when main.py is called if __name__ == "__main__": + # Uvicorn is used to run the server and listen for incoming API requests on 0.0.0.0:8888 uvicorn.run("main:app", host="0.0.0.0", port=8888, reload=True) diff --git a/ml_utils.py b/ml_utils.py index 24703d04..bdd4dc83 100644 --- a/ml_utils.py +++ b/ml_utils.py @@ -3,18 +3,23 @@ from sklearn.naive_bayes import GaussianNB from sklearn.metrics import accuracy_score +# define a Gaussain NB classifier clf = GaussianNB() +# define the class encodings and reverse encodings classes = {0: "Iris Setosa", 1: "Iris Versicolour", 2: "Iris Virginica"} r_classes = {y: x for x, y in classes.items()} # function to train and load the model during startup def load_model(): + # load the dataset from the official sklearn datasets X, y = datasets.load_iris(return_X_y=True) + # do the test-train split and train the model X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) clf.fit(X_train, y_train) + # calculate the print the accuracy score acc = accuracy_score(y_test, clf.predict(X_test)) print(f"Model trained with accuracy: {round(acc, 3)}") @@ -26,7 +31,11 @@ def predict(query_data): print(f"Model prediction: {classes[prediction]}") return classes[prediction] +# function to retrain the model as part of the feedback loop def retrain(data): + # pull out the relevant X and y from the FeedbackIn object X = [list(d.dict().values())[:-1] for d in data] y = [r_classes[d.flower_class] for d in data] + + # fit the classifier again based on the new data obtained clf.fit(X, y) diff --git a/test_app.py b/test_app.py index 0541cce9..b65fc902 100644 --- a/test_app.py +++ b/test_app.py @@ -5,12 +5,14 @@ def test_ping(): with TestClient(app) as client: response = client.get("/ping") + # asserting the correct response is received assert response.status_code == 200 assert response.json() == {"ping": "pong"} # test to check if Iris Virginica is classified correctly def test_pred_virginica(): + # defining a sample payload for the testcase payload = { "sepal_length": 3, "sepal_width": 5, @@ -19,5 +21,6 @@ def test_pred_virginica(): } with TestClient(app) as client: response = client.post("/predict_flower", json=payload) + # asserting the correct response is received assert response.status_code == 200 assert response.json() == {"flower_class": "Iris Virginica"} From d17baac7027441cefef389b4d209f2a9e2c1069b Mon Sep 17 00:00:00 2001 From: bksindhu Date: Sun, 11 Jul 2021 18:58:42 +0530 Subject: [PATCH 6/6] task 2 to 4 are done --- README.md | 2 +- main.py | 5 +++-- ml_utils.py | 4 +++- test_app.py | 33 ++++++++++++++++++++++++++++++++- 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index d0aeeccc..78d2369b 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ This repository contains code which demonstrates ML-Ops using a `FastAPI` applic - Create a fork of the repo using the `fork` button. - Clone your fork using `git clone https://www.github.com//mlops-iris.git` - Install dependencies using `pip3 install requirements.txt` -- Run application using `python3 main.py` +- Run application using `python main.py` - Run tests using `pytest` ## CI/CD diff --git a/main.py b/main.py index 48d45d79..69509476 100644 --- a/main.py +++ b/main.py @@ -3,6 +3,7 @@ from pydantic import BaseModel from ml_utils import load_model, predict, retrain from typing import List +import datetime # defining the main app app = FastAPI(title="Iris Predictor", docs_url="/") @@ -43,7 +44,7 @@ def ping(): # Payload: QueryIn containing the parameters # Response: QueryOut containing the flower_class predicted (200) def predict_flower(query_data: QueryIn): - output = {"flower_class": predict(query_data)} + output = {"flower_class": predict(query_data), "timestamp": datetime.datetime.now()} return output @app.post("/feedback_loop", status_code=200) @@ -58,4 +59,4 @@ def feedback_loop(data: List[FeedbackIn]): # Main function to start the app when main.py is called if __name__ == "__main__": # Uvicorn is used to run the server and listen for incoming API requests on 0.0.0.0:8888 - uvicorn.run("main:app", host="0.0.0.0", port=8888, reload=True) + uvicorn.run("main:app", host="localhost", port=8888, reload=True) diff --git a/ml_utils.py b/ml_utils.py index bdd4dc83..6f65b0d9 100644 --- a/ml_utils.py +++ b/ml_utils.py @@ -2,9 +2,11 @@ from sklearn.model_selection import train_test_split from sklearn.naive_bayes import GaussianNB from sklearn.metrics import accuracy_score +from sklearn.naive_bayes import MultinomialNB # define a Gaussain NB classifier -clf = GaussianNB() +#clf = GaussianNB() +clf = MultinomialNB() # define the class encodings and reverse encodings classes = {0: "Iris Setosa", 1: "Iris Versicolour", 2: "Iris Virginica"} diff --git a/test_app.py b/test_app.py index b65fc902..9032849a 100644 --- a/test_app.py +++ b/test_app.py @@ -1,5 +1,6 @@ from fastapi.testclient import TestClient from main import app +from datetime import datetime # test to check the correct functioning of the /ping route def test_ping(): @@ -23,4 +24,34 @@ def test_pred_virginica(): response = client.post("/predict_flower", json=payload) # asserting the correct response is received assert response.status_code == 200 - assert response.json() == {"flower_class": "Iris Virginica"} + assert response.json() == {"flower_class": "Iris Virginica", "TimeStamp": datetime.now} + +# test to check if Versicolor Species is classified correctly +def test_pred_versicolor(): + # defining a sample payload for the testcase + payload = { + "sepal_length": 6, + "sepal_width": 2.8, + "petal_length": 4.2, + "petal_width": 1.4, + } + with TestClient(app) as client: + response = client.post("/predict_flower", json=payload) + # asserting the correct response is received + assert response.status_code == 200 + assert response.json() == {"flower_class": "Versicolor Species", "TimeStamp": datetime.now} + +# test to check if Setosa is classified correctly +def test_pred_setosa(): + # defining a sample payload for the testcase + payload = { + "sepal_length": 5, + "sepal_width": 3.4, + "petal_length": 1.8, + "petal_width": 0.2, + } + with TestClient(app) as client: + response = client.post("/predict_flower", json=payload) + # asserting the correct response is received + assert response.status_code == 200 + assert response.json() == {"flower_class": "Iris Setosa", "TimeStamp": datetime.now} \ No newline at end of file