diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 57c7e0c1..94bfae50 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -1,10 +1,13 @@ name: ci-cd -on: pull_request, push +# run the action on pull_requests and pushes +on: [pull_request, push] jobs: + # first job to test the application using pytest build: - runs-on: ubuntu-latest + runs-on: ubuntu-latest # choose the OS for running the action + # define the individual sequential steps to be run steps: - name: Checkout the repository uses: actions/checkout@v2 @@ -18,11 +21,16 @@ jobs: - name: Run pytest run: | pytest - + + # second job to zip the codebase and upload it as an artifact when build succeeds upload_zip: - runs-on: ubuntu-latest + runs-on: ubuntu-latest # choose the OS for running the action needs: build + + # only run this action for pushes if: ${{ github.event_name == 'push' }} + + # define the individual sequential steps to be run steps: - name: Checkout the repository uses: actions/checkout@v2 diff --git a/Gopi.md b/Gopi.md new file mode 100644 index 00000000..d0aeeccc --- /dev/null +++ b/Gopi.md @@ -0,0 +1,20 @@ +# ML-Ops Demo/Assignment + +This repository contains code which demonstrates ML-Ops using a `FastAPI` application which predicts the flower class using the IRIS dataset (https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html) + +## Running Instructions +- Create a fork of the repo using the `fork` button. +- Clone your fork using `git clone https://www.github.com/YOUR-USERNAME/mlops-iris.git` +- Install dependencies using `pip3 install -r requirements.txt` +- Run application using `python3 main.py` +- Run tests using `pytest` + +## CI/CD +- `build` (test) for all the pull requests +- `build` (test) and `upload_zip` for all pushes + +## Assignment Tasks +1. Change this README to add your name here: . Add and commit changes to a new branch and create a pull request ONLY TO YOUR OWN FORK to see the CI/CD build happening. 
If the build succeeds, merge the pull request with master and see the CI/CD `upload_zip` take place. +2. Add 2 more unit tests of your choice to `test_app.py` and make sure they are passing. +3. Add one more classifier to startup and use only the one with better accuracy. +4. Add the attribute `timestamp` to the response and return the current time with it. diff --git a/main.py b/main.py index 28cb98dd..48d45d79 100644 --- a/main.py +++ b/main.py @@ -1,36 +1,61 @@ import uvicorn from fastapi import FastAPI from pydantic import BaseModel -from ml_utils import load_model, predict +from ml_utils import load_model, predict, retrain +from typing import List -app = FastAPI( - title="Iris Predictor", - docs_url="/" -) +# defining the main app +app = FastAPI(title="Iris Predictor", docs_url="/") +# calling the load_model during startup. +# this will train the model and keep it loaded for prediction. app.add_event_handler("startup", load_model) +# class which is expected in the payload class QueryIn(BaseModel): sepal_length: float sepal_width: float petal_length: float petal_width: float + +# class which is returned in the response class QueryOut(BaseModel): flower_class: str +# class which is expected in the payload while re-training +class FeedbackIn(BaseModel): + sepal_length: float + sepal_width: float + petal_length: float + petal_width: float + flower_class: str +# Route definitions @app.get("/ping") +# Healthcheck route to ensure that the API is up and running def ping(): return {"ping": "pong"} @app.post("/predict_flower", response_model=QueryOut, status_code=200) -def predict_flower( - query_data: QueryIn -): - output = {'flower_class': predict(query_data)} +# Route to do the prediction using the ML model defined. 
+# Payload: QueryIn containing the parameters +# Response: QueryOut containing the flower_class predicted (200) +def predict_flower(query_data: QueryIn): + output = {"flower_class": predict(query_data)} return output +@app.post("/feedback_loop", status_code=200) +# Route to further train the model based on user input in the form of a feedback loop +# Payload: FeedbackIn containing the parameters and correct flower class +# Response: Dict with detail confirming success (200) +def feedback_loop(data: List[FeedbackIn]): + retrain(data) + return {"detail": "Feedback loop successful"} + + +# Main function to start the app when main.py is called if __name__ == "__main__": - uvicorn.run("main:app", host='0.0.0.0', port=8888, reload=True) + # Uvicorn is used to run the server and listen for incoming API requests on 0.0.0.0:8888 + uvicorn.run("main:app", host="0.0.0.0", port=8888, reload=True) diff --git a/ml_utils.py b/ml_utils.py index b11cdb56..bdd4dc83 100644 --- a/ml_utils.py +++ b/ml_utils.py @@ -3,29 +3,39 @@ from sklearn.naive_bayes import GaussianNB from sklearn.metrics import accuracy_score +# define a Gaussian NB classifier clf = GaussianNB() -classes = { - 0: "Iris Setosa", - 1: "Iris Versicolour", - 2: "Iris Virginica" -} +# define the class encodings and reverse encodings +classes = {0: "Iris Setosa", 1: "Iris Versicolour", 2: "Iris Virginica"} +r_classes = {y: x for x, y in classes.items()} +# function to train and load the model during startup def load_model(): - X, y = datasets.load_iris(return_X_y=True) - - X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2) - clf.fit(X_train, y_train) - - acc = accuracy_score(y_test, clf.predict(X_test)) - print(f"Model trained with accuracy: {round(acc, 3)}") - -def predict(query_data): - x = list(query_data.dict().values()) - prediction = clf.predict([x])[0] - print(f"Model prediction: {classes[prediction]}") - return classes[prediction] + # load the dataset from the official sklearn datasets + X, y = 
datasets.load_iris(return_X_y=True) + # do the test-train split and train the model + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + clf.fit(X_train, y_train) + # calculate and print the accuracy score + acc = accuracy_score(y_test, clf.predict(X_test)) + print(f"Model trained with accuracy: {round(acc, 3)}") +# function to predict the flower using the model +def predict(query_data): + x = list(query_data.dict().values()) + prediction = clf.predict([x])[0] + print(f"Model prediction: {classes[prediction]}") + return classes[prediction] + +# function to retrain the model as part of the feedback loop +def retrain(data): + # pull out the relevant X and y from the FeedbackIn object + X = [list(d.dict().values())[:-1] for d in data] + y = [r_classes[d.flower_class] for d in data] + + # fit the classifier again based on the new data obtained + clf.fit(X, y) diff --git a/test_app.py b/test_app.py index 44cb822b..b65fc902 100644 --- a/test_app.py +++ b/test_app.py @@ -1,21 +1,26 @@ from fastapi.testclient import TestClient from main import app - +# test to check the correct functioning of the /ping route def test_ping(): with TestClient(app) as client: response = client.get("/ping") + # asserting the correct response is received assert response.status_code == 200 - assert response.json() == {"ping":"pong"} + assert response.json() == {"ping": "pong"} + +# test to check if Iris Virginica is classified correctly def test_pred_virginica(): + # defining a sample payload for the testcase payload = { - "sepal_length": 3, - "sepal_width": 5, - "petal_length": 3.2, - "petal_width": 4.4 + "sepal_length": 3, + "sepal_width": 5, + "petal_length": 3.2, + "petal_width": 4.4, } with TestClient(app) as client: - response = client.post('/predict_flower', json=payload) + response = client.post("/predict_flower", json=payload) + # asserting the correct response is received assert response.status_code == 200 - assert response.json() == {'flower_class': "Iris 
Virginica"} \ No newline at end of file + assert response.json() == {"flower_class": "Iris Virginica"}