From cf1ef75d83b120522ad6d5e1687e641e7d72e6a4 Mon Sep 17 00:00:00 2001
From: Tom
Date: Wed, 15 Jan 2025 07:30:57 +0100
Subject: [PATCH] Migrate to fastapi and add model (#250)

* migrate to fastapi and refactor

* fix requirements

* Refactor and add models

* Refactor and cleanup
---
 src/backend/ml_api/README.md                  |   9 +-
 src/backend/ml_api/app.py                     |  52 ++++++
 .../binary}/__init__.py                       |   0
 .../{flaskr => }/classifiers/binary/base.py   |   0
 .../{flaskr => }/classifiers/binary/dummy.py  |   0
 .../classifiers/binary/fasttext_classifier.py |   2 +-
 .../prompt}/__init__.py                       |   0
 .../classifiers/prompt/gpt_classifier.py      |   0
 .../binary => external_models}/__init__.py    |   0
 .../external_models/transformers_model.py     |  27 +++
 src/backend/ml_api/flaskr/app.py              | 158 ------------------
 src/backend/ml_api/requirements.txt           |   6 +-
 .../prompt => routers}/__init__.py            |   0
 src/backend/ml_api/routers/classify.py        |  33 ++++
 src/backend/ml_api/routers/model.py           |  11 ++
 src/backend/ml_api/routers/question.py        |  25 +++
 src/backend/ml_api/routers/summarize.py       |  25 +++
 src/backend/ml_api/utils/__init__.py          |   0
 src/backend/ml_api/utils/device.py            |  12 ++
 src/backend/ml_api/utils/logging_config.py    |  10 ++
 20 files changed, 206 insertions(+), 164 deletions(-)
 create mode 100644 src/backend/ml_api/app.py
 rename src/backend/ml_api/{flaskr => classifiers/binary}/__init__.py (100%)
 rename src/backend/ml_api/{flaskr => }/classifiers/binary/base.py (100%)
 rename src/backend/ml_api/{flaskr => }/classifiers/binary/dummy.py (100%)
 rename src/backend/ml_api/{flaskr => }/classifiers/binary/fasttext_classifier.py (97%)
 rename src/backend/ml_api/{flaskr/classifiers => classifiers/prompt}/__init__.py (100%)
 rename src/backend/ml_api/{flaskr => }/classifiers/prompt/gpt_classifier.py (100%)
 rename src/backend/ml_api/{flaskr/classifiers/binary => external_models}/__init__.py (100%)
 create mode 100644 src/backend/ml_api/external_models/transformers_model.py
 delete mode 100644 src/backend/ml_api/flaskr/app.py
 rename src/backend/ml_api/{flaskr/classifiers/prompt => routers}/__init__.py (100%)
 create mode 100644 src/backend/ml_api/routers/classify.py
 create mode 100644 src/backend/ml_api/routers/model.py
 create mode 100644 src/backend/ml_api/routers/question.py
 create mode 100644 src/backend/ml_api/routers/summarize.py
 create mode 100644 src/backend/ml_api/utils/__init__.py
 create mode 100644 src/backend/ml_api/utils/device.py
 create mode 100644 src/backend/ml_api/utils/logging_config.py

diff --git a/src/backend/ml_api/README.md b/src/backend/ml_api/README.md
index ee6b126..6fddaff 100644
--- a/src/backend/ml_api/README.md
+++ b/src/backend/ml_api/README.md
@@ -19,13 +19,16 @@ conda install pytorch torchvision torchaudio pytorch-cuda=11.6 -c pytorch -c nvi
 conda install conda-forge::fasttext
 ```
 
-Run the app:
+Run the app from the *ml_api* directory:
 
 ```
-export FLASK_APP=app.py
-flask run
+uvicorn app:app
 ```
 
+API will be available at [127.0.0.1:8000](http://127.0.0.1:8000)
+
+Swagger UI will be available at [127.0.0.1:8000/docs](http://127.0.0.1:8000/docs)
+
 Run tests:
 
 ```
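Once the server is up (`uvicorn app:app` from the *ml_api* directory), the migrated endpoints can be exercised with the `requests` package already pinned in requirements.txt. A minimal sketch, not part of the patch; the input text is illustrative and the model key is one entry from the `models` table introduced below:

```
import requests

# POST to the summarization router; the trailing slash matches the
# route mounted at prefix="/summarize" with path "/".
resp = requests.post(
    "http://127.0.0.1:8000/summarize/",
    json={
        "text": "FastAPI is a modern, high-performance web framework for Python.",
        "model": "google/flan-t5-small",  # any key returned by GET /model/
    },
)
print(resp.json())  # {"response": "..."}
```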
diff --git a/src/backend/ml_api/app.py b/src/backend/ml_api/app.py
new file mode 100644
index 0000000..e9205f8
--- /dev/null
+++ b/src/backend/ml_api/app.py
@@ -0,0 +1,52 @@
+from fastapi import FastAPI
+from fastapi.responses import HTMLResponse
+from routers import classify, summarize, question, model
+from utils.logging_config import setup_logging
+
+setup_logging()
+
+app = FastAPI()
+
+app.include_router(classify.router, prefix="/classify", tags=["Classification"])
+app.include_router(summarize.router, prefix="/summarize", tags=["Summarization"])
+app.include_router(question.router, prefix="/question", tags=["Question Answering"])
+app.include_router(model.router, prefix="/model", tags=["Models"])
+
+@app.get("/", response_class=HTMLResponse, include_in_schema=False)
+async def index():
+    return """
+    <html>
+        <head>
+            <title>Machine Learning API</title>
+        </head>
+        <body>
+            <h1>Machine Learning API</h1>
+            <a href="/docs">Go to API Documentation</a>
+        </body>
+    </html>
+    """
diff --git a/src/backend/ml_api/flaskr/__init__.py b/src/backend/ml_api/classifiers/binary/__init__.py
similarity index 100%
rename from src/backend/ml_api/flaskr/__init__.py
rename to src/backend/ml_api/classifiers/binary/__init__.py
diff --git a/src/backend/ml_api/flaskr/classifiers/binary/base.py b/src/backend/ml_api/classifiers/binary/base.py
similarity index 100%
rename from src/backend/ml_api/flaskr/classifiers/binary/base.py
rename to src/backend/ml_api/classifiers/binary/base.py
diff --git a/src/backend/ml_api/flaskr/classifiers/binary/dummy.py b/src/backend/ml_api/classifiers/binary/dummy.py
similarity index 100%
rename from src/backend/ml_api/flaskr/classifiers/binary/dummy.py
rename to src/backend/ml_api/classifiers/binary/dummy.py
diff --git a/src/backend/ml_api/flaskr/classifiers/binary/fasttext_classifier.py b/src/backend/ml_api/classifiers/binary/fasttext_classifier.py
similarity index 97%
rename from src/backend/ml_api/flaskr/classifiers/binary/fasttext_classifier.py
rename to src/backend/ml_api/classifiers/binary/fasttext_classifier.py
index c182e75..b3ab238 100644
--- a/src/backend/ml_api/flaskr/classifiers/binary/fasttext_classifier.py
+++ b/src/backend/ml_api/classifiers/binary/fasttext_classifier.py
@@ -6,7 +6,7 @@ import re
 
 import fasttext
 
-from flaskr.classifiers.binary.base import BaseClassifier
+from classifiers.binary.base import BaseClassifier
 
 
 def write_temp_fasttext_train_file(
diff --git a/src/backend/ml_api/flaskr/classifiers/__init__.py b/src/backend/ml_api/classifiers/prompt/__init__.py
similarity index 100%
rename from src/backend/ml_api/flaskr/classifiers/__init__.py
rename to src/backend/ml_api/classifiers/prompt/__init__.py
diff --git a/src/backend/ml_api/flaskr/classifiers/prompt/gpt_classifier.py b/src/backend/ml_api/classifiers/prompt/gpt_classifier.py
similarity index 100%
rename from src/backend/ml_api/flaskr/classifiers/prompt/gpt_classifier.py
rename to src/backend/ml_api/classifiers/prompt/gpt_classifier.py
diff --git a/src/backend/ml_api/flaskr/classifiers/binary/__init__.py b/src/backend/ml_api/external_models/__init__.py
similarity index 100%
rename from src/backend/ml_api/flaskr/classifiers/binary/__init__.py
rename to src/backend/ml_api/external_models/__init__.py
diff --git a/src/backend/ml_api/external_models/transformers_model.py b/src/backend/ml_api/external_models/transformers_model.py
new file mode 100644
index 0000000..1de7d43
--- /dev/null
+++ b/src/backend/ml_api/external_models/transformers_model.py
@@ -0,0 +1,27 @@
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from utils.device import get_device
+import logging
+
+class TransformersModel:
+    def __init__(self, model_name: str):
+        self.device = get_device()
+        logging.info("Loading model and tokenizer...")
+
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        logging.info("Tokenizer loaded")
+
+        self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+        self.model.to(self.device)
+        logging.info("Model loaded")
+
+    def generate_response(self, text: str, max_length=32, num_beams=4):
+        inputs = self.tokenizer.encode(text, return_tensors="pt").to(self.device)
+        outputs = self.model.generate(inputs, max_length=max_length, num_beams=num_beams, early_stopping=True)
+        response = self.tokenizer.decode(outputs[0].to("cpu"))
+        return response.replace("<pad>", "").replace("</s>", "").replace("<unk>", "").strip()
+
+models = {
+    "bigscience/T0_3B": TransformersModel("bigscience/T0_3B"),
+    "google/flan-t5-small": TransformersModel("google/flan-t5-small"),
+    "geektech/flan-t5-base-gpt4-relation": TransformersModel("geektech/flan-t5-base-gpt4-relation"),
+}
\ No newline at end of file
diff --git a/src/backend/ml_api/flaskr/app.py b/src/backend/ml_api/flaskr/app.py
deleted file mode 100644
index d27365d..0000000
--- a/src/backend/ml_api/flaskr/app.py
+++ /dev/null
@@ -1,158 +0,0 @@
-import json
-import logging
-from typing import Dict
-
-import numpy as np
-from flask import Flask, request
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-
-from flaskr.classifiers.binary.fasttext_classifier import FastTextClassifier
-
-app = Flask(__name__)
-
-logging.basicConfig(
-    level=logging.DEBUG, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-    filename="app.log", filemode='a'
-)
-logging.info("Loading model and tokenizer...")
-
-tokenizer = AutoTokenizer.from_pretrained("bigscience/T0_3B")
-logging.info("Tokenizer loaded")
-
-model = AutoModelForSeq2SeqLM.from_pretrained("bigscience/T0_3B")
-model.to("cuda")
-logging.info("Model loaded")
-
-
-def get_response(text: str) -> str:
-    inputs = tokenizer.encode(text, return_tensors="pt")
-    inputs = inputs.to("cuda")
-    outputs = model.generate(inputs, max_length=32, num_beams=4, early_stopping=True)
-    out_text = tokenizer.decode(outputs[0].to("cpu"))
-
-    out_text = out_text.replace("<pad>", "")
-    out_text = out_text.replace("</s>", "")
-    out_text = out_text.replace("<unk>", "")
-    return out_text.strip()
-
-@app.route("/")
-def index():
-    page = """
-    <html>
-        <head>
-            <title>Text to text models server</title>
-        </head>
-        <body>
-            <h1>Machine learning classification models server</h1>
-            <h2>Binary models</h2>
-            <p>Route: /classify</p>
-            <p>Methods allowed: POST</p>
-            <pre>POST data: {
-        "xy_train": {
-            "1": {"title": "title 1", "decision": "Label1"},
-            "2": {"title": "title 2", "decision": "Label2"}
-        },
-        "x_pred": {
-            "1": {"title": "Prediction text 1"},
-            "2": {"title": "Prediction text 2"}
-        }
-    }</pre>
-            <pre>POST response: {
-        "y_pred": {"status": "OK", "predictions": [{"probability": 0.9, "label": 1}, {"probability": 0.1, "label": 0}]},
-        "algorithm_id": "FastTextClassifier object at ..."
-    }</pre>
-            <h2>Prompt-based models</h2>
-            <p>Route: /summarize</p>
-            <p>Methods allowed: POST</p>
-            <pre>POST data: {"text": "text to be summarized"}</pre>
-            <pre>POST response: {"response": "summary of the text"}</pre>
-
-            <p>Route: /question</p>
-            <p>Methods allowed: POST</p>
-            <pre>POST data: {"text": "Your question"}</pre>
-            <pre>POST response: {"response": "Models answer", "status": "status_string"}</pre>
-
-            <p>Models supported: bigscience/T0_3B</p>
-        </body>
-    </html>
-    """
-    return page
-
-
-@app.route("/classify", methods=["POST"])
-def classify() -> Dict:
-    if request.method == "POST":
-        try:
-            in_data = request.get_json()
-            # save to file in_data
-            with open("in_data.json", "w") as f:
-                f.write(str(in_data))
-
-
-            review_id = in_data["review_id"]
-            xy_train = in_data["xy_train"]
-            x_pred = in_data["x_pred"]
-
-            algorithm_object = FastTextClassifier()
-            algorithm_object.train(
-                input_data=[x["title"] for x in xy_train.values()],
-                true_labels=[x["decision"] for x in xy_train.values()],
-            )
-            y_pred = algorithm_object.predict([x["title"] for x in x_pred.values()])
-            response = {"y_pred": y_pred, "algorithm_id": str(algorithm_object)}
-        except Exception as e:
-            response = str(e)
-    else:
-        response = "Only POST allowed"
-    return json.dumps(response, default=numpy_encoder)
-
-def numpy_encoder(obj):
-    if isinstance(obj, np.generic):
-        return obj.item()  # Converts np.float32 to float
-    raise TypeError("Type not serializable")
-
-@app.route("/summarize", methods=["POST"])
-def summarize() -> Dict:
-    if request.method == "POST":
-        in_data = request.get_json()
-        text = in_data["text"]
-        out_text = get_response(f"Summarise the following text: {text}")
-    else:
-        out_text = "Only POST allowed"
-    return {"results": out_text}
-
-
-@app.route("/question", methods=["POST"])
-def question() -> Dict[str, str]:
-    if request.method != "POST":
-        return {"response": "Only POST allowed", "status": "ERROR"}
-
-    text = request.json["text"]
-    try:
-        response = get_response(text)
-        logging.debug("text: %s, response: %s", text, response)
-        status = "OK"
-    except Exception as e:
-        logging.error(e)
-        logging.error(text)
-        response = "Error"
-        status = "ERROR"
-    return {"response": response, "status": status}
-
-logging.info("App started")
-logging.info("Waiting for requests at /search")
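The deleted Flask app needed `numpy_encoder` because `FastTextClassifier.predict` can emit numpy scalars that `json.dumps` rejects. The FastAPI replacement drops that helper and relies on `ClassifyOutput` declaring plain `str` values; if numpy types still leak through, a small coercion step along these lines could be reinstated (a sketch only; `to_builtin` is a hypothetical name, not part of the patch):

```
import numpy as np

def to_builtin(obj):
    """Recursively convert numpy scalars to JSON-safe Python types,
    mirroring the numpy_encoder removed with flaskr/app.py."""
    if isinstance(obj, np.generic):
        return obj.item()  # e.g. np.float32 -> float
    if isinstance(obj, dict):
        return {k: to_builtin(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [to_builtin(v) for v in obj]
    return obj
```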
diff --git a/src/backend/ml_api/requirements.txt b/src/backend/ml_api/requirements.txt
index fddaeb8..6ef1d5b 100644
--- a/src/backend/ml_api/requirements.txt
+++ b/src/backend/ml_api/requirements.txt
@@ -1,9 +1,11 @@
 transformers==4.34.0
-flask==3.0.0
+torch==2.5.1
 requests==2.31.0
 sentencepiece==0.1.97
 gunicorn==21.2.0
 protobuf==4.24.4
 pytest==8.3.3
 openai==1.55.1
-scikit-learn==1.3.2
\ No newline at end of file
+scikit-learn==1.3.2
+numpy==1.24.4
+fastapi[standard]==0.115.6
\ No newline at end of file
diff --git a/src/backend/ml_api/flaskr/classifiers/prompt/__init__.py b/src/backend/ml_api/routers/__init__.py
similarity index 100%
rename from src/backend/ml_api/flaskr/classifiers/prompt/__init__.py
rename to src/backend/ml_api/routers/__init__.py
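The `/classify` router below keeps the old JSON contract except for `review_id`, which is no longer read. A request body matching `ClassifyInput` might look like this (field names come straight from the schema; the titles and labels are invented):

```
import requests

payload = {
    "xy_train": {
        "1": {"title": "title 1", "decision": "Label1"},
        "2": {"title": "title 2", "decision": "Label2"},
    },
    "x_pred": {
        "1": {"title": "Prediction text 1"},
        "2": {"title": "Prediction text 2"},
    },
}
print(requests.post("http://127.0.0.1:8000/classify/", json=payload).json())
```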
diff --git a/src/backend/ml_api/routers/classify.py b/src/backend/ml_api/routers/classify.py
new file mode 100644
index 0000000..352cc93
--- /dev/null
+++ b/src/backend/ml_api/routers/classify.py
@@ -0,0 +1,33 @@
+import logging
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+from typing import Dict
+from classifiers.binary.fasttext_classifier import FastTextClassifier
+
+router = APIRouter()
+
+class ClassifyInput(BaseModel):
+    xy_train: Dict[str, Dict[str, str]]
+    x_pred: Dict[str, Dict[str, str]]
+
+class ClassifyOutput(BaseModel):
+    y_pred: Dict[str, str]
+    algorithm_id: str
+
+@router.post("/")
+async def classify(data: ClassifyInput) -> ClassifyOutput:
+    try:
+        xy_train = data.xy_train
+        x_pred = data.x_pred
+
+        algorithm = FastTextClassifier()
+        algorithm.train(
+            input_data=[x["title"] for x in xy_train.values()],
+            true_labels=[x["decision"] for x in xy_train.values()],
+        )
+
+        predictions = algorithm.predict([x["title"] for x in x_pred.values()])
+        return ClassifyOutput(y_pred=predictions, algorithm_id="FastText")
+    except Exception as e:
+        logging.error(f"Error during classification: {e}")
+        raise HTTPException(status_code=503, detail="Service Unavailable: Unable to process the request.")
diff --git a/src/backend/ml_api/routers/model.py b/src/backend/ml_api/routers/model.py
new file mode 100644
index 0000000..0852023
--- /dev/null
+++ b/src/backend/ml_api/routers/model.py
@@ -0,0 +1,11 @@
+from fastapi import APIRouter
+from pydantic import BaseModel
+from external_models.transformers_model import models
+
+router = APIRouter()
+class GetModelsResponse(BaseModel):
+    models: list
+
+@router.get("/")
+async def get_models() -> GetModelsResponse:
+    return GetModelsResponse(models=list(models.keys()))
\ No newline at end of file
diff --git a/src/backend/ml_api/routers/question.py b/src/backend/ml_api/routers/question.py
new file mode 100644
index 0000000..44fa1b6
--- /dev/null
+++ b/src/backend/ml_api/routers/question.py
@@ -0,0 +1,25 @@
+import logging
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+from external_models.transformers_model import models
+from typing import Dict
+
+router = APIRouter()
+
+class QuestionInput(BaseModel):
+    text: str
+    model: str
+
+class QuestionResponse(BaseModel):
+    response: str
+
+@router.post("/")
+async def question(data: QuestionInput) -> QuestionResponse:
+    try:
+        model = models[data.model]
+        response = model.generate_response(data.text)
+        logging.debug("text: %s, response: %s", data.text, response)
+        return QuestionResponse(response=response)
+    except Exception as e:
+        logging.error(f"Error during question answering: {e}")
+        raise HTTPException(status_code=503, detail="Service Unavailable: Unable to process the request.")
diff --git a/src/backend/ml_api/routers/summarize.py b/src/backend/ml_api/routers/summarize.py
new file mode 100644
index 0000000..4c21f5e
--- /dev/null
+++ b/src/backend/ml_api/routers/summarize.py
@@ -0,0 +1,25 @@
+import logging
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+from external_models.transformers_model import models
+from typing import Dict
+
+router = APIRouter()
+
+class SummarizeInputRequest(BaseModel):
+    text: str
+    model: str
+
+class SummarizeOutputResponse(BaseModel):
+    response: str
+
+@router.post("/")
+async def summarize(data: SummarizeInputRequest) -> SummarizeOutputResponse:
+    try:
+        model = models[data.model]
+        summary = model.generate_response(f"Summarize the following text: {data.text}")
+        return SummarizeOutputResponse(response=summary)
+    except Exception as e:
+        logging.error(f"Error during summarization: {e}")
+        raise HTTPException(status_code=503, detail="Service Unavailable: Unable to process the request.")
+
diff --git a/src/backend/ml_api/utils/__init__.py b/src/backend/ml_api/utils/__init__.py
new file mode 100644
index 0000000..e69de29
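All three prompt routers index into the module-level `models` dict, which instantiates every checkpoint when the module is first imported; `bigscience/T0_3B` alone is a 3-billion-parameter model. If startup time or memory becomes a problem, a lazy-loading variant is one option (a sketch under that assumption; `get_model` and `MODEL_NAMES` are hypothetical names, not part of the patch):

```
from functools import lru_cache

# Assumes the eager `models = {...}` table in transformers_model.py
# is replaced by this factory.
from external_models.transformers_model import TransformersModel

MODEL_NAMES = (
    "bigscience/T0_3B",
    "google/flan-t5-small",
    "geektech/flan-t5-base-gpt4-relation",
)

@lru_cache(maxsize=None)
def get_model(name: str) -> TransformersModel:
    # Load on first request, then reuse the cached instance; routers
    # would call get_model(data.model) instead of models[data.model].
    if name not in MODEL_NAMES:
        raise KeyError(f"Unknown model: {name}")
    return TransformersModel(name)
```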
diff --git a/src/backend/ml_api/utils/device.py b/src/backend/ml_api/utils/device.py
new file mode 100644
index 0000000..16df013
--- /dev/null
+++ b/src/backend/ml_api/utils/device.py
@@ -0,0 +1,12 @@
+import torch
+import logging
+
+def get_device():
+    if torch.cuda.is_available():
+        logging.info("CUDA is available.")
+        return torch.device("cuda")
+    elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
+        logging.info("MPS is available.")
+        return torch.device("mps")
+    logging.info("CUDA and MPS are not available. Using CPU.")
+    return torch.device("cpu")
diff --git a/src/backend/ml_api/utils/logging_config.py b/src/backend/ml_api/utils/logging_config.py
new file mode 100644
index 0000000..dd21bf4
--- /dev/null
+++ b/src/backend/ml_api/utils/logging_config.py
@@ -0,0 +1,10 @@
+import logging
+
+def setup_logging():
+    logging.basicConfig(
+        level=logging.DEBUG,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        filename="app.log",
+        filemode="a",
+    )
+    logging.info("Logging is configured.")
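With the whole patch applied, the wiring can be smoke-tested without starting a server via FastAPI's `TestClient` (it needs httpx, which the `fastapi[standard]` pin should provide). A sketch only; note that importing `app` loads all models, so the first run is slow:

```
from fastapi.testclient import TestClient

from app import app  # run from the ml_api directory so the absolute imports resolve

client = TestClient(app)

def test_lists_models():
    resp = client.get("/model/")
    assert resp.status_code == 200
    assert "google/flan-t5-small" in resp.json()["models"]

def test_index_serves_html():
    resp = client.get("/")
    assert resp.status_code == 200
    assert "Machine Learning API" in resp.text
```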