Skip to content

Commit

Permalink
feat: ollama-like ps endpoints
Browse files Browse the repository at this point in the history
  • Loading branch information
Fedir Zadniprovskyi authored and fedirz committed Sep 5, 2024
1 parent 87d5890 commit 220ad22
Showing 1 changed file with 24 additions and 0 deletions.
24 changes: 24 additions & 0 deletions faster_whisper_server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import asyncio
from collections import OrderedDict
from contextlib import asynccontextmanager
import gc
from io import BytesIO
import time
from typing import TYPE_CHECKING, Annotated, Literal
Expand Down Expand Up @@ -107,6 +108,29 @@ def health() -> Response:
return Response(status_code=200, content="OK")


@app.get("/api/ps", tags=["experimental"], summary="Get a list of loaded models.")
def get_running_models() -> dict[str, list[str]]:
    """Report the names of the models currently registered in ``loaded_models``.

    Returns:
        A dict with a single ``"models"`` key whose value is the list of keys
        of ``loaded_models``, in that container's iteration order.
    """
    model_names = list(loaded_models.keys())
    return {"models": model_names}


@app.post("/api/ps/{model_name:path}", tags=["experimental"], summary="Load a model into memory.")
def load_model_route(model_name: str) -> Response:
    """Load ``model_name`` unless it is already a key of ``loaded_models``.

    Args:
        model_name: Model identifier taken from the URL. The ``:path``
            converter allows the identifier to contain slashes.

    Returns:
        A 201 response once ``load_model`` has returned, or a 409 response
        with the body "Model already loaded" when the name is already
        present in ``loaded_models``.
    """
    already_loaded = model_name in loaded_models
    if not already_loaded:
        load_model(model_name)
        return Response(status_code=201)
    return Response(status_code=409, content="Model already loaded")


@app.delete("/api/ps/{model_name:path}", tags=["experimental"], summary="Unload a model from memory.")
def stop_running_model(model_name: str) -> Response:
    """Drop ``model_name`` from ``loaded_models`` and run a garbage collection.

    Args:
        model_name: Model identifier taken from the URL. The ``:path``
            converter allows the identifier to contain slashes.

    Returns:
        A 204 response when an entry was removed, or a 404 response when
        ``loaded_models`` has no (non-``None``) entry under that name.
    """
    # Check for the entry without binding the model to a local variable: a
    # local would still be a live reference while gc.collect() runs below,
    # so that collection could not reclaim the model being unloaded.
    if loaded_models.get(model_name) is None:
        return Response(status_code=404)

    del loaded_models[model_name]
    # Collect right after the registry's reference is gone so that any
    # reference cycles involving the model are reclaimed immediately.
    gc.collect()
    return Response(status_code=204)


@app.get("/v1/models")
def get_models() -> ModelListResponse:
models = huggingface_hub.list_models(library="ctranslate2", tags="automatic-speech-recognition", cardData=True)
Expand Down

0 comments on commit 220ad22

Please sign in to comment.