Make endpoints asynchronous
gabriel-piles committed Jul 16, 2024
1 parent 280d964 commit 6676d7f
Showing 6 changed files with 33 additions and 22 deletions.
2 changes: 1 addition & 1 deletion docker-compose-test.yml
@@ -43,7 +43,7 @@ services:
   worker-pdf-layout:
     container_name: "worker-pdf-layout"
     entrypoint: [ "gunicorn", "-k", "uvicorn.workers.UvicornWorker", "--chdir", "./src", "app:app", "--bind", "0.0.0.0:5060", "--timeout", "10000"]
-    image: ghcr.io/huridocs/pdf-document-layout-analysis:0.0.6
+    image: ghcr.io/huridocs/pdf-document-layout-analysis:0.0.7
     init: true
     restart: unless-stopped
     ports:
2 changes: 1 addition & 1 deletion docker-compose.yml
@@ -40,7 +40,7 @@ services:
   worker-pdf-layout-gpu:
     container_name: "worker-pdf-layout-gpu"
     entrypoint: [ "gunicorn", "-k", "uvicorn.workers.UvicornWorker", "--chdir", "./src", "app:app", "--bind", "0.0.0.0:5060", "--timeout", "10000"]
-    image: ghcr.io/huridocs/pdf-document-layout-analysis:0.0.6
+    image: ghcr.io/huridocs/pdf-document-layout-analysis:0.0.7
     init: true
     restart: unless-stopped
     network_mode: host
1 change: 0 additions & 1 deletion src/QueueProcessor.py
@@ -8,7 +8,6 @@
 from sentry_sdk.integrations.redis import RedisIntegration
 import sentry_sdk
 
-from PdfFile import PdfFile
 from configuration import (
     MONGO_HOST,
     MONGO_PORT,
26 changes: 7 additions & 19 deletions src/app.py
@@ -7,14 +7,13 @@
 
 from sentry_sdk.integrations.asgi import SentryAsgiMiddleware
 import sentry_sdk
+from starlette.concurrency import run_in_threadpool
 
 from catch_exceptions import catch_exceptions
 from configuration import MONGO_HOST, MONGO_PORT, service_logger
-from data_model.ExtractionData import ExtractionData
 from PdfFile import PdfFile
-from data_model.Params import Params
-from data_model.Task import Task
-from extract_segments import extract_segments
+from get_paragraphs import get_paragraphs
+from run import extract_segments_from_file
 
 
 @asynccontextmanager
@@ -54,30 +53,19 @@ async def error():
 @app.post("/")
 @catch_exceptions
 async def post_extract_paragraphs(file: UploadFile):
-    filename = file.filename
-    default_tenant = "default"
-    task = Task(tenant=default_tenant, task="extract_segments", params=Params(filename=filename))
-    PdfFile(default_tenant).save(pdf_file_name=filename, file=file.file.read())
-    extraction_data = extract_segments(task)
-    return extraction_data.paragraphs
+    return await run_in_threadpool(extract_segments_from_file, file)
 
 
 @app.post("/async_extraction/{tenant}")
 @catch_exceptions
 async def async_extraction(tenant, file: UploadFile = File(...)):
     filename = file.filename
     pdf_file = PdfFile(tenant)
-    pdf_file.save(pdf_file_name=filename, file=file.file.read())
+    await run_in_threadpool(pdf_file.save, filename, file.file.read())
     return "task registered"
 
 
 @app.get("/get_paragraphs/{tenant}/{pdf_file_name}")
 @catch_exceptions
-async def get_paragraphs(tenant: str, pdf_file_name: str):
-    suggestions_filter = {"tenant": tenant, "file_name": pdf_file_name}
-    pdf_paragraph_db = app.mongodb_client["pdf_paragraph"]
-    extraction_data_dict = pdf_paragraph_db.paragraphs.find_one(suggestions_filter)
-    pdf_paragraph_db.paragraphs.delete_many(suggestions_filter)
-
-    extraction_data = ExtractionData(**extraction_data_dict)
-    return extraction_data.model_dump_json()
+async def get_paragraphs_endpoint(tenant: str, pdf_file_name: str):
+    return await run_in_threadpool(get_paragraphs, app.mongodb_client, tenant, pdf_file_name)
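
For context, the pattern adopted in app.py keeps each endpoint async while pushing blocking work onto Starlette's thread pool with run_in_threadpool, so the event loop stays free while a PDF is processed. A minimal sketch of that pattern, using illustrative names that are not part of this repository:

from fastapi import FastAPI
from starlette.concurrency import run_in_threadpool

app = FastAPI()


def blocking_extraction(data: bytes) -> int:
    # Stand-in for CPU- or IO-heavy work that would otherwise block the event loop.
    return len(data)


@app.post("/sketch")
async def sketch_endpoint():
    # run_in_threadpool executes the synchronous callable in a worker thread and
    # awaits its result, so other requests keep being served in the meantime.
    return await run_in_threadpool(blocking_extraction, b"example bytes")
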
11 changes: 11 additions & 0 deletions src/get_paragraphs.py
@@ -0,0 +1,11 @@
+from data_model.ExtractionData import ExtractionData
+
+
+def get_paragraphs(mongodb_client, tenant, pdf_file_name):
+    suggestions_filter = {"tenant": tenant, "file_name": pdf_file_name}
+    pdf_paragraph_db = mongodb_client["pdf_paragraph"]
+    extraction_data_dict = pdf_paragraph_db.paragraphs.find_one(suggestions_filter)
+    pdf_paragraph_db.paragraphs.delete_many(suggestions_filter)
+
+    extraction_data = ExtractionData(**extraction_data_dict)
+    return extraction_data.model_dump_json()
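
Because the new helper takes the Mongo client as an argument, it can also be exercised outside the endpoint. A rough usage sketch, assuming a locally reachable MongoDB; the host, port, tenant and file name below are illustrative, not taken from this commit:

from pymongo import MongoClient

from get_paragraphs import get_paragraphs

# Connect to the MongoDB instance that stores the extraction results.
client = MongoClient("localhost", 27017)

# Returns the stored extraction data for this tenant/file as a JSON string
# and deletes the matching paragraph documents afterwards.
paragraphs_json = get_paragraphs(client, "default", "example.pdf")
print(paragraphs_json)
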
13 changes: 13 additions & 0 deletions src/run.py
@@ -1,7 +1,20 @@
+from fastapi import UploadFile
 from rsmq import RedisSMQ
 
+from PdfFile import PdfFile
+from data_model.Params import Params
+from data_model.Task import Task
+from extract_segments import extract_segments
+
 
+def extract_segments_from_file(file: UploadFile):
+    filename = file.filename
+    default_tenant = "default"
+    task = Task(tenant=default_tenant, task="extract_segments", params=Params(filename=filename))
+    PdfFile(default_tenant).save(pdf_file_name=filename, file=file.file.read())
+    extraction_data = extract_segments(task)
+    return extraction_data.paragraphs
 
 
 if __name__ == "__main__":
     extractions_tasks_queue = RedisSMQ(
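
With extract_segments_from_file factored out into run.py, the root endpoint simply awaits it on the thread pool, so a long extraction no longer stalls other requests. A quick way to exercise that endpoint over HTTP; the port matches the gunicorn bind in the compose files above, while the host and file path are illustrative:

import requests

# POST a PDF to the synchronous extraction endpoint; the response body
# contains the extracted paragraphs.
with open("example.pdf", "rb") as pdf_file:
    response = requests.post("http://localhost:5060/", files={"file": pdf_file})

print(response.json())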
