Skip to content

Commit

Permalink
Return XML using the API container
Browse files Browse the repository at this point in the history
  • Loading branch information
gabriel-piles committed Jul 18, 2024
1 parent 6676d7f commit 2afc64b
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 2 deletions.
2 changes: 2 additions & 0 deletions docker-compose-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ services:
- "5060:5060"
networks:
- network-pdf-layout
volumes:
- data:/app/xmls

mongo-pdf-layout:
container_name: "mongo-pdf-layout"
Expand Down
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ services:
- driver: nvidia
count: 1
capabilities: [ gpu ]
volumes:
- data:/app/xmls

mongo-pdf-layout:
container_name: "mongo-pdf-layout"
Expand Down
3 changes: 1 addition & 2 deletions src/QueueProcessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,13 @@ def process(self, id, message, rc, ts):
xml_file_name = get_xml_name(task)
extraction_data = extract_segments(task, xml_file_name)
service_url = f"{SERVICE_HOST}:{SERVICE_PORT}"
get_xml_url = f"{SERVICE_HOST}:{DOCUMENT_LAYOUT_ANALYSIS_PORT}"
extraction_message = ResultMessage(
tenant=extraction_data.tenant,
task=task.task,
params=task.params,
success=True,
data_url=f"{service_url}/get_paragraphs/{task.tenant}/{task.params.filename}",
file_url=f"{get_xml_url}/get_xml/{xml_file_name}",
file_url=f"{service_url}/get_xml/{xml_file_name}",
)

extraction_data_json = extraction_data.model_dump_json()
Expand Down
8 changes: 8 additions & 0 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
from sentry_sdk.integrations.asgi import SentryAsgiMiddleware
import sentry_sdk
from starlette.concurrency import run_in_threadpool
from starlette.responses import PlainTextResponse

from catch_exceptions import catch_exceptions
from configuration import MONGO_HOST, MONGO_PORT, service_logger
from PdfFile import PdfFile
from get_paragraphs import get_paragraphs
from get_xml import get_xml
from run import extract_segments_from_file


Expand Down Expand Up @@ -69,3 +71,9 @@ async def async_extraction(tenant, file: UploadFile = File(...)):
@catch_exceptions
async def get_paragraphs_endpoint(tenant: str, pdf_file_name: str):
return await run_in_threadpool(get_paragraphs, app.mongodb_client, tenant, pdf_file_name)


@app.get("/get_xml/{xml_file_name}", response_class=PlainTextResponse)
@catch_exceptions
async def get_xml_by_name(xml_file_name: str):
    # Serve the XML identified by the path segment as a plain-text body.
    # get_xml does blocking file I/O, so it is dispatched through
    # run_in_threadpool rather than being called directly from the coroutine.
    xml_content = await run_in_threadpool(get_xml, xml_file_name)
    return xml_content
14 changes: 14 additions & 0 deletions src/get_xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import os
from os.path import join
from pathlib import Path

from configuration import DATA_PATH


def get_xml(xml_file_name: str, data_path=None) -> str:
    """Return the content of a stored XML file and delete the file.

    The file is looked up inside ``data_path`` (``DATA_PATH`` when omitted).
    Reading is one-shot: after the content has been read the file is removed,
    so a second call with the same name raises ``FileNotFoundError``.

    Args:
        xml_file_name: Name of the XML file, relative to the data directory.
        data_path: Optional directory to read from instead of ``DATA_PATH``.

    Returns:
        The text content of the file.

    Raises:
        FileNotFoundError: If the file does not exist, or if the name resolves
            to a location outside the data directory.
    """
    base_path = Path(DATA_PATH if data_path is None else data_path).resolve()
    xml_file_path = (base_path / xml_file_name).resolve()

    # The name may come straight from a request, so refuse absolute paths and
    # ".." segments: without this check a caller could read *and delete* any
    # file the process can reach. Reported as "not found" so that nothing about
    # the filesystem layout leaks to the caller.
    if base_path not in xml_file_path.parents:
        raise FileNotFoundError(f"No such XML file: {xml_file_name}")

    # NOTE(review): the platform default encoding is kept, as before; confirm
    # which encoding the code that writes these files uses.
    content = xml_file_path.read_text()

    # Delete only after the handle is closed; removing an open file fails on
    # some platforms.
    xml_file_path.unlink()
    return content

0 comments on commit 2afc64b

Please sign in to comment.