
Replace Cached with Yolox-graphic-elements and drop Deplot (#365)
Co-authored-by: Jeremy Dyer <[email protected]>
edknv and jdye64 authored Feb 13, 2025
1 parent b6ba1a0 commit 32a9021
Showing 21 changed files with 931 additions and 728 deletions.
8 changes: 6 additions & 2 deletions README.md
@@ -4,6 +4,10 @@ All rights reserved.
SPDX-License-Identifier: Apache-2.0
-->

> [!NOTE]
> Cached and Deplot are deprecated; docker-compose now points to a beta version of the yolox-graphic-elements container instead. That model and container are slated for full release in March.
> With this change, you should now be able to run nv-ingest on a single 80GB A100 or H100 GPU.
> If you want to continue using the old pipeline with Cached and Deplot, use the [24.12.1 release](https://github.com/NVIDIA/nv-ingest/tree/24.12.1).
## NVIDIA-Ingest: Multi-modal data extraction

@@ -44,8 +44,8 @@ A service that:

| GPU | Family | Memory | # of GPUs (min.) |
| ------ | ------ | ------ | ------ |
| H100 | SXM or PCIe | 80GB | 2 |
| A100 | SXM or PCIe | 80GB | 2 |
| H100 | SXM or PCIe | 80GB | 1 |
| A100 | SXM or PCIe | 80GB | 1 |

### Software

5 changes: 4 additions & 1 deletion client/src/nv_ingest_client/client/client.py
@@ -93,10 +93,13 @@ def __init__(
self._message_client_hostname = message_client_hostname or "localhost"
self._message_client_port = message_client_port or 7670
self._message_counter_id = msg_counter_id or "nv-ingest-message-id"
self._message_client_kwargs = message_client_kwargs or {}

logger.debug("Instantiate NvIngestClient:\n%s", str(self))
self._message_client = message_client_allocator(
host=self._message_client_hostname, port=self._message_client_port
host=self._message_client_hostname,
port=self._message_client_port,
**self._message_client_kwargs,
)

# Initialize the worker pool with the specified size
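For reference, a minimal usage sketch of the new `message_client_kwargs` parameter. The hostname and port mirror the defaults above; the import path and the `timeout` option are assumptions for illustration, not documented API:

```python
from nv_ingest_client.client import NvIngestClient  # import path assumed

client = NvIngestClient(
    message_client_hostname="localhost",
    message_client_port=7670,
    # Extra kwargs are forwarded verbatim to the message client allocator;
    # "timeout" is a hypothetical option shown for illustration only.
    message_client_kwargs={"timeout": 30},
)
```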
@@ -230,7 +230,7 @@ def fetch_message(self, job_id: str, timeout: float = 10) -> ResponseSchema:
except RuntimeError as rte:
raise rte

except requests.HTTPError as err:
except (ConnectionError, requests.HTTPError, requests.exceptions.ConnectionError) as err:
logger.error(f"Error during fetching, retrying... Error: {err}")
self._client = None # Invalidate client to force reconnection
try:
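The broadened except clause above lets `fetch_message` recover from dropped connections as well as HTTP errors. A minimal sketch of the reconnect-and-retry pattern, assuming a `client_factory` callable (the real method keeps this state on `self`):

```python
import requests

def fetch_with_retry(client_factory, job_id: str, max_attempts: int = 3):
    """Illustrative reconnect-on-failure loop; not the verbatim implementation."""
    client = client_factory()
    for _ in range(max_attempts):
        try:
            return client.fetch(job_id)
        except (ConnectionError, requests.HTTPError, requests.exceptions.ConnectionError) as err:
            print(f"Error during fetching, retrying... Error: {err}")
            client = client_factory()  # invalidate the client to force reconnection
    raise RuntimeError(f"Failed to fetch {job_id} after {max_attempts} attempts")
```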
56 changes: 12 additions & 44 deletions docker-compose.yaml
@@ -34,12 +34,12 @@ services:
reservations:
devices:
- driver: nvidia
device_ids: ["1"]
device_ids: ["0"]
capabilities: [gpu]
runtime: nvidia

deplot:
image: ${DEPLOT_IMAGE:-nvcr.io/nvidia/nemo-microservices/deplot}:${DEPLOT_TAG:-1.0.0}
yolox-graphic-elements:
image: ${YOLOX_GRAPHIC_ELEMENTS_IMAGE:-nvcr.io/nvidia/nemo-microservices/nemoretriever-graphic-elements-v1}:${YOLOX_GRAPHIC_ELEMENTS_TAG:-1.1}
ports:
- "8003:8000"
- "8004:8001"
@@ -59,28 +59,6 @@ services:
capabilities: [gpu]
runtime: nvidia

cached:
image: ${CACHED_IMAGE:-nvcr.io/nvidia/nemo-microservices/cached}:${CACHED_TAG:-0.2.1}
shm_size: 2gb
ports:
- "8006:8000"
- "8007:8001"
- "8008:8002"
user: root
environment:
- NIM_HTTP_API_PORT=8000
- NIM_TRITON_LOG_VERBOSE=1
- NGC_API_KEY=${NIM_NGC_API_KEY:-${NGC_API_KEY:-ngcapikey}}
- CUDA_VISIBLE_DEVICES=0
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["1"]
capabilities: [gpu]
runtime: nvidia

paddle:
image: ${PADDLE_IMAGE:-nvcr.io/nvidia/nemo-microservices/paddleocr}:${PADDLE_TAG:-1.0.0}
shm_size: 2gb
@@ -99,13 +77,13 @@
reservations:
devices:
- driver: nvidia
device_ids: ["1"]
device_ids: ["0"]
capabilities: [gpu]
runtime: nvidia

embedding:
# NIM ON
image: ${EMBEDDING_IMAGE:-nvcr.io/nim/nvidia/nv-embedqa-e5-v5}:${EMBEDDING_TAG:-1.1.0}
image: ${EMBEDDING_IMAGE:-nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2}:${EMBEDDING_TAG:-1.3.0}
shm_size: 16gb
ports:
- "8012:8000"
@@ -121,7 +99,7 @@
reservations:
devices:
- driver: nvidia
device_ids: ["1"]
device_ids: ["0"]
capabilities: [gpu]
runtime: nvidia

@@ -141,22 +119,9 @@
cap_add:
- sys_nice
environment:
# Self-hosted cached endpoints.
- CACHED_GRPC_ENDPOINT=cached:8001
- CACHED_HTTP_ENDPOINT=http://cached:8000/v1/infer
- CACHED_INFER_PROTOCOL=grpc
# build.nvidia.com hosted cached endpoints.
#- CACHED_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/university-at-buffalo/cached
#- CACHED_INFER_PROTOCOL=http
- CUDA_VISIBLE_DEVICES=0
#- DEPLOT_GRPC_ENDPOINT=""
# Self-hosted deplot endpoints.
- DEPLOT_HTTP_ENDPOINT=http://deplot:8000/v1/chat/completions
# build.nvidia.com hosted deplot
#- DEPLOT_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/vlm/google/deplot
- DEPLOT_INFER_PROTOCOL=http
- DOUGHNUT_GRPC_TRITON=triton-doughnut:8001
- EMBEDDING_NIM_MODEL_NAME=${EMBEDDING_NIM_MODEL_NAME:-nvidia/nv-embedqa-e5-v5}
- EMBEDDING_NIM_MODEL_NAME=${EMBEDDING_NIM_MODEL_NAME:-nvidia/llama-3.2-nv-embedqa-1b-v2}
- INGEST_LOG_LEVEL=DEFAULT
# Message client for development
#- MESSAGE_CLIENT_HOST=0.0.0.0
@@ -187,6 +152,9 @@ services:
# build.nvidia.com hosted yolox endpoints.
#- YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nv-yolox-page-elements-v1
#- YOLOX_INFER_PROTOCOL=http
- YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT=yolox-graphic-elements:8001
- YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT=http://yolox-graphic-elements:8000/v1/infer
- YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL=grpc
- VLM_CAPTION_ENDPOINT=https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct/chat/completions
- VLM_CAPTION_MODEL_NAME=meta/llama-3.2-90b-vision-instruct
healthcheck:
@@ -199,7 +167,7 @@
reservations:
devices:
- driver: nvidia
device_ids: ["1"]
device_ids: ["0"]
capabilities: [gpu]

otel-collector:
@@ -321,7 +289,7 @@ services:
reservations:
devices:
- driver: nvidia
device_ids: ["1"]
device_ids: ["0"]
capabilities: [gpu]
depends_on:
- "etcd"
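The compose changes above replace the deplot and cached services with a single yolox-graphic-elements NIM and consolidate everything onto GPU 0. A sketch of how a consumer might resolve the new endpoints, using the variable names and defaults from the compose file (the selection logic itself is an assumption):

```python
import os

# Defaults mirror the values wired up in docker-compose.yaml.
grpc_endpoint = os.getenv("YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT", "yolox-graphic-elements:8001")
http_endpoint = os.getenv("YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT", "http://yolox-graphic-elements:8000/v1/infer")
protocol = os.getenv("YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL", "grpc")

# Assumed convention: the protocol flag selects which endpoint is used.
endpoint = grpc_endpoint if protocol == "grpc" else http_endpoint
```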
20 changes: 13 additions & 7 deletions src/nv_ingest/api/v1/health.py
@@ -64,20 +64,26 @@ async def get_ready_state() -> dict:
# We give the users an option to disable checking all distributed services for "readiness"
check_all_components = os.getenv("READY_CHECK_ALL_COMPONENTS", "True").lower()
if check_all_components in ["1", "true", "yes"]:
yolox_ready = is_ready(os.getenv("YOLOX_HTTP_ENDPOINT", None), "/v1/health/ready")
deplot_ready = is_ready(os.getenv("DEPLOT_HTTP_ENDPOINT", None), "/v1/health/ready")
cached_ready = is_ready(os.getenv("CACHED_HTTP_ENDPOINT", None), "/v1/health/ready")
yolox_page_elements_ready = is_ready(os.getenv("YOLOX_HTTP_ENDPOINT", None), "/v1/health/ready")
yolox_graphic_elements_ready = is_ready(
os.getenv("YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT", None), "/v1/health/ready"
)
paddle_ready = is_ready(os.getenv("PADDLE_HTTP_ENDPOINT", None), "/v1/health/ready")

if ingest_ready and morpheus_pipeline_ready and yolox_ready and deplot_ready and cached_ready and paddle_ready:
if (
ingest_ready
and morpheus_pipeline_ready
and yolox_page_elements_ready
and yolox_graphic_elements_ready
and paddle_ready
):
return JSONResponse(content={"ready": True}, status_code=200)
else:
ready_statuses = {
"ingest_ready": ingest_ready,
"morpheus_pipeline_ready": morpheus_pipeline_ready,
"yolox_ready": yolox_ready,
"deplot_ready": deplot_ready,
"cached_ready": cached_ready,
"yolox_page_elemenst_ready": yolox_page_elements_ready,
"yolox_graphic_elements_ready": yolox_graphic_elements_ready,
"paddle_ready": paddle_ready,
}
logger.debug(f"Ready Statuses: {ready_statuses}")
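For context, a hedged sketch of what an `is_ready`-style probe could look like, consistent with how it is called above; the actual helper in nv-ingest may differ, and the URL handling here is an assumption:

```python
from urllib.parse import urlparse

import requests

def is_ready(http_endpoint, ready_endpoint="/v1/health/ready"):
    """Probe a service's readiness route over HTTP (illustrative sketch)."""
    if not http_endpoint:
        return True  # assumption: unset endpoints are skipped rather than failed
    parsed = urlparse(http_endpoint)
    url = f"{parsed.scheme}://{parsed.netloc}{ready_endpoint}"
    try:
        return requests.get(url, timeout=5).status_code == 200
    except requests.RequestException:
        return False
```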
2 changes: 1 addition & 1 deletion src/nv_ingest/extraction_workflows/image/image_handlers.py
@@ -158,7 +158,7 @@ def extract_table_and_chart_images(
objects = annotation_dict[label]
for idx, bboxes in enumerate(objects):
*bbox, _ = bboxes
h1, w1, h2, w2 = np.array(bbox) * np.array([height, width, height, width])
h1, w1, h2, w2 = bbox

base64_img = crop_image(original_image, (int(h1), int(w1), int(h2), int(w2)))

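Dropping the multiplication by `[height, width, height, width]` indicates the yolox service now returns absolute pixel coordinates rather than normalized ones; the same fix appears in `pdfium_helper.py` below. A minimal crop consistent with the `(h1, w1, h2, w2)` ordering used here (the real `crop_image` may differ):

```python
import numpy as np

def crop_image(image: np.ndarray, bbox: tuple) -> np.ndarray:
    """Crop using absolute (h1, w1, h2, w2) pixel coordinates (illustrative)."""
    h1, w1, h2, w2 = bbox
    return image[h1:h2, w1:w2]
```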
6 changes: 3 additions & 3 deletions src/nv_ingest/extraction_workflows/pdf/pdfium_helper.py
@@ -173,15 +173,15 @@ def extract_table_and_chart_images(
objects = annotation_dict[label]
for idx, bboxes in enumerate(objects):
*bbox, _ = bboxes
h1, w1, h2, w2 = bbox * np.array([height, width, height, width])
h1, w1, h2, w2 = bbox

cropped = crop_image(original_image, (h1, w1, h2, w2))
cropped = crop_image(original_image, (int(h1), int(w1), int(h2), int(w2)))
base64_img = numpy_to_base64(cropped)

table_data = CroppedImageWithContent(
content="",
image=base64_img,
bbox=(w1, h1, w2, h2),
bbox=(int(w1), int(h1), int(w2), int(h2)),
max_width=width,
max_height=height,
type_string=label,
20 changes: 6 additions & 14 deletions src/nv_ingest/schemas/chart_extractor_schema.py
@@ -20,12 +20,8 @@ class ChartExtractorConfigSchema(BaseModel):
auth_token : Optional[str], default=None
Authentication token required for secure services.
cached_endpoints : Tuple[Optional[str], Optional[str]], default=(None, None)
A tuple containing the gRPC and HTTP services for the cached endpoint.
Either the gRPC or HTTP service can be empty, but not both.
deplot_endpoints : Tuple[Optional[str], Optional[str]], default=(None, None)
A tuple containing the gRPC and HTTP services for the deplot endpoint.
yolox_endpoints : Tuple[Optional[str], Optional[str]], default=(None, None)
A tuple containing the gRPC and HTTP services for the yolox endpoint.
Either the gRPC or HTTP service can be empty, but not both.
paddle_endpoints : Tuple[Optional[str], Optional[str]], default=(None, None)
@@ -50,13 +46,9 @@ class ChartExtractorConfigSchema(BaseModel):

auth_token: Optional[str] = None

cached_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
cached_infer_protocol: str = ""

deplot_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
deplot_infer_protocol: str = ""
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
yolox_infer_protocol: str = ""

## NOTE: Paddle isn't currently called independently of the cached NIM, but will be in the future.
paddle_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
paddle_infer_protocol: str = ""

@@ -94,7 +86,7 @@ def clean_service(service):
return None
return service

for endpoint_name in ["cached_endpoints", "deplot_endpoints", "paddle_endpoints"]:
for endpoint_name in ["yolox_endpoints", "paddle_endpoints"]:
grpc_service, http_service = values.get(endpoint_name, (None, None))
grpc_service = clean_service(grpc_service)
http_service = clean_service(http_service)
@@ -125,7 +117,7 @@ class ChartExtractorSchema(BaseModel):
A flag indicating whether to raise an exception if a failure occurs during chart extraction.
stage_config : Optional[ChartExtractorConfigSchema], default=None
Configuration for the chart extraction stage, including cached, deplot, and paddle service endpoints.
Configuration for the chart extraction stage, including yolox and paddle service endpoints.
"""

max_queue_size: int = 1
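A hedged usage sketch of the updated schema, with field names taken from the diff above; the endpoint values are illustrative, borrowed from the docker-compose defaults:

```python
from nv_ingest.schemas.chart_extractor_schema import ChartExtractorConfigSchema

# Per the validator above, at least one of the gRPC/HTTP services
# must be non-empty for each endpoint pair.
config = ChartExtractorConfigSchema(
    yolox_endpoints=("yolox-graphic-elements:8001", "http://yolox-graphic-elements:8000/v1/infer"),
    yolox_infer_protocol="grpc",
    paddle_endpoints=(None, "http://paddle:8000/v1/infer"),
    paddle_infer_protocol="http",
)
```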
(Diffs for the remaining changed files are not shown.)
