Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Memory footprint reduction and pdfium isolation #488

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
18 changes: 10 additions & 8 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ services:
- sys_nice
environment:
- CUDA_VISIBLE_DEVICES=-1
- DISABLE_FAST_API_ACCESS_LOGGING=true
- EMBEDDING_NIM_MODEL_NAME=${EMBEDDING_NIM_MODEL_NAME:-nvidia/llama-3.2-nv-embedqa-1b-v2}
- INGEST_LOG_LEVEL=DEFAULT
# Message client for development
Expand All @@ -210,27 +211,28 @@ services:
- NVIDIA_BUILD_API_KEY=${NVIDIA_BUILD_API_KEY:-${NGC_API_KEY:-ngcapikey}}
- OTEL_EXPORTER_OTLP_ENDPOINT=otel-collector:4317
# Self-hosted paddle endpoints.
- PADDLE_GRPC_ENDPOINT=paddle:8001
#- PADDLE_GRPC_ENDPOINT=paddle:8001
- PADDLE_HTTP_ENDPOINT=http://paddle:8000/v1/infer
- PADDLE_INFER_PROTOCOL=grpc
- PADDLE_INFER_PROTOCOL=http
# build.nvidia.com hosted paddle endpoints.
#- PADDLE_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/baidu/paddleocr
#- PADDLE_INFER_PROTOCOL=http
- READY_CHECK_ALL_COMPONENTS=True
- REDIS_MORPHEUS_TASK_QUEUE=morpheus_task_queue
# Self-hosted yolox endpoints.
- YOLOX_GRPC_ENDPOINT=yolox:8001
#- YOLOX_GRPC_ENDPOINT=yolox:8001
- YOLOX_HTTP_ENDPOINT=http://yolox:8000/v1/infer
- YOLOX_INFER_PROTOCOL=grpc
- YOLOX_INFER_PROTOCOL=http
# build.nvidia.com hosted yolox endpoints.
#- YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nv-yolox-page-elements-v1
#- YOLOX_INFER_PROTOCOL=http
- YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT=yolox-graphic-elements:8001
#- YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT=yolox-graphic-elements:8001
- YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT=http://yolox-graphic-elements:8000/v1/infer
- YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL=grpc
- YOLOX_TABLE_STRUCTURE_GRPC_ENDPOINT=yolox-table-structure:8001
- YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL=http
#- YOLOX_TABLE_STRUCTURE_GRPC_ENDPOINT=yolox-table-structure:8001
- YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT=http://yolox-table-structure:8000/v1/infer
- YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL=grpc
- YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL=http
# build.nvidia.com hosted VLM caption endpoint (duplicate key removed —
# compose would otherwise silently keep only the last occurrence).
#- VLM_CAPTION_ENDPOINT=https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-11b-vision-instruct/chat/completions
- VLM_CAPTION_ENDPOINT=http://vlm:8000/v1/chat/completions
- VLM_CAPTION_MODEL_NAME=meta/llama-3.2-11b-vision-instruct
healthcheck:
Expand Down
23 changes: 19 additions & 4 deletions docker/scripts/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,29 @@ SRC_FILE="/opt/docker/bin/entrypoint_source"

# Check if user supplied a command
# Dispatch: if the user supplied a command, replace this shell with it;
# otherwise start the default services (uvicorn REST gateway in the
# background, then the ingest microservice in the foreground).
if [ "$#" -gt 0 ]; then
  # A command was provided — exec it so it becomes PID 1 and receives signals.
  exec "$@"
else
  # No command was provided: run the default startup launch.
  # Guard with :- so an unset MESSAGE_CLIENT_TYPE does not abort under `set -u`.
  if [ "${MESSAGE_CLIENT_TYPE:-}" != "simple" ]; then
    # Map INGEST_LOG_LEVEL to a uvicorn log level; the sentinel value
    # "default" (any case) means uvicorn's standard "info".
    log_level=$(printf '%s' "${INGEST_LOG_LEVEL:-default}" | tr '[:upper:]' '[:lower:]')
    if [ "$log_level" = "default" ]; then
      log_level="info"
    fi

    # Build the uvicorn argument list as an array so each argument survives
    # intact — word-splitting a flat command string breaks on any value
    # containing whitespace or glob characters.
    uvicorn_args=(nv_ingest.main:app
      --workers 32
      --host 0.0.0.0
      --port 7670
      --log-level "$log_level")

    # Optionally silence FastAPI/uvicorn access logs (set via docker-compose).
    # Use portable '=' rather than the bash-only '==' inside [ ].
    if [ "${DISABLE_FAST_API_ACCESS_LOGGING:-}" = "true" ]; then
      uvicorn_args+=(--no-access-log)
    fi

    # Start uvicorn in the background.
    uvicorn "${uvicorn_args[@]}" &
  fi

  # Start the microservice entrypoint in the foreground.
  python /workspace/microservice_entrypoint.py
fi
Loading
Loading