diff --git a/helm/Chart.lock b/helm/Chart.lock deleted file mode 100644 index 1a6670d5..00000000 --- a/helm/Chart.lock +++ /dev/null @@ -1,36 +0,0 @@ -dependencies: -- name: common - repository: oci://registry-1.docker.io/bitnamicharts - version: 2.30.0 -- name: redis - repository: oci://registry-1.docker.io/bitnamicharts - version: 19.1.3 -- name: zipkin - repository: https://zipkin.io/zipkin-helm - version: 0.1.2 -- name: opentelemetry-collector - repository: https://open-telemetry.github.io/opentelemetry-helm-charts - version: 0.78.1 -- name: yolox-nim - repository: https://helm.ngc.nvidia.com/nvidia/nemo-microservices - version: 0.2.0 -- name: cached-nim - repository: https://helm.ngc.nvidia.com/nvidia/nemo-microservices - version: 0.2.0 -- name: paddleocr-nim - repository: https://helm.ngc.nvidia.com/nvidia/nemo-microservices - version: 0.2.0 -- name: deplot-nim - repository: https://helm.ngc.nvidia.com/nvidia/nemo-microservices - version: 0.1.12 -- name: text-embedding-nim - repository: https://helm.ngc.nvidia.com/nim/nvidia - version: 1.1.0 -- name: nvidia-nim-llama-32-nv-embedqa-1b-v2 - repository: https://helm.ngc.nvidia.com/nim/nvidia - version: 1.3.0 -- name: milvus - repository: https://zilliztech.github.io/milvus-helm - version: 4.1.11 -digest: sha256:6571e9d143f8b94f4cfa0e1edca3f2ac23189c07315f4908ecb6af80c5901cb4 -generated: "2025-02-19T11:00:07.532041109-05:00" diff --git a/helm/Chart.yaml b/helm/Chart.yaml index f8b0c150..c6711c28 100644 --- a/helm/Chart.yaml +++ b/helm/Chart.yaml @@ -2,55 +2,86 @@ apiVersion: v2 name: nv-ingest description: NV-Ingest Microservice type: application -version: 0.4.0 +version: 0.4.2 maintainers: - name: NVIDIA Corporation url: https://www.nvidia.com/ dependencies: - - name: common - repository: oci://registry-1.docker.io/bitnamicharts - tags: - - bitnami-common - version: 2.x.x - - name: redis - repository: oci://registry-1.docker.io/bitnamicharts - tags: - - redis - version: 19.1.3 - condition: redisDeployed - - 
name: zipkin - repository: https://zipkin.io/zipkin-helm - version: 0.1.2 - condition: zipkinDeployed - - name: opentelemetry-collector - repository: https://open-telemetry.github.io/opentelemetry-helm-charts - version: 0.78.1 - condition: otelDeployed + # Yolox NIM - name: yolox-nim repository: "alias:nemo-microservices" version: 0.2.0 - condition: yoloxDeployed - - name: cached-nim + condition: yolox-nim.deployed + + # Yolox base architecture variant NIM for extracting Graphic Elements from images + - name: nvidia-nim-nemoretriever-graphic-elements-v1 repository: "alias:nemo-microservices" - version: 0.2.0 - condition: cachedDeployed + version: 1.2.0 + condition: nvidia-nim-nemoretriever-graphic-elements-v1.deployed + + # Yolox base architecture variant NIM for extracting Table Structures from images + - name: nvidia-nim-nemoretriever-table-structure-v1 + repository: "alias:nvstaging-nim" + version: 1.1.0 + condition: nvidia-nim-nemoretriever-table-structure-v1.deployed + + # # VLM for image captioning. + # # This same chart can power several different underlying VLM models. Configure in values.yaml + # - name: nim-vlm + # repository: "alias:nvstaging-nim" + # version: 1.2.0-ea-v4 + # alias: nim-vlm-image-captioning + # condition: nim-vlm-image-captioning.deployed + + # VLM for text extraction. Same helm chart as image captioning VLM with different NIM engine; eclair vs llama vision + - name: nim-vlm + repository: "alias:nvstaging-nim" + version: 1.2.0-ea-v4 + alias: nim-vlm-text-extraction + condition: nim-vlm-text-extraction.deployed + + # PaddleOCR NIM - name: paddleocr-nim repository: "alias:nemo-microservices" version: 0.2.0 - condition: paddleocrDeployed - - name: deplot-nim - repository: "alias:nemo-microservices" - version: 0.1.12 - condition: deplotDeployed + condition: paddleocr-nim.deployed + + # nv-embedqa-e5-v5 NIM. 
Note only text-embedding-nim OR nvidia-nim-llama-32-nv-embedqa-1b-v2 can be deployed at the same time but not both - name: text-embedding-nim repository: "alias:nvidia-nim" version: 1.1.0 - condition: embedqaDeployed + condition: text-embedding-nim.deployed + + # nvidia-nim-llama-32-nv-embedqa-1b-v2 NIM. Note only text-embedding-nim OR nvidia-nim-llama-32-nv-embedqa-1b-v2 can be deployed at the same time but not both - name: nvidia-nim-llama-32-nv-embedqa-1b-v2 repository: "alias:nvidia-nim" version: 1.3.0 - condition: nvEmbedqaDeployed + condition: nvidia-nim-llama-32-nv-embedqa-1b-v2.deployed + + # # Nvidia llama-3.2-nv-rerankqa-1b-v2 reranking NIM + # - name: llama-3.2-nv-rerankqa-1b-v2 + # repository: "alias:nvstaging-nim" + # version: 1.4.0-rc1-latest-datacenter-release-24598430 + # condition: llama-3.2-nv-rerankqa-1b-v2.deployed + - name: milvus repository: https://zilliztech.github.io/milvus-helm version: 4.1.11 condition: milvusDeployed + + - name: redis + repository: oci://registry-1.docker.io/bitnamicharts + tags: + - redis + version: 19.1.3 + condition: redisDeployed + + - name: zipkin + repository: https://zipkin.io/zipkin-helm + version: 0.1.2 + condition: zipkinDeployed + + - name: opentelemetry-collector + repository: https://open-telemetry.github.io/opentelemetry-helm-charts + version: 0.78.1 + condition: otelDeployed diff --git a/helm/README.md b/helm/README.md index 51764cef..6d832fdc 100644 --- a/helm/README.md +++ b/helm/README.md @@ -1,411 +1,254 @@ -# NVIDIA-Ingest Helm Charts - -> [!WARNING] -> NV-Ingest version 24.08 exposed Redis directly to the client, as such setup for the [24.08](https://github.com/NVIDIA/nv-ingest/releases/tag/24.08) `nv-ingest-cli` differs. -> -> If using [24.08](https://github.com/NVIDIA/nv-ingest/releases/tag/24.08), refer to [this section](#2408-cli-setup-and-usage). However, we strongly recommend upgrading to `24.12`+ when available. 
- - -## Prerequisites - -### Hardware/Software -[Refer to our supported hardware/software configurations here](https://github.com/NVIDIA/nv-ingest?tab=readme-ov-file#hardware). - -## Setup Environment - -- First create your namespace - -```bash -NAMESPACE=nv-ingest -kubectl create namespace ${NAMESPACE} -``` - -- Install the Helm repos - -```bash -# Nvidia nemo-microservices NGC repository -helm repo add nemo-microservices https://helm.ngc.nvidia.com/nvidia/nemo-microservices --username='$oauthtoken' --password= - -# Nvidia NIM NGC repository -helm repo add nvidia-nim https://helm.ngc.nvidia.com/nim/nvidia --username='$oauthtoken' --password= -``` - -- Install the chart - -```bash -helm upgrade \ - --install \ - nv-ingest \ - https://helm.ngc.nvidia.com/nvidia/nemo-microservices/charts/nv-ingest-0.4.0.tgz \ - -n ${NAMESPACE} \ - --username '$oauthtoken' \ - --password "${NGC_API_KEY}" \ - --set imagePullSecret.create=true \ - --set imagePullSecret.password="${NGC_API_KEY}" \ - --set ngcSecret.create=true \ - --set ngcSecret.password="${NGC_API_KEY}" \ - --set image.repository="nvcr.io/nvidia/nemo-microservices/nv-ingest" \ - --set image.tag="24.12" -``` - -Optionally you can create your own versions of the `Secrets` if you do not want to use the creation via the helm chart. - - -```bash - -NAMESPACE=nv-ingest -DOCKER_CONFIG='{"auths":{"nvcr.io":{"username":"$oauthtoken", "password":"'${NGC_API_KEY}'" }}}' -echo -n $DOCKER_CONFIG | base64 -w0 -NGC_REGISTRY_PASSWORD=$(echo -n $DOCKER_CONFIG | base64 -w0 ) - -kubectl apply -n ${NAMESPACE} -f - < [!TIP] -> This means that the `nv-ingest-cli` no longer uses a Redis client so users must use the appropriate version to ensure the client is not still trying to use the RedisClient. - -First, build `nv-ingest-cli` from the source to ensure you have the latest code. -For more information, refer to [NV-Ingest-Client](https://github.com/NVIDIA/nv-ingest/tree/main/client). 
- -```bash -# Just to be cautious we remove any existing installation -pip uninstall nv-ingest-cli - -# Build the wheel from source -~~~~~~~[INSERT STEPS TO BUILD FROM SOURCE]~~~~~~~~~~~~~~~ - -# Pip install that .whl -pip install that wheel made above -``` - -#### Rest Endpoint Ingress - -It is recommended that the end user provide a mechanism for [`Ingress`](https://kubernetes.io/docs/concepts/services-networking/ingress/) for the NV-Ingest pod. -You can test outside of your Kubernetes cluster by [port-forwarding](https://kubernetes.io/docs/reference/kubectl/generated/kubectl_port-forward/) the NV-Ingest pod to your local environment. - -Example: - -You can find the name of your NV-Ingest pod you want to forward traffic to by running: - -```bash -kubectl get pods -n --no-headers -o custom-columns=":metadata.name" -kubectl port-forward -n ${NAMESPACE} service/nv-ingest 7670:7670 -``` - -The output will look similar to the following but with different auto-generated sequences. - -``` -nv-ingest-674f6b7477-65nvm -nv-ingest-cached-0 -nv-ingest-deplot-0 -nv-ingest-etcd-0 -nv-ingest-milvus-standalone-7f8ffbdfbc-jpmlj -nv-ingest-minio-7cbd4f5b9d-99hl4 -nv-ingest-opentelemetry-collector-7bb59d57fc-4h59q -nv-ingest-paddle-0 -nv-ingest-redis-master-0 -nv-ingest-redis-replicas-0 -nv-ingest-yolox-0 -nv-ingest-zipkin-77b5fc459f-ptsj6 -``` - -```bash -kubectl port-forward -n ${NAMESPACE} nv-ingest-674f6b7477-65nvm 7670:7670 -``` - -#### Executing Jobs - -Here is a sample invocation of a PDF extraction task using the port forward above: - -```bash -mkdir -p ./processed_docs - -nv-ingest-cli \ - --doc /path/to/your/unique.pdf \ - --output_directory ./processed_docs \ - --task='extract:{"document_type": "pdf", "extract_text": true, "extract_images": true, "extract_tables": true}' \ - --client_host=localhost \ - --client_port=7670 -``` - -You can also use NV-Ingest's Python client API to interact with the service running in the cluster. 
Use the same host and port as in the previous nv-ingest-cli example. - -## Parameters - -### Deployment parameters - -| Name | Description | Value | -| ----------------------------------- | ---------------------------------------------------------------------------------------------------------------- | ------- | -| `affinity` | [default: {}] Affinity settings for deployment. | `{}` | -| `nodeSelector` | Sets node selectors for the NIM -- for example `nvidia.com/gpu.present: "true"` | `{}` | -| `logLevel` | Log level of NV-Ingest service. Possible values of the variable are TRACE, DEBUG, INFO, WARNING, ERROR, CRITICAL. | `DEBUG` | -| `extraEnvVarsCM` | [default: ""] A Config map holding Environment variables to include in the NV-Ingest container | `""` | -| `extraEnvVarsSecret` | [default: ""] A K8S Secret to map to Environment variables to include in the NV-Ingest container | `""` | -| `fullnameOverride` | [default: ""] A name to force the fullname of the NV-Ingest container to have, defaults to the Helm Release Name | `""` | -| `nameOverride` | [default: ""] A name to base the objects created by this helm chart | `""` | -| `image.repository` | NIM Image Repository | `""` | -| `image.tag` | Image tag or version | `""` | -| `image.pullPolicy` | Image pull policy | `""` | -| `podAnnotations` | Sets additional annotations on the main deployment pods | `{}` | -| `podLabels` | Specify extra labels to be add to on deployed pods. | `{}` | -| `podSecurityContext` | Specify privilege and access control settings for pod | | -| `podSecurityContext.fsGroup` | Specify file system owner group id. | `1000` | -| `extraVolumes` | Adds arbitrary additional volumes to the deployment set definition | `{}` | -| `extraVolumeMounts` | Specify volume mounts to the main container from `extraVolumes` | `{}` | -| `imagePullSecrets` | Specify list of secret names that are needed for the main container and any init containers. 
| | -| `containerSecurityContext` | Sets privilege and access control settings for container (Only affects the main container, not pod-level) | `{}` | -| `tolerations` | Specify tolerations for pod assignment. Allows the scheduler to schedule pods with matching taints. | | -| `replicaCount` | The number of replicas for NV-Ingest when autoscaling is disabled | `1` | -| `resources.limits."nvidia.com/gpu"` | Specify number of GPUs to present to the running service. | | -| `resources.limits.memory` | Specify limit for memory | `32Gi` | -| `resources.requests.memory` | Specify request for memory | `16Gi` | -| `tmpDirSize` | Specify the amount of space to reserve for temporary storage | `8Gi` | - -### NIM Configuration - -Define additional values to the dependent NIM helm charts by updating the "yolox-nim", "cached-nim", "deplot-nim", and "paddleocr-nim" -values. A sane set of configurations are already included in this value file and only the "image.repository" and "image.tag" fields are -explicitly called out here. 
- -| Name | Description | Value | -| -------------------------------- | --------------------------------------------------------------- | ----- | -| `yolox-nim.image.repository` | The repository to override the location of the YOLOX | | -| `yolox-nim.image.tag` | The tag override for YOLOX | | -| `cached-nim.image.repository` | The repository to override the location of the Cached Model NIM | | -| `cached-nim.image.tag` | The tag override for Cached Model NIM | | -| `paddleocr-nim.image.repository` | The repository to override the location of the Paddle OCR NIM | | -| `paddleocr-nim.image.tag` | The tag override for Paddle OCR NIM | | -| `deplot-nim.image.repository` | The repository to override the location of the Deplot NIM | | -| `deplot-nim.image.tag` | The tag override for Deplot NIM | | - -### Milvus Deployment parameters - -NV-Ingest uses Milvus and Minio to store extracted images from a document -This chart by default sets up a Milvus standalone instance in the namespace using the -Helm chart at found https://artifacthub.io/packages/helm/milvus-helm/milvus - -| Name | Description | Value | -| ---------------- | ---------------------------------------------------------------------- | --------- | -| `milvusDeployed` | Whether to deploy Milvus and Minio from this helm chart | `true` | -| `milvus` | Find values at https://artifacthub.io/packages/helm/milvus-helm/milvus | `sane {}` | - -### Autoscaling parameters - -Values used for creating a `Horizontal Pod Autoscaler`. If autoscaling is not enabled, the rest are ignored. -NVIDIA recommends usage of the custom metrics API, commonly implemented with the prometheus-adapter. -Standard metrics of CPU and memory are of limited use in scaling NIM. - -| Name | Description | Value | -| ------------------------- | ----------------------------------------- | ------- | -| `autoscaling.enabled` | Enables horizontal pod autoscaler. | `false` | -| `autoscaling.minReplicas` | Specify minimum replicas for autoscaling. 
| `1` | -| `autoscaling.maxReplicas` | Specify maximum replicas for autoscaling. | `100` | -| `autoscaling.metrics` | Array of metrics for autoscaling. | `[]` | - -### Redis configurations - -Include any redis configuration that you'd like with the deployed Redis -Find values at https://github.com/bitnami/charts/tree/main/bitnami/redis - -| Name | Description | Value | -| --------------- | ------------------------------------------------------------------------ | --------- | -| `redisDeployed` | Whether to deploy Redis from this helm chart | `true` | -| `redis` | Find values at https://github.com/bitnami/charts/tree/main/bitnami/redis | `sane {}` | - -### Environment Variables - -Define environment variables as key/value dictionary pairs - -| Name | Description | Value | -| -------------------------------------- | --------------------------------------------------------------------------------------------------------- | -------------------------- | -| `envVars` | Adds arbitrary environment variables to the main container using key-value pairs, for example NAME: value | `sane {}` | -| `envVars.MESSAGE_CLIENT_HOST` | Override this value if disabling Redis deployment in this chart. | `"nv-ingest-redis-master"` | -| `envVars.MESSAGE_CLIENT_PORT` | Override this value if disabling Redis deployment in this chart. | `"7670"` | -| `envVars.NV_INGEST_DEFAULT_TIMEOUT_MS` | Override the Timeout of the NV-Ingest requests. 
| `"1234"` | -| `envVars.MINIO_INTERNAL_ADDRESS` | Override this to the cluster local DNS name of minio | `"nv-ingest-minio:9000"` | -| `envVars.MINIO_PUBLIC_ADDRESS` | Override this to publicly routable minio address, default assumes port-forwarding | `"http://localhost:9000"` | -| `envVars.MINIO_BUCKET` | Override this for specific minio bucket to upload extracted images to | `"nv-ingest"` | - -### Open Telemetry - -Define environment variables as key/value dictionary pairs for configuring OTEL Deployments -A sane set of parameters is set for the deployed version of OpenTelemetry with this Helm Chart. -Override any values to the Open Telemetry helm chart by overriding the `open-telemetry` value. - -| Name | Description | Value | -| ----------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- | -| `otelEnabled` | Whether to enable OTEL collection | `true` | -| `otelDeployed` | Whether to deploy OTEL from this helm chart | `true` | -| `opentelemetry-collector` | Configures the opentelemetry helm chart - see https://github.com/open-telemetry/opentelemetry-helm-charts/blob/main/charts/opentelemetry-collector/values.yaml | | -| `otelEnvVars` | Adds arbitrary environment variables for configuring OTEL using key-value pairs, for example NAME: value | `sane {}` | -| `otelEnvVars.OTEL_EXPORTER_OTLP_ENDPOINT` | Default deployment target for GRPC otel - Default "http://{{ .Release.Name }}-opentelemetry-collector:4317" | | -| `otelEnvVars.OTEL_SERVICE_NAME` | | `"nemo-retrieval-service"` | -| `otelEnvVars.OTEL_TRACES_EXPORTER` | | `"otlp"` | -| `otelEnvVars.OTEL_METRICS_EXPORTER` | | `"otlp"` | -| `otelEnvVars.OTEL_LOGS_EXPORTER` | | `"none"` | -| `otelEnvVars.OTEL_PROPAGATORS` | | `"tracecontext baggage"` | -| `otelEnvVars.OTEL_RESOURCE_ATTRIBUTES` | | `"deployment.environment=$(NAMESPACE)"` | 
-| `otelEnvVars.OTEL_PYTHON_EXCLUDED_URLS` | | `"health"` | -| `zipkinDeployed` | Whether to deploy Zipkin with OpenTelemetry from this helm chart | `true` | - -### Ingress parameters - -| Name | Description | Value | -| ------------------------------------ | ----------------------------------------------------- | ------------------------ | -| `ingress.enabled` | Enables ingress. | `false` | -| `ingress.className` | Specify class name for Ingress. | `""` | -| `ingress.annotations` | Specify additional annotations for ingress. | `{}` | -| `ingress.hosts` | Specify list of hosts each containing lists of paths. | | -| `ingress.hosts[0].host` | Specify name of host. | `chart-example.local` | -| `ingress.hosts[0].paths[0].path` | Specify ingress path. | `/` | -| `ingress.hosts[0].paths[0].pathType` | Specify path type. | `ImplementationSpecific` | -| `ingress.tls` | Specify list of pairs of TLS `secretName` and hosts. | `[]` | - -### Probe parameters - -| Name | Description | Value | -| ----------------------------------- | ----------------------------------------- | --------- | -| `livenessProbe.enabled` | Enables `livenessProbe`` | `false` | -| `livenessProbe.httpGet.path` | `LivenessProbe`` endpoint path | `/health` | -| `livenessProbe.httpGet.port` | `LivenessProbe`` endpoint port | `http` | -| `livenessProbe.initialDelaySeconds` | Initial delay seconds for `livenessProbe` | `120` | -| `livenessProbe.timeoutSeconds` | Timeout seconds for `livenessProbe` | `20` | -| `livenessProbe.periodSeconds` | Period seconds for `livenessProbe` | `10` | -| `livenessProbe.successThreshold` | Success threshold for `livenessProbe` | `1` | -| `livenessProbe.failureThreshold` | Failure threshold for `livenessProbe` | `20` | - -### Probe parameters - -| Name | Description | Value | -| ---------------------------------- | ---------------------------------------- | --------- | -| `startupProbe.enabled` | Enables `startupProbe`` | `false` | -| `startupProbe.httpGet.path` | `StartupProbe`` 
endpoint path | `/health` | -| `startupProbe.httpGet.port` | `StartupProbe`` endpoint port | `http` | -| `startupProbe.initialDelaySeconds` | Initial delay seconds for `startupProbe` | `120` | -| `startupProbe.timeoutSeconds` | Timeout seconds for `startupProbe` | `10` | -| `startupProbe.periodSeconds` | Period seconds for `startupProbe` | `30` | -| `startupProbe.successThreshold` | Success threshold for `startupProbe` | `1` | -| `startupProbe.failureThreshold` | Failure threshold for `startupProbe` | `220` | - -### Probe parameters - -| Name | Description | Value | -| ------------------------------------ | ------------------------------------------ | --------- | -| `readinessProbe.enabled` | Enables `readinessProbe`` | `false` | -| `readinessProbe.httpGet.path` | `ReadinessProbe`` endpoint path | `/health` | -| `readinessProbe.httpGet.port` | `ReadinessProbe`` endpoint port | `http` | -| `readinessProbe.initialDelaySeconds` | Initial delay seconds for `readinessProbe` | `120` | -| `readinessProbe.timeoutSeconds` | Timeout seconds for `readinessProbe` | `10` | -| `readinessProbe.periodSeconds` | Period seconds for `readinessProbe` | `30` | -| `readinessProbe.successThreshold` | Success threshold for `readinessProbe` | `1` | -| `readinessProbe.failureThreshold` | Failure threshold for `readinessProbe` | `220` | - -### Service parameters - -| Name | Description | Value | -| ---------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | ----------- | -| `service.type` | Specifies the service type for the deployment. | `ClusterIP` | -| `service.name` | Overrides the default service name | `""` | -| `service.port` | Specifies the HTTP Port for the service. | `8000` | -| `service.nodePort` | Specifies an optional HTTP Node Port for the service. | `nil` | -| `service.annotations` | Specify additional annotations to be added to service. 
| `{}` | -| `service.labels` | Specifies additional labels to be added to service. | `{}` | -| `serviceAccount` | Options to specify service account for the deployment. | | -| `serviceAccount.create` | Specifies whether a service account should be created. | `true` | -| `serviceAccount.annotations` | Sets annotations to be added to the service account. | `{}` | -| `serviceAccount.name` | Specifies the name of the service account to use. If it is not set and create is "true", a name is generated using a "fullname" template. | `""` | - -### Secret Creation - -Manage the creation of secrets used by the helm chart - -| Name | Description | Value | -| -------------------------- | ------------------------------------------------------ | ------- | -| `ngcSecret.create` | Specifies whether to create the ngc api secret | `false` | -| `ngcSecret.password` | The password to use for the NGC Secret | `""` | -| `imagePullSecret.create` | Specifies whether to create the NVCR Image Pull secret | `false` | -| `imagePullSecret.password` | The password to use for the NVCR Image Pull Secret | `""` | - - -## 24.08 CLI Setup and Usage - -#### NV-Ingest CLI Installation: `24.08` - -You can find the Python wheel for the `nv-ingest-cli` located in our [NV-Ingest 24.08 release artifacts](https://github.com/NVIDIA/nv-ingest/releases/tag/24.08). Installation of the `nv-ingest-cli` goes as follows. - -```shell -# Just to be cautious we remove any existing installation -pip uninstall nv-ingest-cli - -# Download the 24.08 .whl -wget https://github.com/NVIDIA/nv-ingest/releases/download/24.08/nv_ingest_client-24.08-py3-none-any.whl - -# Pip install that .whl -pip install nv_ingest_client-24.08-py3-none-any.whl -``` - -### Access To Redis: `24.08` - -It is recommended that the end user provide a mechanism for [`Ingress`](https://kubernetes.io/docs/concepts/services-networking/ingress/) for the Redis pod. 
-You can test outside of your Kuberenetes cluster by [port-forwarding](https://kubernetes.io/docs/reference/kubectl/generated/kubectl_port-forward/) the Redis pod to your local environment. - -Example: - -```bash -kubectl port-forward -n ${NAMESPACE} nv-ingest-redis-master-0 6379:6379 -``` - - -#### Executing Jobs: `24.08` - -Here is a sample invocation of a PDF extraction task using the port forward above: - -```bash -mkdir -p ./processed_docs - -nv-ingest-cli \ - --doc /path/to/your/unique.pdf \ - --output_directory ./processed_docs \ - --task='extract:{"document_type": "pdf", "extract_text": true, "extract_images": true, "extract_tables": true}' \ - --client_host=localhost \ - --client_port=6379 -``` +# nv-ingest + +![Version: 0.4.2](https://img.shields.io/badge/Version-0.4.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) + +NV-Ingest Microservice + +## Maintainers + +| Name | Email | Url | +| ---- | ------ | --- | +| NVIDIA Corporation | | | + +## Requirements + +| Repository | Name | Version | +|------------|------|---------| +| alias:nemo-microservices | nvidia-nim-nemoretriever-graphic-elements-v1 | 1.2.0 | +| alias:nemo-microservices | paddleocr-nim | 0.2.0 | +| alias:nemo-microservices | yolox-nim | 0.2.0 | +| alias:nvidia-nim | nvidia-nim-llama-32-nv-embedqa-1b-v2 | 1.3.0 | +| alias:nvidia-nim | text-embedding-nim | 1.1.0 | +| alias:nvstaging-nim | nim-vlm | 1.2.0-ea-v4 | +| alias:nvstaging-nim | nvidia-nim-nemoretriever-table-structure-v1 | 1.1.0 | +| https://open-telemetry.github.io/opentelemetry-helm-charts | opentelemetry-collector | 0.78.1 | +| https://zilliztech.github.io/milvus-helm | milvus | 4.1.11 | +| https://zipkin.io/zipkin-helm | zipkin | 0.1.2 | +| oci://registry-1.docker.io/bitnamicharts | common | 2.x.x | +| oci://registry-1.docker.io/bitnamicharts | redis | 19.1.3 | + +## Values + +| Key | Type | Default | Description | 
+|-----|------|---------|-------------| +| affinity | object | `{}` | | +| autoscaling.enabled | bool | `false` | | +| autoscaling.maxReplicas | int | `100` | | +| autoscaling.metrics | list | `[]` | | +| autoscaling.minReplicas | int | `1` | | +| containerArgs | list | `[]` | | +| containerSecurityContext | object | `{}` | | +| embedqaDeployed | bool | `false` | | +| envVars.EMBEDDING_NIM_ENDPOINT | string | `"http://nv-ingest-embedqa:8000/v1"` | | +| envVars.EMBEDDING_NIM_MODEL_NAME | string | `"nvidia/llama-3.2-nv-embedqa-1b-v2"` | | +| envVars.MESSAGE_CLIENT_HOST | string | `"nv-ingest-redis-master"` | | +| envVars.MESSAGE_CLIENT_PORT | string | `"6379"` | | +| envVars.MILVUS_ENDPOINT | string | `"http://nv-ingest-milvus:19530"` | | +| envVars.MINIO_BUCKET | string | `"nv-ingest"` | | +| envVars.MINIO_INTERNAL_ADDRESS | string | `"nv-ingest-minio:9000"` | | +| envVars.MINIO_PUBLIC_ADDRESS | string | `"http://localhost:9000"` | | +| envVars.NEMORETRIEVER_PARSE_HTTP_ENDPOINT | string | `"http://nim-vlm:8000/v1/chat/completions"` | | +| envVars.NEMORETRIEVER_PARSE_INFER_PROTOCOL | string | `"http"` | | +| envVars.NV_INGEST_DEFAULT_TIMEOUT_MS | string | `"1234"` | | +| envVars.PADDLE_GRPC_ENDPOINT | string | `"nv-ingest-paddle:8001"` | | +| envVars.PADDLE_HTTP_ENDPOINT | string | `"http://nv-ingest-paddle:8000/v1/infer"` | | +| envVars.PADDLE_INFER_PROTOCOL | string | `"grpc"` | | +| envVars.REDIS_MORPHEUS_TASK_QUEUE | string | `"morpheus_task_queue"` | | +| envVars.VLM_CAPTION_ENDPOINT | string | `"https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct/chat/completions"` | | +| envVars.VLM_CAPTION_MODEL_NAME | string | `"meta/llama-3.2-11b-vision-instruct"` | | +| envVars.YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT | string | `"nv-ingest-nvidia-nim-nemoretriever-graphic-elements-v1:8001"` | | +| envVars.YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT | string | `"http://nv-ingest-nvidia-nim-nemoretriever-graphic-elements-v1:8000/v1/infer"` | | +| 
envVars.YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL | string | `"http"` | | +| envVars.YOLOX_GRPC_ENDPOINT | string | `"nv-ingest-yolox:8001"` | | +| envVars.YOLOX_HTTP_ENDPOINT | string | `"http://nv-ingest-yolox:8000/v1/infer"` | | +| envVars.YOLOX_INFER_PROTOCOL | string | `"grpc"` | | +| envVars.YOLOX_TABLE_STRUCTURE_GRPC_ENDPOINT | string | `"nv-ingest-nvidia-nim-nemoretriever-table-structure-v1:8001"` | | +| envVars.YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT | string | `"http://nv-ingest-nvidia-nim-nemoretriever-table-structure-v1:8000/v1/infer"` | | +| envVars.YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL | string | `"http"` | | +| extraEnvVarsCM | string | `""` | | +| extraEnvVarsSecret | string | `""` | | +| extraVolumeMounts | object | `{}` | | +| extraVolumes | object | `{}` | | +| fullnameOverride | string | `""` | | +| image.pullPolicy | string | `"IfNotPresent"` | | +| image.repository | string | `"nvcr.io/nvidia/nemo-microservices/nv-ingest"` | | +| image.tag | string | `"24.12"` | | +| imagePullSecret.create | bool | `false` | | +| imagePullSecret.name | string | `"nvcrimagepullsecret"` | | +| imagePullSecret.password | string | `""` | | +| imagePullSecret.registry | string | `"nvcr.io"` | | +| imagePullSecret.username | string | `"$oauthtoken"` | | +| imagePullSecrets[0].name | string | `"nvcrimagepullsecret"` | | +| imagePullSecrets[1].name | string | `"nemoMicroservicesPullSecret"` | | +| ingress.annotations | object | `{}` | | +| ingress.className | string | `""` | | +| ingress.enabled | bool | `false` | | +| ingress.hosts[0].host | string | `"chart-example.local"` | | +| ingress.hosts[0].paths[0].path | string | `"/"` | | +| ingress.hosts[0].paths[0].pathType | string | `"ImplementationSpecific"` | | +| ingress.tls | list | `[]` | | +| livenessProbe.enabled | bool | `false` | | +| livenessProbe.enabled | bool | `true` | | +| livenessProbe.failureThreshold | int | `20` | | +| livenessProbe.httpGet.path | string | `"/v1/health/live"` | | +| livenessProbe.httpGet.path 
| string | `"/health"` | | +| livenessProbe.httpGet.port | int | `7670` | | +| livenessProbe.httpGet.port | string | `"http"` | | +| livenessProbe.initialDelaySeconds | int | `15` | | +| livenessProbe.initialDelaySeconds | int | `120` | | +| livenessProbe.periodSeconds | int | `10` | | +| livenessProbe.periodSeconds | int | `20` | | +| livenessProbe.successThreshold | int | `1` | | +| livenessProbe.timeoutSeconds | int | `20` | | +| logLevel | string | `"DEBUG"` | | +| milvus.cluster.enabled | bool | `false` | | +| milvus.etcd.persistence.storageClass | string | `nil` | | +| milvus.etcd.replicaCount | int | `1` | | +| milvus.minio.mode | string | `"standalone"` | | +| milvus.minio.persistence.size | string | `"10Gi"` | | +| milvus.minio.persistence.storageClass | string | `nil` | | +| milvus.pulsar.enabled | bool | `false` | | +| milvus.standalone.extraEnv[0].name | string | `"LOG_LEVEL"` | | +| milvus.standalone.extraEnv[0].value | string | `"error"` | | +| milvus.standalone.persistence.persistentVolumeClaim.size | string | `"10Gi"` | | +| milvus.standalone.persistence.persistentVolumeClaim.storageClass | string | `nil` | | +| milvusDeployed | bool | `true` | | +| nameOverride | string | `""` | | +| nemo.groupID | string | `"1000"` | | +| nemo.userID | string | `"1000"` | | +| ngcSecret.create | bool | `false` | | +| ngcSecret.password | string | `""` | | +| nodeSelector | object | `{}` | | +| nvEmbedqaDeployed | bool | `true` | | +| nvidia-nim-llama-32-nv-embedqa-1b-v2.env[0].name | string | `"NIM_HTTP_API_PORT"` | | +| nvidia-nim-llama-32-nv-embedqa-1b-v2.env[0].value | string | `"8000"` | | +| nvidia-nim-llama-32-nv-embedqa-1b-v2.fullnameOverride | string | `"nv-ingest-embedqa"` | | +| nvidia-nim-llama-32-nv-embedqa-1b-v2.image.repository | string | `"nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2"` | | +| nvidia-nim-llama-32-nv-embedqa-1b-v2.image.tag | string | `"1.3.1"` | | +| nvidia-nim-llama-32-nv-embedqa-1b-v2.nim.grpcPort | int | `8001` | | +| 
nvidia-nim-llama-32-nv-embedqa-1b-v2.service.grpcPort | int | `8001` | | +| nvidia-nim-llama-32-nv-embedqa-1b-v2.service.name | string | `"nv-ingest-embedqa"` | | +| opentelemetry-collector.config.exporters.debug.verbosity | string | `"detailed"` | | +| opentelemetry-collector.config.exporters.zipkin.endpoint | string | `"http://nv-ingest-zipkin:9411/api/v2/spans"` | | +| opentelemetry-collector.config.extensions.health_check | object | `{}` | | +| opentelemetry-collector.config.extensions.zpages.endpoint | string | `"0.0.0.0:55679"` | | +| opentelemetry-collector.config.processors.batch | object | `{}` | | +| opentelemetry-collector.config.processors.tail_sampling.policies[0].name | string | `"drop_noisy_traces_url"` | | +| opentelemetry-collector.config.processors.tail_sampling.policies[0].string_attribute.enabled_regex_matching | bool | `true` | | +| opentelemetry-collector.config.processors.tail_sampling.policies[0].string_attribute.invert_match | bool | `true` | | +| opentelemetry-collector.config.processors.tail_sampling.policies[0].string_attribute.key | string | `"http.target"` | | +| opentelemetry-collector.config.processors.tail_sampling.policies[0].string_attribute.values[0] | string | `"\\/health"` | | +| opentelemetry-collector.config.processors.tail_sampling.policies[0].type | string | `"string_attribute"` | | +| opentelemetry-collector.config.processors.transform.trace_statements[0].context | string | `"span"` | | +| opentelemetry-collector.config.processors.transform.trace_statements[0].statements[0] | string | `"set(status.code, 1) where attributes[\"http.path\"] == \"/health\""` | | +| opentelemetry-collector.config.processors.transform.trace_statements[0].statements[1] | string | `"replace_match(attributes[\"http.route\"], \"/v1\", attributes[\"http.target\"]) where attributes[\"http.target\"] != nil"` | | +| opentelemetry-collector.config.processors.transform.trace_statements[0].statements[2] | string | `"replace_pattern(name, \"/v1\", 
attributes[\"http.route\"]) where attributes[\"http.route\"] != nil"` | | +| opentelemetry-collector.config.processors.transform.trace_statements[0].statements[3] | string | `"set(name, Concat([name, attributes[\"http.url\"]], \" \")) where name == \"POST\""` | | +| opentelemetry-collector.config.receivers.otlp.protocols.grpc.endpoint | string | `"${env:MY_POD_IP}:4317"` | | +| opentelemetry-collector.config.receivers.otlp.protocols.http.cors.allowed_origins[0] | string | `"*"` | | +| opentelemetry-collector.config.service.extensions[0] | string | `"zpages"` | | +| opentelemetry-collector.config.service.extensions[1] | string | `"health_check"` | | +| opentelemetry-collector.config.service.pipelines.logs.exporters[0] | string | `"debug"` | | +| opentelemetry-collector.config.service.pipelines.logs.processors[0] | string | `"batch"` | | +| opentelemetry-collector.config.service.pipelines.logs.receivers[0] | string | `"otlp"` | | +| opentelemetry-collector.config.service.pipelines.metrics.exporters[0] | string | `"debug"` | | +| opentelemetry-collector.config.service.pipelines.metrics.processors[0] | string | `"batch"` | | +| opentelemetry-collector.config.service.pipelines.metrics.receivers[0] | string | `"otlp"` | | +| opentelemetry-collector.config.service.pipelines.traces.exporters[0] | string | `"debug"` | | +| opentelemetry-collector.config.service.pipelines.traces.exporters[1] | string | `"zipkin"` | | +| opentelemetry-collector.config.service.pipelines.traces.processors[0] | string | `"tail_sampling"` | | +| opentelemetry-collector.config.service.pipelines.traces.processors[1] | string | `"transform"` | | +| opentelemetry-collector.config.service.pipelines.traces.receivers[0] | string | `"otlp"` | | +| opentelemetry-collector.mode | string | `"deployment"` | | +| otelDeployed | bool | `true` | | +| otelEnabled | bool | `true` | | +| otelEnvVars.OTEL_LOGS_EXPORTER | string | `"none"` | | +| otelEnvVars.OTEL_METRICS_EXPORTER | string | `"otlp"` | | +| 
otelEnvVars.OTEL_PROPAGATORS | string | `"tracecontext,baggage"` | | +| otelEnvVars.OTEL_PYTHON_EXCLUDED_URLS | string | `"health"` | | +| otelEnvVars.OTEL_RESOURCE_ATTRIBUTES | string | `"deployment.environment=$(NAMESPACE)"` | | +| otelEnvVars.OTEL_SERVICE_NAME | string | `"nemo-retrieval-service"` | | +| otelEnvVars.OTEL_TRACES_EXPORTER | string | `"otlp"` | | +| paddleocr-nim.env[0].name | string | `"NIM_HTTP_API_PORT"` | | +| paddleocr-nim.env[0].value | string | `"8000"` | | +| paddleocr-nim.fullnameOverride | string | `"nv-ingest-paddle"` | | +| paddleocr-nim.image.repository | string | `"nvcr.io/nvidia/nemo-microservices/paddleocr"` | | +| paddleocr-nim.image.tag | string | `"1.0.0"` | | +| paddleocr-nim.nim.grpcPort | int | `8001` | | +| paddleocr-nim.service.grpcPort | int | `8001` | | +| paddleocr-nim.service.name | string | `"nv-ingest-paddle"` | | +| paddleocrDeployed | bool | `true` | | +| podAnnotations."traffic.sidecar.istio.io/excludeOutboundPorts" | string | `"8007"` | | +| podLabels | object | `{}` | | +| podSecurityContext.fsGroup | int | `1000` | | +| readinessProbe.enabled | bool | `false` | | +| readinessProbe.enabled | bool | `true` | | +| readinessProbe.failureThreshold | int | `220` | | +| readinessProbe.httpGet.path | string | `"/health"` | | +| readinessProbe.httpGet.path | string | `"/v1/health/ready"` | | +| readinessProbe.httpGet.port | string | `"http"` | | +| readinessProbe.httpGet.port | int | `7670` | | +| readinessProbe.initialDelaySeconds | int | `120` | | +| readinessProbe.initialDelaySeconds | int | `30` | | +| readinessProbe.periodSeconds | int | `30` | | +| readinessProbe.periodSeconds | int | `30` | | +| readinessProbe.successThreshold | int | `1` | | +| readinessProbe.timeoutSeconds | int | `10` | | +| redis.auth.enabled | bool | `false` | | +| redis.master.persistence.size | string | `"50Gi"` | | +| redis.master.resources.limits.memory | string | `"12Gi"` | | +| redis.master.resources.requests.memory | string | `"6Gi"` | 
| +| redis.replica.persistence.size | string | `"50Gi"` | | +| redis.replica.replicaCount | int | `1` | | +| redis.replica.resources.limits.memory | string | `"12Gi"` | | +| redis.replica.resources.requests.memory | string | `"6Gi"` | | +| redisDeployed | bool | `true` | | +| replicaCount | int | `1` | | +| resources.limits."nvidia.com/gpu" | int | `1` | | +| resources.limits.cpu | string | `"48000m"` | | +| resources.limits.memory | string | `"90Gi"` | | +| resources.requests.cpu | string | `"24000m"` | | +| resources.requests.memory | string | `"24Gi"` | | +| service.annotations | object | `{}` | | +| service.labels | object | `{}` | | +| service.name | string | `""` | | +| service.nodePort | string | `nil` | | +| service.port | int | `7670` | | +| service.type | string | `"ClusterIP"` | | +| serviceAccount.annotations | object | `{}` | | +| serviceAccount.automount | bool | `true` | | +| serviceAccount.create | bool | `true` | | +| serviceAccount.name | string | `""` | | +| startupProbe.enabled | bool | `false` | | +| startupProbe.failureThreshold | int | `220` | | +| startupProbe.httpGet.path | string | `"/health"` | | +| startupProbe.httpGet.port | string | `"http"` | | +| startupProbe.initialDelaySeconds | int | `120` | | +| startupProbe.periodSeconds | int | `30` | | +| startupProbe.successThreshold | int | `1` | | +| startupProbe.timeoutSeconds | int | `10` | | +| text-embedding-nim.env[0].name | string | `"NIM_HTTP_API_PORT"` | | +| text-embedding-nim.env[0].value | string | `"8000"` | | +| text-embedding-nim.fullnameOverride | string | `"nv-ingest-embedqa"` | | +| text-embedding-nim.image.repository | string | `"nvcr.io/nim/nvidia/nv-embedqa-e5-v5"` | | +| text-embedding-nim.image.tag | string | `"1.1.0"` | | +| text-embedding-nim.nim.grpcPort | int | `8001` | | +| text-embedding-nim.service.grpcPort | int | `8001` | | +| text-embedding-nim.service.name | string | `"nv-ingest-embedqa"` | | +| tmpDirSize | string | `"16Gi"` | | +| tolerations | list | `[]` 
| | +| triton.enabled | bool | `false` | | +| yolox-nim.deployed | bool | `true` | | +| yolox-nim.env[0].name | string | `"NIM_HTTP_API_PORT"` | | +| yolox-nim.env[0].value | string | `"8000"` | | +| yolox-nim.fullnameOverride | string | `"nv-ingest-yolox"` | | +| yolox-nim.image.repository | string | `"nvcr.io/nvidia/nemo-microservices/nv-yolox-page-elements-v1"` | | +| yolox-nim.image.tag | string | `"1.0.0"` | | +| yolox-nim.nim.grpcPort | int | `8001` | | +| yolox-nim.service.grpcPort | int | `8001` | | +| yolox-nim.service.name | string | `"nv-ingest-yolox"` | | +| yoloxDeployed | bool | `true` | | +| zipkinDeployed | bool | `true` | | + +---------------------------------------------- +Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2) diff --git a/helm/templates/_helpers.tpl b/helm/templates/_helpers.tpl index dcd493d3..538f54dd 100644 --- a/helm/templates/_helpers.tpl +++ b/helm/templates/_helpers.tpl @@ -64,17 +64,16 @@ Create the name of the service account to use {{/* Create secret to access docker registry */}} -{{- define "nv-ingest.imagePullSecret" }} -{{- printf "{\"auths\": {\"%s\": {\"auth\": \"%s\"}}}" .Values.imagePullSecret.registry (printf "%s:%s" .Values.imagePullSecret.username .Values.imagePullSecret.password | b64enc) | b64enc }} +{{- define "nv-ingest.ngcImagePullSecret" }} +{{- printf "{\"auths\": {\"%s\": {\"auth\": \"%s\"}}}" .Values.ngcImagePullSecret.registry (printf "%s:%s" .Values.ngcImagePullSecret.username .Values.ngcImagePullSecret.password | b64enc) | b64enc }} {{- end }} {{/* -Create secret to access docker registry -*/}} -{{- define "nv-ingest.ngcAPIKey" }} -{{- printf "%s" .Values.ngcSecret.password }} -{{- end }} - + Create secret to access NGC Api + */}} + {{- define "nv-ingest.ngcApiSecret" }} + {{- printf "%s" .Values.ngcApiSecret.password }} + {{- end }} {{/* Create a deployment with the NGC Puller diff --git a/helm/templates/image-pull-secret.yaml 
b/helm/templates/image-pull-secret.yaml deleted file mode 100644 index 7214d4a1..00000000 --- a/helm/templates/image-pull-secret.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. -# All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -{{ if and .Values.ngcSecret.create -}} ---- -apiVersion: v1 -kind: Secret -metadata: - name: ngc-api -type: Opaque -data: - NGC_CLI_API_KEY: {{ template "nv-ingest.ngcAPIKey" . }} - NGC_API_KEY: {{ template "nv-ingest.ngcAPIKey" . }} -{{- end }} - - -{{ if and .Values.imagePullSecret.name .Values.imagePullSecret.create -}} ---- -apiVersion: v1 -kind: Secret -metadata: - name: {{ .Values.imagePullSecret.name }} -type: kubernetes.io/dockerconfigjson -data: - .dockerconfigjson: {{ template "nv-ingest.imagePullSecret" . }} -{{- end }} diff --git a/helm/templates/secrets.yaml b/helm/templates/secrets.yaml new file mode 100644 index 00000000..c5c9ecf6 --- /dev/null +++ b/helm/templates/secrets.yaml @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +{{ if .Values.ngcImagePullSecret.create -}} +--- +apiVersion: v1 +kind: Secret +metadata: + name: ngc-secret # name expected by NIMs +type: kubernetes.io/dockerconfigjson +data: + .dockerconfigjson: {{ template "nv-ingest.ngcImagePullSecret" . }} +{{- end }} + + +{{ if and .Values.ngcApiSecret.create -}} +--- +apiVersion: v1 +kind: Secret +metadata: + name: ngc-api # Name expected by NIMs +type: Opaque +data: + NGC_CLI_API_KEY: {{ template "nv-ingest.ngcApiSecret" . }} + NGC_API_KEY: {{ template "nv-ingest.ngcApiSecret" . 
}} +{{- end }} diff --git a/helm/values.yaml b/helm/values.yaml index 27b13cca..c35ba0bb 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -51,6 +51,7 @@ extraVolumeMounts: {} ## @skip imagePullSecrets[0].name imagePullSecrets: - name: nvcrimagepullsecret + - name: nemoMicroservicesPullSecret ## @param containerSecurityContext [object] Sets privilege and access control settings for container (Only affects the main container, not pod-level) containerSecurityContext: {} @@ -62,27 +63,25 @@ containerSecurityContext: {} ## @skip tolerations[0].effect tolerations: [] -# TODO: jdyer - uncomment this when microservice is added, currently no /health endpoint exists to check ... - ## @param replicaCount [default: 1] The number of replicas for NVIngest when autoscaling is disabled replicaCount: 1 ## @skip resources Specify resources limits and requests for the running service. ## @extra resources.limits."nvidia.com/gpu" Specify number of GPUs to present to the running service. -## @param resources.limits.memory [default: 32Gi] Specify limit for memory -## @param resources.requests.memory [default: 16Gi] Specify request for memory +## @param resources.limits.memory [default: 90Gi] Specify limit for memory +## @param resources.requests.memory [default: 24Gi] Specify request for memory resources: limits: memory: 90Gi nvidia.com/gpu: 1 - cpu: "36000m" + cpu: "48000m" requests: memory: 24Gi - cpu: "16000m" + cpu: "24000m" -## @param tmpDirSize [default: 8Gi] Specify the amount of space to reserve for temporary storage -tmpDirSize: 8Gi +## @param tmpDirSize [default: 16Gi] Specify the amount of space to reserve for temporary storage +tmpDirSize: 16Gi ## @section NIM Configuration @@ -92,120 +91,345 @@ tmpDirSize: 8Gi ## explicitly called out here. 
## @descriptionEnd -## @skip yoloxDeployed ## @skip yolox-nim -## @extra yolox-nim.image.repository The repository to override the location of the YOLOX -## @extra yolox-nim.image.tag The tag override for YOLOX -yoloxDeployed: true +## @param yolox-nim.deployed [default: true] true if the Yolox NIM should be deployed and false otherwise +## @param yolox-nim.image.repository The repository to override the location of the YOLOX +## @param yolox-nim.image.tag The tag override for YOLOX yolox-nim: + deployed: true fullnameOverride: nv-ingest-yolox image: repository: nvcr.io/nvidia/nemo-microservices/nv-yolox-page-elements-v1 tag: "1.0.0" + pullPolicy: IfNotPresent + podSecurityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + replicaCount: 1 + serviceAccount: + create: false + name: "" + statefuleSet: + enabled: false + autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 10 + metrics: [] service: + type: "ClusterIP" name: nv-ingest-yolox + httpPort: 8000 grpcPort: 8001 + metricsPort: 0 # Generally unused and defaults to 0 nim: grpcPort: 8001 + logLevel: "INFO" env: - name: NIM_HTTP_API_PORT value: "8000" +## @skip nvidia-nim-nemoretriever-graphic-elements-v1 +## @param nvidia-nim-nemoretriever-graphic-elements-v1.deployed [default: true] true if the nemoretriever-graphic-elements NIM should be deployed and false otherwise +## @param nvidia-nim-nemoretriever-graphic-elements-v1.image.repository The repository to override the location of the nemoretriever-graphic-elements +## @param nvidia-nim-nemoretriever-graphic-elements-v1.image.tag The tag override for nemoretriever-graphic-elements +nvidia-nim-nemoretriever-graphic-elements-v1: + deployed: true + # fullnameOverride: nvidia-nim-nemoretriever-graphic-elements-v1 + customCommand: [] + customArgs: [] + image: + repository: nvcr.io/nvidia/nemo-microservices/nemoretriever-graphic-elements-v1 + tag: "1.2.0" + podSecurityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + replicaCount: 1 
+ serviceAccount: + create: false + name: "" + statefuleSet: + enabled: false + autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 10 + metrics: [] + service: + type: "ClusterIP" + name: nvidia-nim-nemoretriever-graphic-elements-v1 + httpPort: 8000 + grpcPort: 8001 + metricsPort: 0 # Generally unused and defaults to 0 + nim: + grpcPort: 8001 + logLevel: "INFO" + env: + - name: NIM_HTTP_API_PORT + value: "8000" -## @skip cachedDeployed -## @skip cached-nim -## @extra cached-nim.image.repository The repository to override the location of the Cached Model NIM -## @extra cached-nim.image.tag The tag override for Cached Model NIM -cachedDeployed: true -cached-nim: - fullnameOverride: nv-ingest-cached +## @skip nvidia-nim-nemoretriever-table-structure-v1 +## @param nvidia-nim-nemoretriever-table-structure-v1.deployed [default: true] true if the nemoretriever-table-structure NIM should be deployed and false otherwise +## @param nvidia-nim-nemoretriever-table-structure-v1.image.repository The repository to override the location of the nemoretriever-table-structure +## @param nvidia-nim-nemoretriever-table-structure-v1.image.tag The tag override for nemoretriever-table-structure +nvidia-nim-nemoretriever-table-structure-v1: + deployed: true + # fullnameOverride: nvidia-nim-nemoretriever-table-structure-v1 + customCommand: [] + customArgs: [] image: - repository: nvcr.io/nvidia/nemo-microservices/cached - tag: "0.2.1" + repository: nvcr.io/nvidia/nemo-microservices/nemoretriever-table-structure-v1 + tag: "1.2.0" + podSecurityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + replicaCount: 1 + serviceAccount: + create: false + name: "" + statefuleSet: + enabled: false + autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 10 + metrics: [] service: - name: nv-ingest-cached + type: "ClusterIP" + name: nvidia-nim-nemoretriever-table-structure-v1 + httpPort: 8000 grpcPort: 8001 + metricsPort: 0 # Generally unused and defaults to 0 nim: grpcPort: 
8001 + logLevel: "INFO" env: - name: NIM_HTTP_API_PORT value: "8000" -## @skip paddleocrDeployed -## @skip paddleocr-nim -## @extra paddleocr-nim.image.repository The repository to override the location of the Paddle OCR NIM -## @extra paddleocr-nim.image.tag The tag override for Paddle OCR NIM -paddleocrDeployed: true -paddleocr-nim: - fullnameOverride: nv-ingest-paddle +## @skip nim-vlm-image-captioning +## @param nim-vlm-image-captioning.deployed [default: false] true if the vlm-nim should be deployed and false otherwise +## @param nim-vlm-image-captioning.image.repository The repository to override the location of the nim-vlm +## @param nim-vlm-image-captioning.image.tag The tag override for nim-vlm +nim-vlm-image-captioning: + deployed: false + fullnameOverride: nim-vlm-image-captioning + customCommand: [] + customArgs: [] image: - repository: nvcr.io/nvidia/nemo-microservices/paddleocr - tag: "1.0.0" + repository: nvcr.io/nim/meta/llama-3.2-11b-vision-instruct + tag: "latest" + podSecurityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + replicaCount: 1 service: - name: nv-ingest-paddle + type: "ClusterIP" + name: nim-vlm-image-captioning + httpPort: 8000 + grpcPort: 8001 + metricsPort: 0 # Generally unused and defaults to 0 + nim: + grpcPort: 8001 + logLevel: "INFO" + env: + - name: NIM_HTTP_API_PORT + value: "8000" + +## @skip nim-vlm-text-extraction +## @param nim-vlm-text-extraction.deployed [default: false] true if the vlm-nim should be deployed and false otherwise +## @param nim-vlm-text-extraction.image.repository The repository to override the location of the nim-vlm +## @param nim-vlm-text-extraction.image.tag The tag override for nim-vlm +nim-vlm-text-extraction: + deployed: false + fullnameOverride: nim-vlm-text-extraction-nemoretriever-parse + customCommand: [] + customArgs: [] + image: + repository: nvcr.io/nvidia/nemo-microservices/nemoretriever-parse + tag: "1.2.0ea" + podSecurityContext: + runAsUser: 1000 + runAsGroup: 1000 + 
 fsGroup: 1000 + replicaCount: 1 + service: + type: "ClusterIP" + name: nim-vlm-text-extraction-nemoretriever-parse + httpPort: 8000 grpcPort: 8001 + metricsPort: 0 # Generally unused and defaults to 0 nim: grpcPort: 8001 + logLevel: "INFO" env: - name: NIM_HTTP_API_PORT value: "8000" -## @skip deplotDeployed -## @skip deplot-nim -## @extra deplot-nim.image.repository The repository to override the location of the Deplot NIM -## @extra deplot-nim.image.tag The tag override for Deplot NIM -deplotDeployed: true -deplot-nim: - fullnameOverride: nv-ingest-deplot +## @skip paddleocr-nim +## @param paddleocr-nim.deployed [default: true] true if the paddleocr-nim should be deployed and false otherwise +## @param paddleocr-nim.image.repository The repository to override the location of the Paddle OCR NIM +## @param paddleocr-nim.image.tag The tag override for Paddle OCR NIM +paddleocr-nim: + deployed: true + fullnameOverride: nv-ingest-paddle + customCommand: [] + customArgs: [] image: - repository: nvcr.io/nvidia/nemo-microservices/deplot + repository: nvcr.io/nvidia/nemo-microservices/paddleocr tag: "1.0.0" + podSecurityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + replicaCount: 1 + serviceAccount: + create: false + name: "" + statefuleSet: + enabled: false + autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 10 + metrics: [] service: - name: nv-ingest-deplot + type: "ClusterIP" + name: nv-ingest-paddle + httpPort: 8000 grpcPort: 8001 + metricsPort: 0 # Generally unused and defaults to 0 nim: grpcPort: 8001 + logLevel: "INFO" env: - name: NIM_HTTP_API_PORT value: "8000" -## @skip embedqaDeployed -## @skip embedqa-nim -## @extra embedqa-nim.image.repository The repository to override the location of the embedqa NIM -## @extra embedqa-nim.image.tag The tag override for embedqa NIM -embedqaDeployed: false + +## @skip text-embedding-nim +## @param text-embedding-nim.deployed [default: false] true if the text-embedding-nim should be 
deployed and false otherwise +## @param text-embedding-nim.image.repository The repository to override the location of the text-embedding-nim +## @param text-embedding-nim.image.tag The tag override for text-embedding-nim text-embedding-nim: - fullnameOverride: nv-ingest-embedqa + deployed: false + fullnameOverride: nv-ingest-embedqa # Share name with nvidia-nim-llama-32-nv-embedqa-1b-v2 to ease configuration + customCommand: [] + customArgs: [] image: repository: nvcr.io/nim/nvidia/nv-embedqa-e5-v5 tag: "1.1.0" + podSecurityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + replicaCount: 1 + serviceAccount: + create: false + name: "" + statefuleSet: + enabled: false + autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 10 + metrics: [] service: + type: "ClusterIP" name: nv-ingest-embedqa + httpPort: 8000 grpcPort: 8001 + metricsPort: 0 # Generally unused and defaults to 0 nim: grpcPort: 8001 + logLevel: "INFO" env: - name: NIM_HTTP_API_PORT - value: "8000" -## @skip nvEmbedqaDeployed + ## @skip nvidia-nim-llama-32-nv-embedqa-1b-v2 -## @extra nvidia-nim-llama-32-nv-embedqa-1b-v2.image.repository The repository to override the location of the nvEmbedqa NIM -## @extra nvidia-nim-llama-32-nv-embedqa-1b-v2.image.tag The tag override for nvEmbedqa NIM -nvEmbedqaDeployed: true +## @param nvidia-nim-llama-32-nv-embedqa-1b-v2.deployed [default: true] true if nvidia-nim-llama-32-nv-embedqa-1b-v2 should be deployed and false otherwise +## @param nvidia-nim-llama-32-nv-embedqa-1b-v2.image.repository The repository to override the location of the nvEmbedqa NIM +## @param nvidia-nim-llama-32-nv-embedqa-1b-v2.image.tag The tag override for nvEmbedqa NIM nvidia-nim-llama-32-nv-embedqa-1b-v2: - fullnameOverride: nv-ingest-embedqa + deployed: true + fullnameOverride: nv-ingest-embedqa # Share name with text-embedding-nim to ease configuration + customCommand: [] + customArgs: [] image: repository: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2 tag: "1.3.1" + 
 podSecurityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + replicaCount: 1 + serviceAccount: + create: false + name: "" + statefuleSet: + enabled: false + autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 10 + metrics: [] service: + type: "ClusterIP" name: nv-ingest-embedqa + httpPort: 8000 grpcPort: 8001 + metricsPort: 0 # Generally unused and defaults to 0 nim: grpcPort: 8001 + logLevel: "INFO" env: - name: NIM_HTTP_API_PORT - value: "8000" + + +## @skip llama-3.2-nv-rerankqa-1b-v2 +## @param llama-3.2-nv-rerankqa-1b-v2.deployed [default: false] true if llama-3.2-nv-rerankqa-1b-v2 should be deployed and false otherwise +## @param llama-3.2-nv-rerankqa-1b-v2.image.repository The repository to override the location of the reranker NIM +## @param llama-3.2-nv-rerankqa-1b-v2.image.tag The tag override for reranker NIM +llama-3.2-nv-rerankqa-1b-v2: + deployed: false + # fullnameOverride: llama-3.2-nv-rerankqa-1b-v2 + customCommand: [] + customArgs: [] + image: + repository: nvcr.io/nim/nvidia/llama-3.2-nv-rerankqa-1b-v2 + tag: "1.3.0" + podSecurityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + replicaCount: 1 + serviceAccount: + create: false + name: "" + statefuleSet: + enabled: false + autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 10 + metrics: [] + service: + type: "ClusterIP" + name: llama-3.2-nv-rerankqa-1b-v2 + httpPort: 8000 + grpcPort: 8001 + metricsPort: 0 # Generally unused and defaults to 0 + nim: + grpcPort: 8001 + logLevel: "INFO" + env: + - name: NIM_HTTP_API_PORT + value: "8000" + # Use this to load an image Pre NIM Factory via triton ## @skip triton @@ -276,12 +500,16 @@ redis: enabled: false replica: replicaCount: 1 + persistence: + size: "50Gi" resources: requests: memory: "6Gi" limits: memory: "12Gi" master: + persistence: + size: "50Gi" resources: requests: memory: "6Gi" @@ -323,23 +551,27 @@ envVars: MINIO_PUBLIC_ADDRESS: http://localhost:9000 MINIO_BUCKET: nv-ingest - CACHED_GRPC_ENDPOINT: 
 nv-ingest-cached:8001 - CACHED_HTTP_ENDPOINT: http://nv-ingest-cached:8000/v1/infer - CACHED_INFER_ENDPOINT: grpc PADDLE_GRPC_ENDPOINT: nv-ingest-paddle:8001 PADDLE_HTTP_ENDPOINT: http://nv-ingest-paddle:8000/v1/infer PADDLE_INFER_PROTOCOL: grpc + NEMORETRIEVER_PARSE_HTTP_ENDPOINT: http://nim-vlm-text-extraction-nemoretriever-parse:8000/v1/chat/completions + NEMORETRIEVER_PARSE_INFER_PROTOCOL: http YOLOX_GRPC_ENDPOINT: nv-ingest-yolox:8001 YOLOX_HTTP_ENDPOINT: http://nv-ingest-yolox:8000/v1/infer YOLOX_INFER_PROTOCOL: grpc - DEPLOT_GRPC_ENDPOINT: "" - DEPLOT_HTTP_ENDPOINT: http://nv-ingest-deplot:8000/v1/chat/completions - DEPLOT_INFER_PROTOCOL: http - - EMBEDDING_NIM_ENDPOINT: "http://nv-ingest-embedding:8000/v1" + YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT: nvidia-nim-nemoretriever-graphic-elements-v1:8001 + YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT: http://nvidia-nim-nemoretriever-graphic-elements-v1:8000/v1/infer + YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL: http + YOLOX_TABLE_STRUCTURE_GRPC_ENDPOINT: nvidia-nim-nemoretriever-table-structure-v1:8001 + YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT: http://nvidia-nim-nemoretriever-table-structure-v1:8000/v1/infer + YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL: http + + EMBEDDING_NIM_ENDPOINT: "http://nv-ingest-embedqa:8000/v1" + EMBEDDING_NIM_MODEL_NAME: "nvidia/llama-3.2-nv-embedqa-1b-v2" MILVUS_ENDPOINT: "http://nv-ingest-milvus:19530" - VLM_CAPTION_ENDPOINT: "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct/chat/completions" + VLM_CAPTION_ENDPOINT: "http://nim-vlm-image-captioning:8000/v1/chat/completions" + VLM_CAPTION_MODEL_NAME: meta/llama-3.2-11b-vision-instruct ## @section Open Telemetry ## @descriptionStart @@ -476,7 +708,7 @@ ingress: livenessProbe: enabled: false httpGet: - path: /health + path: /v1/health/live port: http initialDelaySeconds: 120 periodSeconds: 10 @@ -484,26 +716,6 @@ livenessProbe: failureThreshold: 20 successThreshold: 1 -## @section Probe parameters -## @param startupProbe.enabled Enables `startupProbe`` 
-## @param startupProbe.httpGet.path `StartupProbe`` endpoint path -## @param startupProbe.httpGet.port `StartupProbe`` endpoint port -## @param startupProbe.initialDelaySeconds Initial delay seconds for `startupProbe` -## @param startupProbe.timeoutSeconds Timeout seconds for `startupProbe` -## @param startupProbe.periodSeconds Period seconds for `startupProbe` -## @param startupProbe.successThreshold Success threshold for `startupProbe` -## @param startupProbe.failureThreshold Failure threshold for `startupProbe` -startupProbe: - enabled: false - httpGet: - path: /health - port: http - initialDelaySeconds: 120 - periodSeconds: 30 - timeoutSeconds: 10 - failureThreshold: 220 - successThreshold: 1 - ## @section Probe parameters ## @param readinessProbe.enabled Enables `readinessProbe`` ## @param readinessProbe.httpGet.path `ReadinessProbe`` endpoint path @@ -516,7 +728,7 @@ startupProbe: readinessProbe: enabled: false httpGet: - path: /health + path: /v1/health/ready port: http initialDelaySeconds: 120 periodSeconds: 30 @@ -554,24 +766,30 @@ serviceAccount: ## @descriptionStart ## Manage the creation of secrets used by the helm chart ## @descriptionEnd -## @param ngcSecret.create Specifies whether to create the ngc api secret -## @param ngcSecret.password The password to use for the NGC Secret -ngcSecret: + +# ngcApi: +# # If set to false, the chart expects a secret with the name +# create: false +# password: "" + +## @param ngcApiSecret.create Specifies whether to create the ngc api secret +## @param ngcApiSecret.password The password to use for the NGC Secret +ngcApiSecret: # If set to false, the chart expects a secret with name ngc-api to exist in the namespace # credentials are needed. 
create: false password: "" -## @param imagePullSecret.create Specifies whether to create the NVCR Image Pull secret -## @param imagePullSecret.password The password to use for the NVCR Image Pull Secret -## @skip imagePullSecret.registry -## @skip imagePullSecret.name -## @skip imagePullSecret.username -imagePullSecret: +## @param ngcImagePullSecret.create Specifies whether to create the NVCR Image Pull secret +## @param ngcImagePullSecret.password The password to use for the NVCR Image Pull Secret +## @skip ngcImagePullSecret.registry +## @skip ngcImagePullSecret.name +## @skip ngcImagePullSecret.username +ngcImagePullSecret: create: false # Leave blank, if no imagePullSecret is needed. registry: "nvcr.io" - name: "nvcrimagepullsecret" + name: "ngcImagePullSecret" # If set to false, the chart expects either a imagePullSecret # with the name configured above to be present on the cluster or that no # credentials are needed. @@ -580,6 +798,7 @@ imagePullSecret: password: "" + ## @skip nemo nemo: userID: "1000"