Fix regex task filtering regression and image extraction handling (#383)

NVIDIA · Jan 28, 2025 · f8c2e8c · f8c2e8c
1 parent 522b5d1
commit f8c2e8c
Show file tree

Hide file tree

Showing 5 changed files with 25 additions and 18 deletions.
diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -188,6 +188,7 @@ services:
       #- YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nv-yolox-page-elements-v1
       #- YOLOX_INFER_PROTOCOL=http
       - VLM_CAPTION_ENDPOINT=https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct/chat/completions
+      - VLM_CAPTION_MODEL_NAME=meta/llama-3.2-90b-vision-instruct
     healthcheck:
       test: curl --fail http://nv-ingest-ms-runtime:7670/v1/health/ready || exit 1
       interval: 10s

diff --git a/src/nv_ingest/extraction_workflows/image/image_handlers.py b/src/nv_ingest/extraction_workflows/image/image_handlers.py
@@ -345,19 +345,6 @@ def image_data_extractor(
         # Future function for text extraction based on document_type
         logger.warning("Text extraction is not supported for raw images.")
 
-    # Image extraction stub
-    if extract_images:
-        # Placeholder for image-specific extraction process
-        extracted_data.append(
-            construct_image_metadata_from_base64(
-                numpy_to_base64(image_array),
-                page_idx=0,  # Single image treated as one page
-                page_count=1,
-                source_metadata=source_metadata,
-                base_unified_metadata=base_unified_metadata,
-            )
-        )
-
     # Table and chart extraction
     if extract_tables or extract_charts:
         try:
@@ -366,8 +353,8 @@ def image_data_extractor(
                 config=kwargs.get("image_extraction_config"),
                 trace_info=trace_info,
             )
-            logger.debug("Extracted table/chart data from image")
-            for _, table_chart_data in tables_and_charts[0]:
+            for item in tables_and_charts:
+                table_chart_data = item[1]
                 extracted_data.append(
                     construct_table_and_chart_metadata(
                         table_chart_data,
@@ -381,6 +368,19 @@ def image_data_extractor(
             logger.error(f"Error extracting tables/charts from image: {e}")
             raise
 
+    # Image extraction stub
+    if extract_images and not extracted_data:  # It's not an unstructured image if we extracted a structured image
+        # Placeholder for image-specific extraction process
+        extracted_data.append(
+            construct_image_metadata_from_base64(
+                numpy_to_base64(image_array),
+                page_idx=0,  # Single image treated as one page
+                page_count=1,
+                source_metadata=source_metadata,
+                base_unified_metadata=base_unified_metadata,
+            )
+        )
+
     logger.debug(f"Extracted {len(extracted_data)} items from the image.")
 
     return extracted_data
diff --git a/src/nv_ingest/service/impl/ingest/redis_ingest_service.py b/src/nv_ingest/service/impl/ingest/redis_ingest_service.py
@@ -68,7 +68,11 @@ async def submit_job(self, job_spec: MessageWrapper, trace_id: str) -> str:
 
             for task in tasks:
                 task_prop = task["task_properties"]
-                task_prop_dict = task_prop.dict()
+                if not isinstance(task_prop, dict):
+                    logger.debug(f"Task properties are not a dictionary: {tasks}")
+                    task_prop_dict = task_prop.model_dump()
+                else:
+                    task_prop_dict = task_prop
                 task["task_properties"] = task_prop_dict
                 updated_tasks.append(task)
 

diff --git a/src/nv_ingest/util/flow_control/filter_by_task.py b/src/nv_ingest/util/flow_control/filter_by_task.py
@@ -87,9 +87,9 @@ def _is_subset(superset, subset):
         # The subset is a regex pattern
         pattern = subset[len("regex:") :]
         if isinstance(superset, list):
-            return any(re.match(pattern, str(sup_item)) for sup_item in superset)
+            return any(re.match(pattern, sup_item) for sup_item in superset)
         else:
-            return re.match(pattern, str(superset)) is not None
+            return re.match(pattern, superset) is not None
     if isinstance(superset, list) and not isinstance(subset, list):
         # Check if the subset value matches any item in the superset
         return any(_is_subset(sup_item, subset) for sup_item in superset)

diff --git a/src/nv_ingest/util/pipeline/stage_builders.py b/src/nv_ingest/util/pipeline/stage_builders.py
@@ -384,12 +384,14 @@ def add_image_caption_stage(pipe, morpheus_pipeline_config, ingest_config, defau
     )
 
     endpoint_url = os.environ.get("VLM_CAPTION_ENDPOINT", "localhost:5000")
+    model_name = os.environ.get("VLM_CAPTION_MODEL_NAME", "meta/nv-llama-3.2-90b-vision-instruct")
 
     image_caption_config = ingest_config.get(
         "image_caption_extraction_module",
         {
             "api_key": auth_token,
             "endpoint_url": endpoint_url,
+            "model_name": model_name,
             "prompt": "Caption the content of this image:",
         },
     )