diff --git a/docker-compose.yaml b/docker-compose.yaml index 8d9c307a..0429423d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -188,6 +188,7 @@ services: #- YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nv-yolox-page-elements-v1 #- YOLOX_INFER_PROTOCOL=http - VLM_CAPTION_ENDPOINT=https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct/chat/completions + - VLM_CAPTION_MODEL_NAME=meta/llama-3.2-90b-vision-instruct healthcheck: test: curl --fail http://nv-ingest-ms-runtime:7670/v1/health/ready || exit 1 interval: 10s diff --git a/src/nv_ingest/extraction_workflows/image/image_handlers.py b/src/nv_ingest/extraction_workflows/image/image_handlers.py index f7b12982..be94a47a 100644 --- a/src/nv_ingest/extraction_workflows/image/image_handlers.py +++ b/src/nv_ingest/extraction_workflows/image/image_handlers.py @@ -345,19 +345,6 @@ def image_data_extractor( # Future function for text extraction based on document_type logger.warning("Text extraction is not supported for raw images.") - # Image extraction stub - if extract_images: - # Placeholder for image-specific extraction process - extracted_data.append( - construct_image_metadata_from_base64( - numpy_to_base64(image_array), - page_idx=0, # Single image treated as one page - page_count=1, - source_metadata=source_metadata, - base_unified_metadata=base_unified_metadata, - ) - ) - # Table and chart extraction if extract_tables or extract_charts: try: @@ -366,8 +353,8 @@ def image_data_extractor( config=kwargs.get("image_extraction_config"), trace_info=trace_info, ) - logger.debug("Extracted table/chart data from image") - for _, table_chart_data in tables_and_charts[0]: + for item in tables_and_charts: + table_chart_data = item[1] extracted_data.append( construct_table_and_chart_metadata( table_chart_data, @@ -381,6 +368,19 @@ def image_data_extractor( logger.error(f"Error extracting tables/charts from image: {e}") raise + # Image extraction stub + if extract_images and not 
extracted_data: # It's not an unstructured image if we extracted a structured image + # Placeholder for image-specific extraction process + extracted_data.append( + construct_image_metadata_from_base64( + numpy_to_base64(image_array), + page_idx=0, # Single image treated as one page + page_count=1, + source_metadata=source_metadata, + base_unified_metadata=base_unified_metadata, + ) + ) + logger.debug(f"Extracted {len(extracted_data)} items from the image.") return extracted_data diff --git a/src/nv_ingest/service/impl/ingest/redis_ingest_service.py b/src/nv_ingest/service/impl/ingest/redis_ingest_service.py index 737231f5..7301ff31 100644 --- a/src/nv_ingest/service/impl/ingest/redis_ingest_service.py +++ b/src/nv_ingest/service/impl/ingest/redis_ingest_service.py @@ -68,7 +68,11 @@ async def submit_job(self, job_spec: MessageWrapper, trace_id: str) -> str: for task in tasks: task_prop = task["task_properties"] - task_prop_dict = task_prop.dict() + if not isinstance(task_prop, dict): + logger.debug(f"Task properties are not a dictionary: {tasks}") + task_prop_dict = task_prop.model_dump() + else: + task_prop_dict = task_prop task["task_properties"] = task_prop_dict updated_tasks.append(task) diff --git a/src/nv_ingest/util/flow_control/filter_by_task.py b/src/nv_ingest/util/flow_control/filter_by_task.py index c5be609c..02e2dce8 100644 --- a/src/nv_ingest/util/flow_control/filter_by_task.py +++ b/src/nv_ingest/util/flow_control/filter_by_task.py @@ -87,9 +87,9 @@ def _is_subset(superset, subset): # The subset is a regex pattern pattern = subset[len("regex:") :] if isinstance(superset, list): - return any(re.match(pattern, str(sup_item)) for sup_item in superset) + return any(re.match(pattern, sup_item) for sup_item in superset) else: - return re.match(pattern, str(superset)) is not None + return re.match(pattern, superset) is not None if isinstance(superset, list) and not isinstance(subset, list): # Check if the subset value matches any item in the superset return 
any(_is_subset(sup_item, subset) for sup_item in superset) diff --git a/src/nv_ingest/util/pipeline/stage_builders.py b/src/nv_ingest/util/pipeline/stage_builders.py index 7dd80f3a..4f9f152d 100644 --- a/src/nv_ingest/util/pipeline/stage_builders.py +++ b/src/nv_ingest/util/pipeline/stage_builders.py @@ -384,12 +384,14 @@ def add_image_caption_stage(pipe, morpheus_pipeline_config, ingest_config, defau ) endpoint_url = os.environ.get("VLM_CAPTION_ENDPOINT", "localhost:5000") + model_name = os.environ.get("VLM_CAPTION_MODEL_NAME", "meta/nv-llama-3.2-90b-vision-instruct") image_caption_config = ingest_config.get( "image_caption_extraction_module", { "api_key": auth_token, "endpoint_url": endpoint_url, + "model_name": model_name, "prompt": "Caption the content of this image:", }, )