Skip to content

Commit

Permalink
Fix regex task filtering regression and image extraction handling (#383)
Browse files Browse the repository at this point in the history
  • Loading branch information
drobison00 authored Jan 28, 2025
1 parent 522b5d1 commit f8c2e8c
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 18 deletions.
1 change: 1 addition & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ services:
#- YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nv-yolox-page-elements-v1
#- YOLOX_INFER_PROTOCOL=http
- VLM_CAPTION_ENDPOINT=https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct/chat/completions
- VLM_CAPTION_MODEL_NAME=meta/llama-3.2-90b-vision-instruct
healthcheck:
test: curl --fail http://nv-ingest-ms-runtime:7670/v1/health/ready || exit 1
interval: 10s
Expand Down
30 changes: 15 additions & 15 deletions src/nv_ingest/extraction_workflows/image/image_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,19 +345,6 @@ def image_data_extractor(
# Future function for text extraction based on document_type
logger.warning("Text extraction is not supported for raw images.")

# Image extraction stub
if extract_images:
# Placeholder for image-specific extraction process
extracted_data.append(
construct_image_metadata_from_base64(
numpy_to_base64(image_array),
page_idx=0, # Single image treated as one page
page_count=1,
source_metadata=source_metadata,
base_unified_metadata=base_unified_metadata,
)
)

# Table and chart extraction
if extract_tables or extract_charts:
try:
Expand All @@ -366,8 +353,8 @@ def image_data_extractor(
config=kwargs.get("image_extraction_config"),
trace_info=trace_info,
)
logger.debug("Extracted table/chart data from image")
for _, table_chart_data in tables_and_charts[0]:
for item in tables_and_charts:
table_chart_data = item[1]
extracted_data.append(
construct_table_and_chart_metadata(
table_chart_data,
Expand All @@ -381,6 +368,19 @@ def image_data_extractor(
logger.error(f"Error extracting tables/charts from image: {e}")
raise

# Image extraction stub
if extract_images and not extracted_data: # It's not an unstructured image if we extracted a structured image
# Placeholder for image-specific extraction process
extracted_data.append(
construct_image_metadata_from_base64(
numpy_to_base64(image_array),
page_idx=0, # Single image treated as one page
page_count=1,
source_metadata=source_metadata,
base_unified_metadata=base_unified_metadata,
)
)

logger.debug(f"Extracted {len(extracted_data)} items from the image.")

return extracted_data
6 changes: 5 additions & 1 deletion src/nv_ingest/service/impl/ingest/redis_ingest_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,11 @@ async def submit_job(self, job_spec: MessageWrapper, trace_id: str) -> str:

for task in tasks:
task_prop = task["task_properties"]
task_prop_dict = task_prop.dict()
if not isinstance(task_prop, dict):
logger.debug(f"Task properties are not a dictionary: {tasks}")
task_prop_dict = task_prop.model_dump()
else:
task_prop_dict = task_prop
task["task_properties"] = task_prop_dict
updated_tasks.append(task)

Expand Down
4 changes: 2 additions & 2 deletions src/nv_ingest/util/flow_control/filter_by_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def _is_subset(superset, subset):
# The subset is a regex pattern
pattern = subset[len("regex:") :]
if isinstance(superset, list):
return any(re.match(pattern, str(sup_item)) for sup_item in superset)
return any(re.match(pattern, sup_item) for sup_item in superset)
else:
return re.match(pattern, str(superset)) is not None
return re.match(pattern, superset) is not None
if isinstance(superset, list) and not isinstance(subset, list):
# Check if the subset value matches any item in the superset
return any(_is_subset(sup_item, subset) for sup_item in superset)
Expand Down
2 changes: 2 additions & 0 deletions src/nv_ingest/util/pipeline/stage_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,12 +384,14 @@ def add_image_caption_stage(pipe, morpheus_pipeline_config, ingest_config, defau
)

endpoint_url = os.environ.get("VLM_CAPTION_ENDPOINT", "localhost:5000")
model_name = os.environ.get("VLM_CAPTION_MODEL_NAME", "meta/nv-llama-3.2-90b-vision-instruct")

image_caption_config = ingest_config.get(
"image_caption_extraction_module",
{
"api_key": auth_token,
"endpoint_url": endpoint_url,
"model_name": model_name,
"prompt": "Caption the content of this image:",
},
)
Expand Down

0 comments on commit f8c2e8c

Please sign in to comment.