Skip to content

Commit

Permalink
change log level on record ingestion failures
Browse files Browse the repository at this point in the history
  • Loading branch information
jperez999 committed Feb 18, 2025
1 parent ae6b2ff commit 45aa82d
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions client/src/nv_ingest_client/util/milvus.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -363,9 +363,9 @@ def _pull_text(element, enable_text: bool, enable_charts: bool, enable_tables: b
pg_num = element["metadata"]["content_metadata"]["page_number"]
doc_type = element["document_type"]
if not verify_emb:
- logger.error(f"failed to find embedding for entity: {source_name} page: {pg_num} type: {doc_type}")
+ logger.info(f"failed to find embedding for entity: {source_name} page: {pg_num} type: {doc_type}")
if not text:
- logger.error(f"failed to find text for entity: {source_name} page: {pg_num} type: {doc_type}")
+ logger.info(f"failed to find text for entity: {source_name} page: {pg_num} type: {doc_type}")
# if we do find text but no embedding remove anyway
text = None
return text
Expand All @@ -387,7 +387,7 @@ def _insert_location_into_content_metadata(element, enable_charts: bool, enable_
source_name = element["metadata"]["source_metadata"]["source_name"]
pg_num = element["metadata"]["content_metadata"]["page_number"]
doc_type = element["document_type"]
- logger.error(f"failed to find location for entity: {source_name} page: {pg_num} type: {doc_type}")
+ logger.info(f"failed to find location for entity: {source_name} page: {pg_num} type: {doc_type}")
location = max_dimensions = None
element["metadata"]["content_metadata"]["location"] = location
element["metadata"]["content_metadata"]["max_dimensions"] = max_dimensions
Expand All @@ -408,6 +408,8 @@ def write_records_minio(
If a sparse model is supplied, it will be used to generate sparse
embeddings to allow for hybrid search. Will filter records based on
type, depending on what types are enabled via the boolean parameters.
+ If the user sets the log level to info, any time a record fails
+ ingestion, it will be reported to the user.
Parameters
----------
Expand Down Expand Up @@ -495,7 +497,9 @@ def create_bm25_model(
"""
This function takes the input records and creates a corpus,
factoring in filters (i.e. texts, charts, tables) and fits
- a BM25 model with that information.
+ a BM25 model with that information. If the user sets the log
+ level to info, any time a record fails ingestion, it will be
+ reported to the user.
Parameters
----------
Expand Down Expand Up @@ -543,7 +547,9 @@ def stream_insert_milvus(
"""
This function takes the input records and creates a corpus,
factoring in filters (i.e. texts, charts, tables) and fits
- a BM25 model with that information.
+ a BM25 model with that information. If the user sets the log
+ level to info, any time a record fails ingestion, it will be
+ reported to the user.
Parameters
----------
Expand Down

0 comments on commit 45aa82d

Please sign in to comment.