diff --git a/client/src/nv_ingest_client/util/milvus.py b/client/src/nv_ingest_client/util/milvus.py index db98529a..27ea53ab 100644 --- a/client/src/nv_ingest_client/util/milvus.py +++ b/client/src/nv_ingest_client/util/milvus.py @@ -363,9 +363,9 @@ def _pull_text(element, enable_text: bool, enable_charts: bool, enable_tables: b pg_num = element["metadata"]["content_metadata"]["page_number"] doc_type = element["document_type"] if not verify_emb: - logger.error(f"failed to find embedding for entity: {source_name} page: {pg_num} type: {doc_type}") + logger.info(f"failed to find embedding for entity: {source_name} page: {pg_num} type: {doc_type}") if not text: - logger.error(f"failed to find text for entity: {source_name} page: {pg_num} type: {doc_type}") + logger.info(f"failed to find text for entity: {source_name} page: {pg_num} type: {doc_type}") # if we do find text but no embedding remove anyway text = None return text @@ -387,7 +387,7 @@ def _insert_location_into_content_metadata(element, enable_charts: bool, enable_ source_name = element["metadata"]["source_metadata"]["source_name"] pg_num = element["metadata"]["content_metadata"]["page_number"] doc_type = element["document_type"] - logger.error(f"failed to find location for entity: {source_name} page: {pg_num} type: {doc_type}") + logger.info(f"failed to find location for entity: {source_name} page: {pg_num} type: {doc_type}") location = max_dimensions = None element["metadata"]["content_metadata"]["location"] = location element["metadata"]["content_metadata"]["max_dimensions"] = max_dimensions @@ -408,6 +408,8 @@ def write_records_minio( If a sparse model is supplied, it will be used to generate sparse embeddings to allow for hybrid search. Will filter records based on type, depending on what types are enabled via the boolean parameters. + If the user sets the log level to info, any time a record fails + ingestion, it will be reported to the user. Parameters ---------- @@ -495,7 +497,9 @@ def create_bm25_model( """ This function takes the input records and creates a corpus, factoring in filters (i.e. texts, charts, tables) and fits - a BM25 model with that information. + a BM25 model with that information. If the user sets the log + level to info, any time a record fails ingestion, it will be + reported to the user. Parameters ---------- @@ -543,7 +547,9 @@ def stream_insert_milvus( """ This function takes the input records and creates a corpus, factoring in filters (i.e. texts, charts, tables) and fits - a BM25 model with that information. + a BM25 model with that information. If the user sets the log + level to info, any time a record fails ingestion, it will be + reported to the user. Parameters ----------