Skip to content

Commit

Permalink
Add content metadata to vdb upload (#161)
Browse files Browse the repository at this point in the history
Co-authored-by: Chris Jarrett <[email protected]>
  • Loading branch information
ChrisJar and Chris Jarrett authored Oct 16, 2024
1 parent 18a0b70 commit ac81779
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/nv_ingest/modules/sinks/vdb_task_sink.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,16 +211,18 @@ def extract_df(ctrl_msg: ControlMessage, filter_errors: bool):

mdf["embedding"] = mdf["metadata"].struct.field("embedding")
mdf["_source_metadata"] = mdf["metadata"].struct.field("source_metadata")
+ mdf["_content_metadata"] = mdf["metadata"].struct.field("content_metadata")
df = mdf[mdf["_contains_embeddings"]].copy()

df = df[
[
"embedding",
"_content",
"_source_metadata",
+ "_content_metadata",
]
]
- df.columns = ["vector", "text", "source"]
+ df.columns = ["vector", "text", "source", "content_metadata"]

return df, resource_name

Expand Down
5 changes: 5 additions & 0 deletions src/nv_ingest/schemas/vdb_task_sink_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ def build_default_milvus_config(embedding_size: int = 1024) -> typing.Dict[str,
dtype=pymilvus.DataType.JSON,
description="Source document and raw data extracted content",
).to_dict(),
+ pymilvus.FieldSchema(
+ name="content_metadata",
+ dtype=pymilvus.DataType.JSON,
+ description="Content metadata",
+ ).to_dict(),
],
"description": "NV-INGEST collection schema",
},
Expand Down

0 comments on commit ac81779

Please sign in to comment.