Skip to content

Commit

Permalink
Make id field optional for filtering (#247)
Browse files Browse the repository at this point in the history
Signed-off-by: Ryan Wolf <[email protected]>
  • Loading branch information
ryantwolf authored Sep 16, 2024
1 parent 8d9ba84 commit d0c92c9
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions nemo_curator/scripts/filter_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,10 @@ def main(args):

     # Write scores to separate directory
     if args.output_document_score_dir:
-        if args.id_field in filtered_dataset.df.columns:
+        if (
+            args.id_field is not None
+            and args.id_field in filtered_dataset.df.columns
+        ):
             output_df = filtered_dataset.df[[args.id_field, *score_fields]]
         else:
             output_df = filtered_dataset.df[score_fields]
Expand Down Expand Up @@ -207,7 +210,7 @@ def attach_args(
     parser.add_argument(
         "--id-field",
         type=str,
-        required=True,
+        default=None,
         help="The name of the field within each object of the dataset "
         "file that assigns a unqiue ID to each document. "
         "If this is specified and found within the object, a list of all "
Expand Down

0 comments on commit d0c92c9

Please sign in to comment.