From cace5cbacc32315b01eefdd6c1c50ba619dc891b Mon Sep 17 00:00:00 2001 From: Sarah Yurick Date: Wed, 12 Feb 2025 15:01:11 -0800 Subject: [PATCH 1/4] Enable PyTests for Nemotron-CC classifiers Signed-off-by: Sarah Yurick --- tests/test_classifiers.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/test_classifiers.py b/tests/test_classifiers.py index d6d2852c..6d48b3de 100644 --- a/tests/test_classifiers.py +++ b/tests/test_classifiers.py @@ -139,9 +139,6 @@ def test_fineweb_edu_classifier(gpu_client, domain_dataset): assert result_pred.equals(expected_pred) -@pytest.mark.skip( - reason="Skipping until https://huggingface.co/nvidia/nemocurator-fineweb-mixtral-edu-classifier is published" -) @pytest.mark.gpu def test_fineweb_mixtral_classifier(gpu_client, domain_dataset): from nemo_curator.classifiers import FineWebMixtralEduClassifier @@ -155,9 +152,6 @@ def test_fineweb_mixtral_classifier(gpu_client, domain_dataset): assert result_pred.equals(expected_pred) -@pytest.mark.skip( - reason="Skipping until https://huggingface.co/nvidia/nemocurator-fineweb-nemotron-4-edu-classifier is published" -) @pytest.mark.gpu def test_fineweb_nemotron_classifier(gpu_client, domain_dataset): from nemo_curator.classifiers import FineWebNemotronEduClassifier From 571350cb26f55aec15914209ad1fda17fb1eed74 Mon Sep 17 00:00:00 2001 From: Sarah Yurick Date: Wed, 12 Feb 2025 16:06:10 -0800 Subject: [PATCH 2/4] Update copyright year Signed-off-by: Sarah Yurick --- .../classifiers/fineweb_mixtral_edu_classifier_inference.py | 2 +- .../classifiers/fineweb_nemotron_edu_classifier_inference.py | 2 +- pyproject.toml | 2 +- tests/test_classifiers.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nemo_curator/scripts/classifiers/fineweb_mixtral_edu_classifier_inference.py b/nemo_curator/scripts/classifiers/fineweb_mixtral_edu_classifier_inference.py index 582ec4c5..756584f3 100644 --- a/nemo_curator/scripts/classifiers/fineweb_mixtral_edu_classifier_inference.py +++ b/nemo_curator/scripts/classifiers/fineweb_mixtral_edu_classifier_inference.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_curator/scripts/classifiers/fineweb_nemotron_edu_classifier_inference.py b/nemo_curator/scripts/classifiers/fineweb_nemotron_edu_classifier_inference.py index 112453a2..d58867ce 100644 --- a/nemo_curator/scripts/classifiers/fineweb_nemotron_edu_classifier_inference.py +++ b/nemo_curator/scripts/classifiers/fineweb_nemotron_edu_classifier_inference.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/pyproject.toml b/pyproject.toml index 56e0fd9e..ec270713 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/test_classifiers.py b/tests/test_classifiers.py index 6d48b3de..81b1112e 100644 --- a/tests/test_classifiers.py +++ b/tests/test_classifiers.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 0191cbafb0aaaf1c885d63402efc3e07ebe54349 Mon Sep 17 00:00:00 2001 From: Sarah Yurick Date: Fri, 14 Feb 2025 11:18:15 -0800 Subject: [PATCH 3/4] revert pyproject toml change Signed-off-by: Sarah Yurick --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ec270713..56e0fd9e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 0d19bf55571f7503b80f4269477af2f0f7cf5f5f Mon Sep 17 00:00:00 2001 From: Sarah Yurick Date: Tue, 18 Feb 2025 11:00:15 -0800 Subject: [PATCH 4/4] push Signed-off-by: Sarah Yurick --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 56e0fd9e..ec270713 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.