From 4fea6951abc9b91009851aee9df2874da433cf99 Mon Sep 17 00:00:00 2001 From: rxu17 <26471741+rxu17@users.noreply.github.com> Date: Tue, 7 Nov 2023 13:39:03 -0800 Subject: [PATCH] remove NA check in _check_allele_col --- genie_registry/maf.py | 8 -------- tests/test_maf.py | 22 +--------------------- 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/genie_registry/maf.py b/genie_registry/maf.py index c53e2529..62d7bdd1 100644 --- a/genie_registry/maf.py +++ b/genie_registry/maf.py @@ -47,14 +47,6 @@ def _check_allele_col(df, col): error = "" warning = "" if col_exist: - # CHECK: The value "NA" can't be used as a placeholder - if sum(df[col].fillna("") == "NA") > 0: - warning = ( - "maf: " - f"{col} column contains 'NA' values, " - "which cannot be placeholders for blank values. " - "Please put in empty strings for blank values.\n" - ) # CHECK: There can't be any null values if sum(df[col].isnull()) > 0: error = f"maf: {col} can't have any blank or null values.\n" diff --git a/tests/test_maf.py b/tests/test_maf.py index 541e6be8..309819e1 100644 --- a/tests/test_maf.py +++ b/tests/test_maf.py @@ -165,10 +165,6 @@ def test_errors_validation(maf_class): "Does not have the column headers that can give " "extra information to the processed maf: " "T_REF_COUNT, N_DEPTH.\n" - "maf: " - "REFERENCE_ALLELE column contains 'NA' values, " - "which cannot be placeholders for blank values. " - "Please put in empty strings for blank values.\n" ) assert error == expectedErrors @@ -213,9 +209,6 @@ def test_invalid_validation(maf_class): "This is the list of accepted allele values that can only appear individually: -\n" ) expectedWarnings = ( - "maf: TUMOR_SEQ_ALLELE2 column contains 'NA' values, " - "which cannot be placeholders for blank values. " - "Please put in empty strings for blank values.\n" "maf: Does not have the column headers that can give " "extra information to the processed maf: T_REF_COUNT.\n" ) @@ -226,25 +219,12 @@ def test_invalid_validation(maf_class): @pytest.mark.parametrize("col", ["temp", "REFERENCE_ALLELE"]) def test_noerror__check_allele_col(col): """Test error and warning is an empty string if REF col isn't passed in""" - df = pd.DataFrame(dict(REFERENCE_ALLELE=["A", "A"])) + df = pd.DataFrame(dict(REFERENCE_ALLELE=["NA", "A"])) error, warning = genie_registry.maf._check_allele_col(df, col) assert error == "" assert warning == "" -def test_warning__check_allele_col(): - """Test warning occurs when 'NA' string is passed in""" - df = pd.DataFrame(dict(TEMP=["NA", "A"])) - error, warning = genie_registry.maf._check_allele_col(df, "TEMP") - assert error == "" - assert warning == ( - "maf: " - "TEMP column contains 'NA' values, " - "which cannot be placeholders for blank values. " - "Please put in empty strings for blank values.\n" - ) - - def test_error__check_allele_col(): """Test error occurs when blank allele is passed in""" df = pd.DataFrame(dict(TEMP=[float("nan"), "A"]))