From 780ee66b8f303534cbba17e2a649d6351b0ef2fa Mon Sep 17 00:00:00 2001 From: rxu17 <26471741+rxu17@users.noreply.github.com> Date: Tue, 7 Nov 2023 14:38:13 -0800 Subject: [PATCH] linting, standardizing --- genie_registry/maf.py | 4 ++-- genie_registry/vcf.py | 37 +++++++++++++++++++------------------ tests/test_maf.py | 8 +++++++- 3 files changed, 28 insertions(+), 21 deletions(-) diff --git a/genie_registry/maf.py b/genie_registry/maf.py index 62d7bdd1..3adcc2ce 100644 --- a/genie_registry/maf.py +++ b/genie_registry/maf.py @@ -64,7 +64,7 @@ class maf(FileTypeFormat): _process_kwargs = [] _allele_cols = ["REFERENCE_ALLELE", "TUMOR_SEQ_ALLELE1", "TUMOR_SEQ_ALLELE2"] _allowed_comb_alleles = ["A", "T", "C", "G", "N"] - _allowed_ind_alleles = ['-'] + _allowed_ind_alleles = ["-"] def _validateFilename(self, filePath): """ @@ -298,7 +298,7 @@ def _validate(self, mutationDF): allowed_comb_alleles=self._allowed_comb_alleles, allowed_ind_alleles=self._allowed_ind_alleles, ignore_case=True, - allow_na=False + allow_na=False, ) errors, warnings = validate.get_allele_validation_message( invalid_indices, diff --git a/genie_registry/vcf.py b/genie_registry/vcf.py index fe430b84..cf381086 100644 --- a/genie_registry/vcf.py +++ b/genie_registry/vcf.py @@ -18,7 +18,7 @@ class vcf(FileTypeFormat): _fileType = "vcf" _process_kwargs = [] - _allele_col = "REF" + _allele_cols = ["REF"] _allowed_comb_alleles = ["A", "T", "C", "G", "N"] _allowed_ind_alleles = [] @@ -140,24 +140,25 @@ def _validate(self, vcfdf): total_error += error warning += warn - if process_functions.checkColExist(vcfdf, self._allele_col): - invalid_indices = validate.get_invalid_allele_rows( - vcfdf, - input_col=self._allele_col, - allowed_comb_alleles=self._allowed_comb_alleles, - allowed_ind_alleles=self._allowed_ind_alleles, - ignore_case=True, - allow_na=False + for allele_col in self._allele_cols: + if process_functions.checkColExist(vcfdf, allele_col): + invalid_indices = validate.get_invalid_allele_rows( + vcfdf, + input_col=allele_col, + allowed_comb_alleles=self._allowed_comb_alleles, + allowed_ind_alleles=self._allowed_ind_alleles, + ignore_case=True, + allow_na=False, ) - errors, warnings = validate.get_allele_validation_message( - invalid_indices, - invalid_col=self._allele_col, - allowed_comb_alleles=self._allowed_comb_alleles, - allowed_ind_alleles=self._allowed_ind_alleles, - fileformat=self._fileType, - ) - total_error += errors - warning += warnings + errors, warnings = validate.get_allele_validation_message( + invalid_indices, + invalid_col=allele_col, + allowed_comb_alleles=self._allowed_comb_alleles, + allowed_ind_alleles=self._allowed_ind_alleles, + fileformat=self._fileType, + ) + total_error += errors + warning += warnings # No white spaces white_space = vcfdf.apply(lambda x: contains_whitespace(x), axis=1) diff --git a/tests/test_maf.py b/tests/test_maf.py index 309819e1..71d61e64 100644 --- a/tests/test_maf.py +++ b/tests/test_maf.py @@ -19,7 +19,13 @@ def valid_maf_df(): dict( CHROMOSOME=[1, 2, 3, 4, 5], START_POSITION=[1, 2, 3, 4, 2], - REFERENCE_ALLELE=["A", "A", "A", "A", "A"], + REFERENCE_ALLELE=[ + "C", + "G", + "NA", + "-", + "TAAAGATCGTACAGAA", + ], TUMOR_SAMPLE_BARCODE=[ "GENIE-SAGE-ID1-1", "GENIE-SAGE-ID1-1",