From 7b6c2b337061710eecd33af99340344495ccc08b Mon Sep 17 00:00:00 2001 From: rxu17 <26471741+rxu17@users.noreply.github.com> Date: Wed, 24 Jan 2024 15:39:51 -0800 Subject: [PATCH] remove use of create_missing_columns function, just subset on required release cols --- genie/database_to_staging.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/genie/database_to_staging.py b/genie/database_to_staging.py index 55f046bf..0b76b6b6 100644 --- a/genie/database_to_staging.py +++ b/genie/database_to_staging.py @@ -96,11 +96,10 @@ "SIFT_Prediction": "string", "SIFT_Score": "float", "SWISSPROT": "float", - # "genomic_location_explanation": "string", "n_depth": "float", "t_depth": "float", - "mutationInCis_Flag": "boolean" - # "Annotation_Status": "string" + "mutationInCis_Flag": "boolean", + "Annotation_Status": "string" } @@ -823,7 +822,6 @@ def store_maf_files( with open(MUTATIONS_CENTER_PATH % center, "w"): pass used_entities = [] - # Must get the headers (because can't assume headers are the same order) maf_ent = syn.get(centerMafSynIdsDf.id[0]) for _, mafSynId in enumerate(centerMafSynIdsDf.id): maf_ent = syn.get(mafSynId) @@ -842,9 +840,7 @@ def store_maf_files( configured_mafdf = configure_maf( mafchunk, remove_mafinbed_variants, flagged_mutationInCis_variants ) - configured_mafdf = process_functions.create_missing_columns( - dataset=configured_mafdf, schema=FULL_MAF_RELEASE_SCHEMA - ) + configured_mafdf = configured_mafdf[list(FULL_MAF_RELEASE_SCHEMA.keys())] # Create maf for release merged_mafdf = remove_maf_samples( configured_mafdf, keep_for_merged_consortium_samples