Skip to content

Commit

Permalink
Abstraction for mapping in gff_getevidence_map.py
Browse files Browse the repository at this point in the history
When GET-Evidence data is copied and output by gff_getevidence_map.py,
not all of the data is retained, and some keys need to be renamed to be
consistent with what genome_display.php expects. Because this key mapping
gets used twice in the code, I've moved it into a function.
  • Loading branch information
Madeleine Price Ball committed Aug 21, 2012
1 parent 49b61f8 commit d8aef40
Showing 1 changed file with 28 additions and 20 deletions.
48 changes: 28 additions & 20 deletions server/gff_getevidence_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,30 @@
from config_names import GENETESTS_DATA
from utils.substitution_matrix import blosum100

def get_name_map():
    """Return the mapping of GET-Evidence key names.

    Keys are the keys used for data in the /public/getev-latest.json flat
    file. Values are the keys to use in the get-evidence.json file output
    for a given genome, to be interpreted by genome_display.php.

    A new dict is built on every call, so callers may modify the returned
    mapping without affecting later calls.
    """
    name_map = {
        # Keys renamed for genome_display.php.
        'overall_frequency_n': 'num',
        'overall_frequency_d': 'denom',
        'pph2_score': 'pph2_score',
        'impact': 'variant_impact',
        'summary_short': 'summary_short',
        'quality_scores': 'quality_scores',
        'variant_quality': 'variant_quality',
        'inheritance': 'variant_dominance',
        # Keys passed through under the same name.
        'variant_id': 'variant_id',
        'n_articles': 'n_articles',
        'n_web_pos': 'n_web_pos',
        'n_web_uneval': 'n_web_uneval',
        'n_web_neg': 'n_web_neg',
        'suff_eval': 'suff_eval',
    }
    return name_map

def read_getev_flat(getev_flatfile):
"""Load GET-Evidence data into two dicts, which are returned.
Expand All @@ -37,11 +61,9 @@ def read_getev_flat(getev_flatfile):
f_in = open(getev_flatfile)

# Pull only these items from the GET-Evidence json data:
items_wanted = ['gene', 'aa_change_short', 'summary_short', 'impact', \
'inheritance', 'dbsnp_id', 'in_omim', 'in_gwas', 'in_pharmgkb', \
'quality_scores', \
'variant_quality', 'overall_frequency_n', 'overall_frequency_d', \
'pph2_score', 'n_articles', 'variant_id']
items_wanted = get_name_map().keys() + ['gene',
'aa_change_short',
'dbsnp_id']

# Create two dicts to be returned, storing data we want.
# If possible, getev_by_aa is used with gene and amino acid change as key.
Expand Down Expand Up @@ -122,21 +144,7 @@ def read_transcripts(transcript_file):

def copy_output_data(getev_data, output_data):
"""Copy data to output using names recognized by genome_display.php"""
name_map = { 'overall_frequency_n': 'num',
'overall_frequency_d': 'denom',
'pph2_score': 'pph2_score',
'impact': 'variant_impact',
'summary_short': 'summary_short',
'quality_scores': 'quality_scores',
'variant_quality': 'variant_quality',
'inheritance': 'variant_dominance',
'variant_id': 'variant_id',
'n_articles': 'n_articles',
'n_web_pos': 'n_web_pos',
'n_web_uneval': 'n_web_uneval',
'n_web_neg': 'n_web_neg',
'suff_eval': 'suff_eval',
}
name_map = get_name_map()
for name in name_map:
if name in getev_data:
output_data[name_map[name]] = getev_data[name]
Expand Down

0 comments on commit d8aef40

Please sign in to comment.