Skip to content

Commit

Permalink
Merge pull request #22 from alliance-genome/SCRUM-922
Browse files Browse the repository at this point in the history
SCRUM-922
  • Loading branch information
azurebrd authored Aug 24, 2022
2 parents a31df8b + b75f5cd commit 5714780
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/config/default.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
schemaVersion: 1.0.1.0
releaseVersion: 3.1.1
INTERACTION-SOURCE: [WB-MOL, FB-MOL, WB-GEN, FB-GEN, BIOGRID, BIOGRID-TAB, IMEX]
BGI: [FB, SGD, WB, ZFIN, RGD, MGI, HUMAN, SARS-CoV-2]
BGI: [FB, SGD, WB, ZFIN, RGD, MGI, HUMAN, XBXL, XBXT, SARS-CoV-2]

# Loader configuration values are below.
# Please modify with caution!
Expand Down
2 changes: 1 addition & 1 deletion src/config/validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ releaseVersion:
required: True
BGI:
type: list
allowed: [FB, SGD, WB, ZFIN, RGD, MGI, HUMAN, SARS-CoV-2]
allowed: [FB, SGD, WB, ZFIN, RGD, MGI, HUMAN, XBXT, XBXL, SARS-CoV-2]
INTERACTION-SOURCE:
type: list
allowed: [WB-GEN, FB-GEN, WB-MOL, FB-MOL, IMEX, BIOGRID, BIOGRID-TAB]
Expand Down
13 changes: 11 additions & 2 deletions src/processor/interaction_genetic_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def parse_bgi_json(self):
for xref in item['basicGeneticEntity']['crossReferences']:
cross_ref_record = None
cross_ref_prefix = None
if xref['id'].startswith('NCBI_Gene'):
if xref['id'].lower().startswith('ncbi_gene'):
# Modify the cross reference ID to match the PSI MITAB format if necessary.
# So far, this is just converting 'NCBI_Gene' to 'entrez gene/locuslink'.
cross_ref_prefix = 'NCBI_Gene'
Expand Down Expand Up @@ -302,6 +302,8 @@ def get_data(self):
'taxid:6239',
'taxid:559292',
'taxid:7955',
'taxid:8355',
'taxid:8364',
'taxid:7227')

possible_yeast_taxon_set = ('taxid:4932', 'taxid:307796', 'taxid:643680', 'taxid:574961', 'taxid:285006', 'taxid:545124', 'taxid:764097')
Expand Down Expand Up @@ -356,6 +358,7 @@ def get_data(self):
with open(self.output_dir + 'alliance_genetic_interactions.tsv', 'w', encoding='utf-8') as tsvout, \
open(self.output_dir + 'alliance_genetic_interactions_fly.tsv', 'w', encoding='utf-8') as fb_out, \
open(self.output_dir + 'alliance_genetic_interactions_worm.tsv', 'w', encoding='utf-8') as wb_out, \
open(self.output_dir + 'alliance_genetic_interactions_xenopus.tsv', 'w', encoding='utf-8') as xb_out, \
open(self.output_dir + 'alliance_genetic_interactions_zebrafish.tsv', 'w', encoding='utf-8') as zfin_out, \
open(self.output_dir + 'alliance_genetic_interactions_yeast.tsv', 'w', encoding='utf-8') as sgd_out, \
open(self.output_dir + 'alliance_genetic_interactions_rat.tsv', 'w', encoding='utf-8') as rgd_out, \
Expand All @@ -367,6 +370,7 @@ def get_data(self):
tsvout = csv.writer(tsvout, quotechar = '', quoting=csv.QUOTE_NONE, delimiter='\t')
fb_out = csv.writer(fb_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')
wb_out = csv.writer(wb_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')
xb_out = csv.writer(xb_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')
zfin_out = csv.writer(zfin_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')
sgd_out = csv.writer(sgd_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')
rgd_out = csv.writer(rgd_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')
Expand All @@ -376,7 +380,7 @@ def get_data(self):
mapped_out = csv.writer(mapped_out, quotechar = '', quoting=csv.QUOTE_NONE, delimiter='\t')

# This list is ordered phylogenetically so that the header lists the species in that order.
out_write_list = [human_out, rgd_out, mgi_out, zfin_out, fb_out, wb_out, sgd_out]
out_write_list = [human_out, rgd_out, mgi_out, xb_out, zfin_out, fb_out, wb_out, sgd_out]

taxon_file_dispatch_dict = {
'taxid:10116': rgd_out,
Expand All @@ -386,6 +390,8 @@ def get_data(self):
'taxid:559292': sgd_out,
'taxid:7955': zfin_out,
'taxid:7227': fb_out,
'taxid:8355': xb_out,
'taxid:8364': xb_out,
'taxid:4932': sgd_out,
'taxid:307796': sgd_out,
'taxid:643680': sgd_out,
Expand All @@ -400,6 +406,7 @@ def get_data(self):
human_out: 'Homo sapiens',
mgi_out: 'Mus musculus',
wb_out: 'Caenorhabditis elegans',
xb_out: 'Xenopus laevis',
sgd_out: 'Saccharomyces cerevisiae',
zfin_out: 'Danio rerio',
fb_out: 'Drosophila melanogaster'
Expand All @@ -410,6 +417,7 @@ def get_data(self):
human_out: 'NCBI:txid9606',
mgi_out: 'NCBI:txid10090',
wb_out: 'NCBI:txid6239',
xb_out: 'NCBI:txid8355',
sgd_out: 'NCBI:txid559292',
zfin_out: 'NCBI:txid7955',
fb_out: 'NCBI:txid7227'
Expand Down Expand Up @@ -640,6 +648,7 @@ def validate_and_upload_files_to_fms(self):
'alliance_genetic_interactions.tsv': 'COMBINED',
'alliance_genetic_interactions_fly.tsv': 'FB',
'alliance_genetic_interactions_worm.tsv': 'WB',
'alliance_genetic_interactions_xenopus.tsv': 'XB',
'alliance_genetic_interactions_zebrafish.tsv': 'ZFIN',
'alliance_genetic_interactions_yeast.tsv': 'SGD',
'alliance_genetic_interactions_rat.tsv': 'RGD',
Expand Down
13 changes: 11 additions & 2 deletions src/processor/interaction_molecular_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def parse_bgi_json(self):
for xref in item['basicGeneticEntity']['crossReferences']:
cross_ref_record = None
cross_ref_prefix = None
if xref['id'].startswith('NCBI_Gene'):
if xref['id'].lower().startswith('ncbi_gene'):
# Modify the cross reference ID to match the PSI MITAB format if necessary.
# So far, this is just converting 'NCBI_Gene' to 'entrez gene/locuslink'.
cross_ref_prefix = 'NCBI_Gene'
Expand Down Expand Up @@ -377,6 +377,8 @@ def get_data(self):
'taxid:545124',
'taxid:2697049',
'taxid:764097',
'taxid:8355',
'taxid:8364',
'-')
possible_yeast_taxon_set = ('taxid:4932', 'taxid:307796', 'taxid:643680', 'taxid:574961', 'taxid:285006', 'taxid:545124', 'taxid:764097')
interaction_exclusion_set = ('psi-mi:\"MI:0208\"', 'psi-mi:\"MI:0794\"', 'psi-mi:\"MI:0796\"', 'psi-mi:\"MI:0799\"')
Expand Down Expand Up @@ -436,6 +438,7 @@ def get_data(self):
open(self.output_dir + 'alliance_molecular_interactions_sarscov2.tsv', 'w', encoding='utf-8') as sarscov2_out, \
open(self.output_dir + 'alliance_molecular_interactions_fly.tsv', 'w', encoding='utf-8') as fb_out, \
open(self.output_dir + 'alliance_molecular_interactions_worm.tsv', 'w', encoding='utf-8') as wb_out, \
open(self.output_dir + 'alliance_molecular_interactions_xenopus.tsv', 'w', encoding='utf-8') as xb_out, \
open(self.output_dir + 'alliance_molecular_interactions_zebrafish.tsv', 'w', encoding='utf-8') as zfin_out, \
open(self.output_dir + 'alliance_molecular_interactions_yeast.tsv', 'w', encoding='utf-8') as sgd_out, \
open(self.output_dir + 'alliance_molecular_interactions_rat.tsv', 'w', encoding='utf-8') as rgd_out, \
Expand All @@ -450,14 +453,15 @@ def get_data(self):
sarscov2_out = csv.writer(sarscov2_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')
fb_out = csv.writer(fb_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')
wb_out = csv.writer(wb_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')
xb_out = csv.writer(xb_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')
zfin_out = csv.writer(zfin_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')
sgd_out = csv.writer(sgd_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')
rgd_out = csv.writer(rgd_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')
mgi_out = csv.writer(mgi_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')
human_out = csv.writer(human_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t')

# This list is ordered phylogenetically so that the header lists the species in that order.
out_write_list = [human_out, rgd_out, mgi_out, zfin_out, fb_out, wb_out, sgd_out, sarscov2_out]
out_write_list = [human_out, rgd_out, mgi_out, xb_out, zfin_out, fb_out, wb_out, sgd_out, sarscov2_out]

taxon_file_dispatch_dict = {
'taxid:10116': rgd_out,
Expand All @@ -468,6 +472,8 @@ def get_data(self):
'taxid:7955': zfin_out,
'taxid:7227': fb_out,
'taxid:2697049': sarscov2_out,
'taxid:8355': xb_out,
'taxid:8364': xb_out,
'taxid:4932': sgd_out,
'taxid:307796': sgd_out,
'taxid:643680': sgd_out,
Expand All @@ -482,6 +488,7 @@ def get_data(self):
human_out: 'Homo sapiens',
mgi_out: 'Mus musculus',
wb_out: 'Caenorhabditis elegans',
xb_out: 'Xenopus laevis',
sgd_out: 'Saccharomyces cerevisiae',
zfin_out: 'Danio rerio',
sarscov2_out: 'Severe acute respiratory syndrome coronavirus 2',
Expand All @@ -493,6 +500,7 @@ def get_data(self):
human_out: 'NCBI:txid9606',
mgi_out: 'NCBI:txid10090',
wb_out: 'NCBI:txid6239',
xb_out: 'NCBI:txid8355',
sgd_out: 'NCBI:txid559292',
zfin_out: 'NCBI:txid7955',
sarscov2_out: 'NCBI:txid2697049',
Expand Down Expand Up @@ -762,6 +770,7 @@ def validate_and_upload_files_to_fms(self):
'alliance_molecular_interactions_fly.tsv': 'FB',
'alliance_molecular_interactions_sarscov2.tsv': 'SARS-CoV-2',
'alliance_molecular_interactions_worm.tsv': 'WB',
'alliance_molecular_interactions_xenopus.tsv': 'XB',
'alliance_molecular_interactions_zebrafish.tsv': 'ZFIN',
'alliance_molecular_interactions_yeast.tsv': 'SGD',
'alliance_molecular_interactions_rat.tsv': 'RGD',
Expand Down

0 comments on commit 5714780

Please sign in to comment.