diff --git a/src/config/default.yml b/src/config/default.yml index 07726b9..93a2eba 100644 --- a/src/config/default.yml +++ b/src/config/default.yml @@ -1,7 +1,7 @@ schemaVersion: 1.0.1.0 releaseVersion: 3.1.1 INTERACTION-SOURCE: [WB-MOL, FB-MOL, WB-GEN, FB-GEN, BIOGRID, BIOGRID-TAB, IMEX] -BGI: [FB, SGD, WB, ZFIN, RGD, MGI, HUMAN, SARS-CoV-2] +BGI: [FB, SGD, WB, ZFIN, RGD, MGI, HUMAN, XBXL, XBXT, SARS-CoV-2] # Loader configuration values are below. # Please modify with caution! diff --git a/src/config/validation.yml b/src/config/validation.yml index 8a85f71..a088068 100644 --- a/src/config/validation.yml +++ b/src/config/validation.yml @@ -6,7 +6,7 @@ releaseVersion: required: True BGI: type: list - allowed: [FB, SGD, WB, ZFIN, RGD, MGI, HUMAN, SARS-CoV-2] + allowed: [FB, SGD, WB, ZFIN, RGD, MGI, HUMAN, XBXT, XBXL, SARS-CoV-2] INTERACTION-SOURCE: type: list allowed: [WB-GEN, FB-GEN, WB-MOL, FB-MOL, IMEX, BIOGRID, BIOGRID-TAB] diff --git a/src/processor/interaction_genetic_processor.py b/src/processor/interaction_genetic_processor.py index 2598305..f8997af 100644 --- a/src/processor/interaction_genetic_processor.py +++ b/src/processor/interaction_genetic_processor.py @@ -93,7 +93,7 @@ def parse_bgi_json(self): for xref in item['basicGeneticEntity']['crossReferences']: cross_ref_record = None cross_ref_prefix = None - if xref['id'].startswith('NCBI_Gene'): + if xref['id'].lower().startswith('ncbi_gene'): # Modify the cross reference ID to match the PSI MITAB format if necessary. # So far, this is just converting 'NCBI_Gene' to 'entrez gene/locuslink'. cross_ref_prefix = 'NCBI_Gene' @@ -302,6 +302,8 @@ def get_data(self): 'taxid:6239', 'taxid:559292', 'taxid:7955', + 'taxid:8355', + 'taxid:8364', 'taxid:7227') possible_yeast_taxon_set = ('taxid:4932', 'taxid:307796', 'taxid:643680', 'taxid:574961', 'taxid:285006', 'taxid:545124', 'taxid:764097') @@ -356,6 +358,7 @@ def get_data(self): with open(self.output_dir + 'alliance_genetic_interactions.tsv', 'w', encoding='utf-8') as tsvout, \ open(self.output_dir + 'alliance_genetic_interactions_fly.tsv', 'w', encoding='utf-8') as fb_out, \ open(self.output_dir + 'alliance_genetic_interactions_worm.tsv', 'w', encoding='utf-8') as wb_out, \ + open(self.output_dir + 'alliance_genetic_interactions_xenopus.tsv', 'w', encoding='utf-8') as xb_out, \ open(self.output_dir + 'alliance_genetic_interactions_zebrafish.tsv', 'w', encoding='utf-8') as zfin_out, \ open(self.output_dir + 'alliance_genetic_interactions_yeast.tsv', 'w', encoding='utf-8') as sgd_out, \ open(self.output_dir + 'alliance_genetic_interactions_rat.tsv', 'w', encoding='utf-8') as rgd_out, \ @@ -367,6 +370,7 @@ def get_data(self): tsvout = csv.writer(tsvout, quotechar = '', quoting=csv.QUOTE_NONE, delimiter='\t') fb_out = csv.writer(fb_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t') wb_out = csv.writer(wb_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t') + xb_out = csv.writer(xb_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t') zfin_out = csv.writer(zfin_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t') sgd_out = csv.writer(sgd_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t') rgd_out = csv.writer(rgd_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t') @@ -376,7 +380,7 @@ def get_data(self): mapped_out = csv.writer(mapped_out, quotechar = '', quoting=csv.QUOTE_NONE, delimiter='\t') # This list is now sorted phylogenetically for the header to be sorted - out_write_list = [human_out, rgd_out, mgi_out, zfin_out, fb_out, wb_out, sgd_out] + out_write_list = [human_out, rgd_out, mgi_out, xb_out, zfin_out, fb_out, wb_out, sgd_out] taxon_file_dispatch_dict = { 'taxid:10116': rgd_out, @@ -386,6 +390,8 @@ def get_data(self): 'taxid:559292': sgd_out, 'taxid:7955': zfin_out, 'taxid:7227': fb_out, + 'taxid:8355': xb_out, + 'taxid:8364': xb_out, 'taxid:4932': sgd_out, 'taxid:307796': sgd_out, 'taxid:643680': sgd_out, @@ -400,6 +406,7 @@ def get_data(self): human_out: 'Homo sapiens', mgi_out: 'Mus musculus', wb_out: 'Caenorhabditis elegans', + xb_out: 'Xenopus laevis', sgd_out: 'Saccharomyces cerevisiae', zfin_out: 'Danio rerio', fb_out: 'Drosophila melanogaster' @@ -410,6 +417,7 @@ def get_data(self): human_out: 'NCBI:txid9606', mgi_out: 'NCBI:txid10090', wb_out: 'NCBI:txid6239', + xb_out: 'NCBI:txid8355', sgd_out: 'NCBI:txid559292', zfin_out: 'NCBI:txid7955', fb_out: 'NCBI:txid7227' @@ -640,6 +648,7 @@ def validate_and_upload_files_to_fms(self): 'alliance_genetic_interactions.tsv': 'COMBINED', 'alliance_genetic_interactions_fly.tsv': 'FB', 'alliance_genetic_interactions_worm.tsv': 'WB', + 'alliance_genetic_interactions_xenopus.tsv': 'XB', 'alliance_genetic_interactions_zebrafish.tsv': 'ZFIN', 'alliance_genetic_interactions_yeast.tsv': 'SGD', 'alliance_genetic_interactions_rat.tsv': 'RGD', diff --git a/src/processor/interaction_molecular_processor.py b/src/processor/interaction_molecular_processor.py index 3304098..3dba6d8 100644 --- a/src/processor/interaction_molecular_processor.py +++ b/src/processor/interaction_molecular_processor.py @@ -98,7 +98,7 @@ def parse_bgi_json(self): for xref in item['basicGeneticEntity']['crossReferences']: cross_ref_record = None cross_ref_prefix = None - if xref['id'].startswith('NCBI_Gene'): + if xref['id'].lower().startswith('ncbi_gene'): # Modify the cross reference ID to match the PSI MITAB format if necessary. # So far, this is just converting 'NCBI_Gene' to 'entrez gene/locuslink'. cross_ref_prefix = 'NCBI_Gene' @@ -377,6 +377,8 @@ def get_data(self): 'taxid:545124', 'taxid:2697049', 'taxid:764097', + 'taxid:8355', + 'taxid:8364', '-') possible_yeast_taxon_set = ('taxid:4932', 'taxid:307796', 'taxid:643680', 'taxid:574961', 'taxid:285006', 'taxid:545124', 'taxid:764097') interaction_exclusion_set = ('psi-mi:\"MI:0208\"', 'psi-mi:\"MI:0794\"', 'psi-mi:\"MI:0796\"', 'psi-mi:\"MI:0799\"') @@ -436,6 +438,7 @@ def get_data(self): open(self.output_dir + 'alliance_molecular_interactions_sarscov2.tsv', 'w', encoding='utf-8') as sarscov2_out, \ open(self.output_dir + 'alliance_molecular_interactions_fly.tsv', 'w', encoding='utf-8') as fb_out, \ open(self.output_dir + 'alliance_molecular_interactions_worm.tsv', 'w', encoding='utf-8') as wb_out, \ + open(self.output_dir + 'alliance_molecular_interactions_xenopus.tsv', 'w', encoding='utf-8') as xb_out, \ open(self.output_dir + 'alliance_molecular_interactions_zebrafish.tsv', 'w', encoding='utf-8') as zfin_out, \ open(self.output_dir + 'alliance_molecular_interactions_yeast.tsv', 'w', encoding='utf-8') as sgd_out, \ open(self.output_dir + 'alliance_molecular_interactions_rat.tsv', 'w', encoding='utf-8') as rgd_out, \ @@ -450,6 +453,7 @@ def get_data(self): sarscov2_out = csv.writer(sarscov2_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t') fb_out = csv.writer(fb_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t') wb_out = csv.writer(wb_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t') + xb_out = csv.writer(xb_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t') zfin_out = csv.writer(zfin_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t') sgd_out = csv.writer(sgd_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t') rgd_out = csv.writer(rgd_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t') @@ -457,7 +461,7 @@ def get_data(self): human_out = csv.writer(human_out, quotechar='', quoting=csv.QUOTE_NONE, delimiter='\t') # This list is now sorted phylogenetically for the header to be sorted - out_write_list = [human_out, rgd_out, mgi_out, zfin_out, fb_out, wb_out, sgd_out, sarscov2_out] + out_write_list = [human_out, rgd_out, mgi_out, xb_out, zfin_out, fb_out, wb_out, sgd_out, sarscov2_out] taxon_file_dispatch_dict = { 'taxid:10116': rgd_out, @@ -468,6 +472,8 @@ def get_data(self): 'taxid:7955': zfin_out, 'taxid:7227': fb_out, 'taxid:2697049': sarscov2_out, + 'taxid:8355': xb_out, + 'taxid:8364': xb_out, 'taxid:4932': sgd_out, 'taxid:307796': sgd_out, 'taxid:643680': sgd_out, @@ -482,6 +488,7 @@ def get_data(self): human_out: 'Homo sapiens', mgi_out: 'Mus musculus', wb_out: 'Caenorhabditis elegans', + xb_out: 'Xenopus laevis', sgd_out: 'Saccharomyces cerevisiae', zfin_out: 'Danio rerio', sarscov2_out: 'Severe acute respiratory syndrome coronavirus 2', @@ -493,6 +500,7 @@ def get_data(self): human_out: 'NCBI:txid9606', mgi_out: 'NCBI:txid10090', wb_out: 'NCBI:txid6239', + xb_out: 'NCBI:txid8355', sgd_out: 'NCBI:txid559292', zfin_out: 'NCBI:txid7955', sarscov2_out: 'NCBI:txid2697049', @@ -762,6 +770,7 @@ def validate_and_upload_files_to_fms(self): 'alliance_molecular_interactions_fly.tsv': 'FB', 'alliance_molecular_interactions_sarscov2.tsv': 'SARS-CoV-2', 'alliance_molecular_interactions_worm.tsv': 'WB', + 'alliance_molecular_interactions_xenopus.tsv': 'XB', 'alliance_molecular_interactions_zebrafish.tsv': 'ZFIN', 'alliance_molecular_interactions_yeast.tsv': 'SGD', 'alliance_molecular_interactions_rat.tsv': 'RGD',