From 6f7f703f091a14f3e2fa27de61c1a3803ff23547 Mon Sep 17 00:00:00 2001 From: Dionysios Grigoriadis Date: Wed, 8 Jun 2022 14:58:08 +0100 Subject: [PATCH] Fixing PARASITE-463 --- .../production/core-creation/load_synonyms.pl | 147 ++++++++---------- 1 file changed, 65 insertions(+), 82 deletions(-) diff --git a/parasite/scripts/production/core-creation/load_synonyms.pl b/parasite/scripts/production/core-creation/load_synonyms.pl index ae7475104..30506223e 100755 --- a/parasite/scripts/production/core-creation/load_synonyms.pl +++ b/parasite/scripts/production/core-creation/load_synonyms.pl @@ -26,7 +26,7 @@ my $optsd = [ @{ $cli_helper->get_dba_opts() } ]; push( @{$optsd}, "file:s" ); push( @{$optsd}, "verbose" ); -# push( @{$optsd}, "extdb:s" ); +push( @{$optsd}, "extdb:s" ); my $opts = $cli_helper->process_args( $optsd, \&pod2usage ); @@ -34,9 +34,9 @@ die "# need -file synonyms.edited.tsv NOTE: make sure you edit it before loading!\n"; } -# if( !$opts->{'extdb'} ){ -# die "# need -extdb arg, example -extdb EntrezGene\n"; -# } +if( !$opts->{'extdb'} ){ + die "# need -extdb arg, example -extdb EntrezGene\n"; +} if( $opts->{'verbose'} ) { Log::Log4perl->easy_init($DEBUG); @@ -62,7 +62,7 @@ ## SQL statements -my $synonym_store_sth = $dba->dbc->prepare("INSERT INTO +my $synonym_store_sth = $dba->dbc->prepare("INSERT INTO external_synonym (xref_id, synonym) VALUES (?, ?) "); @@ -89,7 +89,7 @@ next if($line =~ m/^#/ || $line !~ m/\t/); - chomp($line); + chomp($line); ($stableid, $synonym) = split(/\t/, $line); # accumulate synonyms of the same stable_id @@ -98,93 +98,76 @@ close(TSV); -# ## 3) Check that the external_db exists, and add it if not -# -# my $dbname = $opts->{'extdb'}; -# my $dbRefs = $dea->get_external_db_ids($dbname, 'NULL', 1); -# unless (scalar(@$dbRefs)>0){ -# $logger->info( "Adding new external_db entry for ", $opts->{'extdb'}); -# $add_external_db_sth->execute($opts->{'extdb'}); -# $add_external_db_sth->finish(); -# $dbRefs = $dea->get_external_db_ids($dbname, 'NULL', 1); -# } +## 3) Check that the external_db exists, and add it if not + +my $dbname = $opts->{'extdb'}; +my $dbRefs = $dea->get_external_db_ids($dbname, 'NULL', 1); +unless (scalar(@$dbRefs)>0){ + $logger->info( "Adding new external_db entry for ", $opts->{'extdb'}); + $add_external_db_sth->execute($opts->{'extdb'}); + $add_external_db_sth->finish(); + $dbRefs = $dea->get_external_db_ids($dbname, 'NULL', 1); +} ## 4) create display_xrefs linked to synonyms foreach $stableid (keys(%syns)) { - # check target gene exists - my $gene = $gene_adaptor->fetch_by_stable_id($stableid); - if (!$gene) { - $logger->info("Cannot find $stableid, skip it"); + # check target gene exists + my $gene = $gene_adaptor->fetch_by_stable_id($stableid); + if ( !$gene ) { + $logger->info( "Cannot find $stableid, skip it"); next; - } + } # check whether gene already has display_xref - my $old_display_xrefs = $gene->get_all_object_xrefs(); - my @old_xref_ids = []; - foreach my $old_display_xref (@$old_display_xrefs) { - my $pid = $old_display_xref->primary_id(); - my $xid = $old_display_xref->dbID(); - if ($pid eq $stableid) { - push(@old_xref_ids, $old_display_xref) - } - } - my $loa = @old_xref_ids; - if ($loa eq 2) { - my $old_display_xref = @old_xref_ids[1]; - $logger->info("$stableid has xref_id set to ", $old_display_xref->display_id()); + my $old_display_xref = $gene->display_xref(); + + if( $old_display_xref ) { + $logger->info( "$stableid has display_xref_id set to ", $old_display_xref->display_id() ); # get its xref_id - my $xref_id = $old_display_xref->dbID(); - $logger->info("Its xref_id is $xref_id"); - # get existing synonyms for this xref - my @existing_synonyms = @{$old_display_xref->get_all_synonyms}; + my $xref_id = $old_display_xref->dbID(); + $logger->info( "Its xref_id is $xref_id"); + # get existing synonyms for this xref + my @existing_synonyms = @{$old_display_xref->get_all_synonyms }; # hang the synonyms off the existing display xref - SYN: - foreach $synonym (@{$syns{$stableid}}) { - # check the synonym isn't already added - foreach my $existing_synonym (@existing_synonyms) { - if ($synonym eq $existing_synonym) { - $logger->info("$synonym already associated with $stableid, skipping"); - next SYN; - } - } - $synonym_store_sth->execute($xref_id, $synonym); - $logger->info("Added $synonym to $stableid"); + SYN: foreach $synonym (@{$syns{$stableid}}){ + # check the synonym isn't already added + foreach my $existing_synonym (@existing_synonyms){ + if ($synonym eq $existing_synonym){ + $logger->info( "$synonym already associated with $stableid, skipping"); + next SYN; + } + } + $synonym_store_sth->execute($xref_id, $synonym); + $logger->info( "Added $synonym to $stableid"); } - $gene->display_xref($old_display_xref); - $gene_adaptor->update($gene); - $synonym_store_sth->finish(); + $synonym_store_sth->finish(); } - # no existing display_xref so we make a new one to hang the synonyms off - # use the stableid as the xref's ID and display label- sorry Ensembl :( - else { - $logger->info("$stableid doesn't have a curated_gene_synonyms xref. Have you ran Xref pipeline?"); - die; - } -} - -# else{ -# my $new_display_xref = Bio::EnsEMBL::DBEntry -> new ( -# -PRIMARY_ID => $stableid, -# -DBNAME => $opts->{'extdb'}, -# -DISPLAY_ID => $stableid, -# -INFO_TYPE => 'SEQUENCE_MATCH', -# ); -# # add all synonyms to the new xref -# foreach $synonym (@{$syns{$stableid}}){ -# $new_display_xref->add_synonym($synonym); -# $logger->info( "Added $synonym to $stableid"); -# } -# $dbRefs = $dea->get_external_db_ids($dbname, 'NULL', 1); -# my $dbRef = shift(@$dbRefs); -# my $xref_id = $dea->_store_or_fetch_xref($new_display_xref,$dbRef); -# $new_display_xref->dbID($xref_id); -# -# # and update the gene -# $gene->display_xref($new_display_xref); -# $gene_adaptor->update($gene); -# } -# } + # no existing display_xref so we make a new one to hang the synonyms off + # use the stableid as the xref's ID and display label- sorry Ensembl :( + + else{ + my $new_display_xref = Bio::EnsEMBL::DBEntry -> new ( + -PRIMARY_ID => $stableid, + -DBNAME => $opts->{'extdb'}, + -DISPLAY_ID => $stableid, + -INFO_TYPE => 'SEQUENCE_MATCH', + ); + # add all synonyms to the new xref + foreach $synonym (@{$syns{$stableid}}){ + $new_display_xref->add_synonym($synonym); + $logger->info( "Added $synonym to $stableid"); + } + $dbRefs = $dea->get_external_db_ids($dbname, 'NULL', 1); + my $dbRef = shift(@$dbRefs); + my $xref_id = $dea->_store_or_fetch_xref($new_display_xref,$dbRef); + $new_display_xref->dbID($xref_id); + + # and update the gene + $gene->display_xref($new_display_xref); + $gene_adaptor->update($gene); + } +} \ No newline at end of file