Skip to content

Commit

Permalink
Merge pull request #234 from WormBase/WBPS17.5
Browse files Browse the repository at this point in the history
Fixing PARASITE-463
  • Loading branch information
digrigor authored Jun 8, 2022
2 parents 509f866 + 6f7f703 commit 02cfed5
Showing 1 changed file with 65 additions and 82 deletions.
147 changes: 65 additions & 82 deletions parasite/scripts/production/core-creation/load_synonyms.pl
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,17 @@
my $optsd = [ @{ $cli_helper->get_dba_opts() } ];
push( @{$optsd}, "file:s" );
push( @{$optsd}, "verbose" );
# push( @{$optsd}, "extdb:s" );
push( @{$optsd}, "extdb:s" );

my $opts = $cli_helper->process_args( $optsd, \&pod2usage );

if( !$opts->{'file'} ){
die "# need -file synonyms.edited.tsv NOTE: make sure you edit it before loading!\n";
}

# if( !$opts->{'extdb'} ){
# die "# need -extdb arg, example -extdb EntrezGene\n";
# }
if( !$opts->{'extdb'} ){
die "# need -extdb arg, example -extdb EntrezGene\n";
}

if( $opts->{'verbose'} ) {
Log::Log4perl->easy_init($DEBUG);
Expand All @@ -62,7 +62,7 @@

## SQL statements

my $synonym_store_sth = $dba->dbc->prepare("INSERT INTO
my $synonym_store_sth = $dba->dbc->prepare("INSERT INTO
external_synonym (xref_id, synonym)
VALUES (?, ?)
");
Expand All @@ -89,7 +89,7 @@

next if($line =~ m/^#/ || $line !~ m/\t/);

chomp($line);
chomp($line);
($stableid, $synonym) = split(/\t/, $line);

# accumulate synonyms of the same stable_id
Expand All @@ -98,93 +98,76 @@
close(TSV);


# ## 3) Check that the external_db exists, and add it if not
#
# my $dbname = $opts->{'extdb'};
# my $dbRefs = $dea->get_external_db_ids($dbname, 'NULL', 1);
# unless (scalar(@$dbRefs)>0){
# $logger->info( "Adding new external_db entry for ", $opts->{'extdb'});
# $add_external_db_sth->execute($opts->{'extdb'});
# $add_external_db_sth->finish();
# $dbRefs = $dea->get_external_db_ids($dbname, 'NULL', 1);
# }
## 3) Check that the external_db exists, and add it if not

my $dbname = $opts->{'extdb'};
my $dbRefs = $dea->get_external_db_ids($dbname, 'NULL', 1);
unless (scalar(@$dbRefs)>0){
$logger->info( "Adding new external_db entry for ", $opts->{'extdb'});
$add_external_db_sth->execute($opts->{'extdb'});
$add_external_db_sth->finish();
$dbRefs = $dea->get_external_db_ids($dbname, 'NULL', 1);
}


## 4) create display_xrefs linked to synonyms

foreach $stableid (keys(%syns)) {

# check target gene exists
my $gene = $gene_adaptor->fetch_by_stable_id($stableid);
if (!$gene) {
$logger->info("Cannot find $stableid, skip it");
# check target gene exists
my $gene = $gene_adaptor->fetch_by_stable_id($stableid);
if ( !$gene ) {
$logger->info( "Cannot find $stableid, skip it");
next;
}
}

# check whether gene already has display_xref
my $old_display_xrefs = $gene->get_all_object_xrefs();
my @old_xref_ids = [];
foreach my $old_display_xref (@$old_display_xrefs) {
my $pid = $old_display_xref->primary_id();
my $xid = $old_display_xref->dbID();
if ($pid eq $stableid) {
push(@old_xref_ids, $old_display_xref)
}
}
my $loa = @old_xref_ids;
if ($loa eq 2) {
my $old_display_xref = @old_xref_ids[1];
$logger->info("$stableid has xref_id set to ", $old_display_xref->display_id());
my $old_display_xref = $gene->display_xref();

if( $old_display_xref ) {
$logger->info( "$stableid has display_xref_id set to ", $old_display_xref->display_id() );
# get its xref_id
my $xref_id = $old_display_xref->dbID();
$logger->info("Its xref_id is $xref_id");
# get existing synonyms for this xref
my @existing_synonyms = @{$old_display_xref->get_all_synonyms};
my $xref_id = $old_display_xref->dbID();
$logger->info( "Its xref_id is $xref_id");
# get existing synonyms for this xref
my @existing_synonyms = @{$old_display_xref->get_all_synonyms };
# hang the synonyms off the existing display xref
SYN:
foreach $synonym (@{$syns{$stableid}}) {
# check the synonym isn't already added
foreach my $existing_synonym (@existing_synonyms) {
if ($synonym eq $existing_synonym) {
$logger->info("$synonym already associated with $stableid, skipping");
next SYN;
}
}
$synonym_store_sth->execute($xref_id, $synonym);
$logger->info("Added $synonym to $stableid");
SYN: foreach $synonym (@{$syns{$stableid}}){
# check the synonym isn't already added
foreach my $existing_synonym (@existing_synonyms){
if ($synonym eq $existing_synonym){
$logger->info( "$synonym already associated with $stableid, skipping");
next SYN;
}
}
$synonym_store_sth->execute($xref_id, $synonym);
$logger->info( "Added $synonym to $stableid");
}
$gene->display_xref($old_display_xref);
$gene_adaptor->update($gene);
$synonym_store_sth->finish();
$synonym_store_sth->finish();
}

# no existing display_xref so we make a new one to hang the synonyms off
# use the stableid as the xref's ID and display label- sorry Ensembl :(
else {
$logger->info("$stableid doesn't have a curated_gene_synonyms xref. Have you ran Xref pipeline?");
die;
}
}

# else{
# my $new_display_xref = Bio::EnsEMBL::DBEntry -> new (
# -PRIMARY_ID => $stableid,
# -DBNAME => $opts->{'extdb'},
# -DISPLAY_ID => $stableid,
# -INFO_TYPE => 'SEQUENCE_MATCH',
# );
# # add all synonyms to the new xref
# foreach $synonym (@{$syns{$stableid}}){
# $new_display_xref->add_synonym($synonym);
# $logger->info( "Added $synonym to $stableid");
# }
# $dbRefs = $dea->get_external_db_ids($dbname, 'NULL', 1);
# my $dbRef = shift(@$dbRefs);
# my $xref_id = $dea->_store_or_fetch_xref($new_display_xref,$dbRef);
# $new_display_xref->dbID($xref_id);
#
# # and update the gene
# $gene->display_xref($new_display_xref);
# $gene_adaptor->update($gene);
# }
# }
# no existing display_xref so we make a new one to hang the synonyms off
# use the stableid as the xref's ID and display label- sorry Ensembl :(

else{
my $new_display_xref = Bio::EnsEMBL::DBEntry -> new (
-PRIMARY_ID => $stableid,
-DBNAME => $opts->{'extdb'},
-DISPLAY_ID => $stableid,
-INFO_TYPE => 'SEQUENCE_MATCH',
);
# add all synonyms to the new xref
foreach $synonym (@{$syns{$stableid}}){
$new_display_xref->add_synonym($synonym);
$logger->info( "Added $synonym to $stableid");
}
$dbRefs = $dea->get_external_db_ids($dbname, 'NULL', 1);
my $dbRef = shift(@$dbRefs);
my $xref_id = $dea->_store_or_fetch_xref($new_display_xref,$dbRef);
$new_display_xref->dbID($xref_id);

# and update the gene
$gene->display_xref($new_display_xref);
$gene_adaptor->update($gene);
}
}

0 comments on commit 02cfed5

Please sign in to comment.