-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Script to request a batch of IDs from the nameserver, any domain Vari…
…ation/Gene/Strain. Returns a stub acefile with some generic tags and origin info.
- Loading branch information
Paul-Davis
committed
Jan 15, 2020
1 parent
e80f04d
commit b548514
Showing
1 changed file
with
196 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
#!/usr/bin/env perl | ||
# | ||
# a script to batch request variation ids based on lists of public_names | ||
# Last change by $Author: mh6 $ on $Date: 2015-03-31 10:16:20 $ | ||
# usage: perl get_NS_ids.pl -species elegans -user me -pass me < file containing varId_pubId_per_line | ||
|
||
|
||
use lib $ENV{CVS_DIR}; | ||
use lib "$ENV{CVS_DIR}/NAMEDB/lib"; | ||
|
||
use NameDB_handler; | ||
use Wormbase; | ||
use Storable; | ||
use Getopt::Long; | ||
use IO::File; | ||
use strict; | ||
|
||
|
||
my ($PASS,$USER, $DB); # mysql ones | ||
my $DOMAIN; | ||
|
||
my ($wormbase, $debug, $test, $store, $species,$infile,$outfile,$nocheck, | ||
$ace_file_template, $input_is_public_names, $input_is_other_names, $input, $output,$public); | ||
|
||
GetOptions ( | ||
"debug=s" => \$debug, # send log emails only to one user | ||
"test" => \$test, # run against the test database on utlt-db | ||
"store:s" => \$store, # if you want to pass a Storable instead of recreating it | ||
"species:s" => \$species, # elegans/briggsae/whateva .. needed for logging | ||
"user:s" => \$USER, # mysql username | ||
"pass:s" => \$PASS, # mysql password | ||
'nocheck' => \$nocheck, # don't check public_names | ||
'input:s' => \$input, # File containing new public_names for new Variations | ||
"output:s" => \$output, # File to capture the new WBVar and name associations | ||
'acetemplate=s' => \$ace_file_template, #unknown template file that is also printed along with the IDs retrieved by the script | ||
'public' => \$public, # The file only contains public_names so these should be used (This should be used for all types unless Variation with other names). | ||
'domain:s' => \$DOMAIN, # specify Gene, Variation, Feature or Strain to retrieve this type of ID. | ||
)||die(@!); | ||
|
||
die "Species option is mandatory\n" unless $species; # need that for NameDB to fetch the correct regexps | ||
die "No domain specified which is mandatory\n" unless $DOMAIN; # need that for NameDB to assign correct ID type | ||
#die "Must specify -public if domain isn't Variation\n" unless (($public) && ($DOMAIN ne "Variation")); | ||
if ( $store ) { | ||
$wormbase = retrieve( $store ) or croak("Can't restore wormbase from $store\n"); | ||
} else { | ||
$wormbase = Wormbase->new( -debug => $debug, | ||
-test => $test, | ||
-organism => $species | ||
); | ||
} | ||
|
||
# establish log file. | ||
my $log = Log_files->make_build_log($wormbase); | ||
|
||
$DB = $wormbase->test ? 'test_wbgene_id;utlt-db:3307' : 'nameserver_live;web-wwwdb-core-02:3449'; | ||
|
||
my $db = NameDB_handler->new($DB,$USER,$PASS,$wormbase->wormpub . '/DATABASES/NameDB'); | ||
$db->setDomain($DOMAIN); | ||
|
||
# there should be *only* one public_name per line in the input_file | ||
# like: | ||
# abc123 | ||
# abc234 | ||
|
||
if (defined $ace_file_template) { | ||
my $tmp_str = ""; | ||
|
||
open(my $atfh, $ace_file_template) | ||
or $log->log_and_die("Could not open $ace_file_template for reading\n"); | ||
while(<$atfh>) { | ||
/\S/ and $tmp_str .= $_; | ||
} | ||
|
||
$ace_file_template = $tmp_str; | ||
} | ||
|
||
my @entries; | ||
open (OUT, ">$output") or $log->log_and_die("Could not open $output for reading\n"); | ||
open (IN, "<$input") or $log->log_and_die("Could not open $input for reading\n"); | ||
while (<IN>) { | ||
chomp; | ||
s/\"//g; | ||
my @names; | ||
|
||
#should the -public flag be set?? | ||
# unless (/\S+\s+\S+/) { | ||
# | ||
# } | ||
# if (/\S+\s+\S+/) { | ||
# print "File appears to contain multiple name attributes, assuming that is intended"; | ||
# } | ||
|
||
unless ($public){ | ||
@names = split(/\s+/, $_); | ||
} | ||
else { | ||
@names = $_; | ||
} | ||
if (scalar(@names) == 2 or scalar(@names) == 1) { | ||
push @entries, \@names, | ||
} else { | ||
$log->log_and_die("Bad line:$_\n"); | ||
} | ||
} | ||
|
||
foreach my $entry (@entries) { | ||
my ($public_name, $other_name); | ||
if ($public) { | ||
($public_name, $other_name) = @$entry; | ||
} | ||
else { | ||
($other_name, $public_name) = @$entry; | ||
} | ||
if (not $nocheck and defined $public_name and not defined &_check_name($public_name)) { | ||
next; | ||
} | ||
|
||
my $id = $db->idCreate; | ||
|
||
$public_name = $id if not defined $public_name; | ||
|
||
$db->addName($id,'Public_name'=>$public_name); | ||
|
||
print OUT "\n$DOMAIN : \"$id\"\n"; | ||
print OUT "Public_name \"$public_name\"\n"; | ||
print OUT "Other_name \"$other_name\"\n" if (defined $other_name); | ||
print OUT "Live\nRemark \"Batch $DOMAIN object requested via get_NS_ids.pl\"\n"; | ||
if (defined $ace_file_template) { | ||
print OUT "$ace_file_template\n"; | ||
} | ||
} | ||
$log->mail; | ||
|
||
# small function to check the variation public_name for sanity | ||
sub _check_name { | ||
my $name = shift; | ||
my $var = $db->idGetByTypedName('Public_name'=>$name)->[0]; | ||
if($var) { | ||
print STDERR "ERROR: $name already exists as $var\n"; | ||
return undef; | ||
} | ||
return 1; | ||
} | ||
|
||
__END__ | ||
=pod | ||
=head1 NAME - get_variation_ids.pl | ||
=head1 DECRIPTION | ||
This script takes a list of submitter-supplied variation names and generates new WBVar identifiers in the name server, | ||
writing the results to the given output file. | ||
The input file is expected to contain a list of submitter-supplied variation names, one per line. Optionally, each | ||
line may also contain an additional name, which is added to the NameDB as the public name. If no second name is | ||
defined, the generated WBVar identifier will be used as the public name | ||
By default, the script will refuse to create new WBVar identifiers for public names that already exist in the | ||
database; this behaviour can be switched off with the -nocheck option. | ||
-head1 OUTPUT | ||
The default output is a 3-column table of | ||
WBVarid<tab>Public name<tab>Submitter-supplied name | ||
However, if the -acetemplate option is used, Ace format can be generated (see -acetemplate below). With Ace format, | ||
the submitter-supplied name is used to populate the Other_name tag. | ||
=head1 USAGE EXAMPLES | ||
get_variation_ids.pl -species briggsae input_id_-user me -pass me input file.txt -output output_id_file.txt | ||
get_variation_ids.pl -species elegans -user me -pass me -acetemplate wild_template.ace -input input_id_file.txt -output output_file.ace | ||
=head1 OPTIONS | ||
-species WormBase species name, required for logging (defaults to elegans) | ||
-user MySQL NameDB account user name | ||
-pass MySQL NameDB account password | ||
-acetemplate Takes the file name of a file containing Ace template format which will be printed to the end of the results to create a fuller entry. | ||
-nocheck Do not check for public name clashes | ||
-input Takes the file name that you have generated as input | ||
-output Takes the name of a file that the results from the nameserver are written to. | ||
=cut | ||