Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
markquintontulloch committed Feb 12, 2021
2 parents 3f1b4c8 + d802b09 commit 5914ac0
Show file tree
Hide file tree
Showing 9 changed files with 149 additions and 36 deletions.
2 changes: 1 addition & 1 deletion scripts/GFF_post_process/GFF_post_process.pl
Original file line number Diff line number Diff line change
Expand Up @@ -504,7 +504,7 @@ sub collate_and_sort {
#
if ($gff3) {
my $outfile = "$working_dir/post_process_gff3.gff3";
$wormbase->run_script("GFF_post_process/post_process_gff3.pl -infile $processed_gff_file -outfile $outfile", $log)
$wormbase->run_script("GFF_post_process/post_process_gff3.pl -infile $processed_gff_file -outfile $outfile -tecred", $log)
and $log->log_and_die("Unsuccessful post-processing of GFF3 prior to collation\n");

if ($debug) {
Expand Down
50 changes: 49 additions & 1 deletion scripts/GFF_post_process/post_process_gff3.pl
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,13 @@
# variables and command-line options #
######################################

my ( $debug, $test, $store, $wormbase, $species );
my ( $debug, $test, $store, $wormbase, $species, $tecred );
my ( $infile, $outfile, $changed_lines );

GetOptions(
'debug=s' => \$debug,
'test' => \$test,
'tecred' => \$tecred,
'store:s' => \$store,
'species:s' => \$species,
'infile:s' => \$infile,
Expand All @@ -89,6 +90,31 @@
$log->log_and_die("You must define -infile and -outfile\n");
}

#### TECRED modification START ####

# Pre-scan of the input GFF, performed only when -tecred was given.
# For every line whose second column matches TEC_RED, the first
# whitespace-delimited token of the ninth column is used as a key and the
# number of lines seen for that key is counted in %tecredid.  The main pass
# further down reads these counts to decide whether a TEC_RED is multimapped.
my %tecredid;
if ($tecred) {
    print "Doing TECRED processing\n";

    # Three-argument open on a handle private to this block: the filename can
    # never be interpreted as an open mode, and the handle does not clash
    # with the $gff_in_fh that is opened for the main pass.
    open(my $tecred_fh, '<', $infile) or $log->log_and_die("Could not open $infile for reading\n");

    while (my $line = <$tecred_fh>) {
        chomp $line;
        my @fields = split(/\t+/, $line);

        # Lines with no second column cannot be TEC_RED features; testing
        # definedness first avoids an uninitialized-value warning on them.
        next unless defined $fields[1] && $fields[1] =~ /TEC_RED/;

        my @trid = split(/\s+/, $fields[8]);
        $tecredid{$trid[0]} += 1;
    }

    # This handle was opened on $infile for reading, so report it as such.
    close($tecred_fh) or $log->log_and_die("Could not close $infile after reading\n");
}

#### TECRED modification END ####


open(my $gff_in_fh, $infile) or $log->log_and_die("Could not open $infile for reading\n");
open(my $gff_out_fh, ">$outfile") or $log->log_and_die("Could not open $outfile for writing\n");

Expand Down Expand Up @@ -132,6 +158,28 @@
$l[4] = $l[3];
}

#
# Fix up the tecreds if needed https://github.com/WormBase/website/issues/8039
# Remove orientation, and mark up multimapped TEC-REDs
#
# Applies only when -tecred was requested and the source column of the
# current line matches TEC_RED.
if ($tecred && $l[1] =~ /TEC_RED/) {
    # Remove orientation: the strand column is replaced by '.'.
    $l[6] = '.';

    # The first whitespace-delimited token of the attribute column is the
    # key under which this feature was counted in %tecredid.
    my @attr_tokens = split(/\s+/, $l[8]);
    my $tecred_key  = $attr_tokens[0];

    # Keys counted more than once are flagged as multimapping.
    $l[8] .= ($tecredid{$tecred_key} > 1)
        ? "; multimapping=TRUE"
        : "; multimapping=FALSE";
}

#
# Clean up attributes
#
Expand Down
8 changes: 8 additions & 0 deletions scripts/RNASeq_align.pl
Original file line number Diff line number Diff line change
Expand Up @@ -205,20 +205,28 @@ sub results {
# make the ace file of RNASeq spanned introns to load into acedb
print "Running Intron analyses ...\n";
my $splice_file = $wormbase->acefiles."/RNASeq_splice_${species}.ace";
print "Writing splicefile $splice_file\n";

# get old splice_file size
my $old_splice_file = $splice_file . '.old';
# if splice_file is older than a month, then move it to be the 'old_splice_file' (because this script may be run more than once, overwriting the splice file)
if (-M $splice_file >= 30) {
system("mv $splice_file $old_splice_file");
print "Moving $splice_file to $old_splice_file\n";
}
my $old_splice_file_size = -s $old_splice_file;

chdir $RNASeq->{RNASeqSRADir};
$rnadir= $RNASeq->{RNASeqSRADir};
print "Going to folder $rnadir\n";
$status = $wormbase->run_command("rm -f $splice_file", $log);
print "Running command1: cat */Introns/virtual_objects.${species}.RNASeq.ace > $splice_file\n";
$status = $wormbase->run_command("cat */Introns/virtual_objects.${species}.RNASeq.ace > $splice_file", $log);
print "Running command2: acezip.pl -file $splice_file\n";
$status = $wormbase->run_script("acezip.pl -file $splice_file", $log);
print "Running command3: cat */Introns/Intron.ace > ${splice_file}.tmp\n";
$status = $wormbase->run_command("cat */Introns/Intron.ace > ${splice_file}.tmp", $log);
print "Running command4: acezip.pl -file ${splice_file}.tmp\n";
$status = $wormbase->run_script("acezip.pl -file ${splice_file}.tmp", $log);
# flatten the results of all libraries at a position into one entry
open (FEAT, "< ${splice_file}.tmp") || $log->log_and_die("Can't open file ${splice_file}.tmp\n");
Expand Down
6 changes: 5 additions & 1 deletion scripts/Wormbase.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1373,8 +1373,10 @@ sub establish_paths {
$self->{'autoace'} = $self->species eq 'elegans' ? "$basedir/autoace" : "$basedir/".$self->species;
$self->{'orgdb'} = $self->{'autoace'}; #."/".$self->{'organism'};
}


#$basedir = '/nfs/panda/ensemblgenomes/wormbase/BUILD';
$self->{'basedir'} = $basedir;
# print "BASE $basedir\n";

if ($self->test) {
$self->{'ftp_upload'} = $self->wormpub . "/TEST/ftp_uploads/wormbase";
Expand Down Expand Up @@ -1452,6 +1454,8 @@ sub establish_paths {
$self->{'smasked_genome_seq'} = $self->sequences . "/" . $self->{species} . ".genome_softmasked.fa";

# create dirs if missing
#my $wep = $self->wormpep;
#print "WORMPEP $wep\n";
mkpath( $self->logs ) unless ( -e $self->logs );
mkpath( $self->common_data ) unless ( -e $self->common_data );
mkpath( $self->wormpep ) unless ( -e $self->wormpep );
Expand Down
110 changes: 80 additions & 30 deletions scripts/make_FTP_sites.pl
Original file line number Diff line number Diff line change
Expand Up @@ -650,45 +650,95 @@ sub copy_rna_files{
# copy across ontology files
############################################
sub copy_ontology_files {


print "Copy ontology innit\n";

my $runtime = $wormbase->runtime;
$log->write_to("$runtime: copying ontology files\n");

my $ace_dir = $wormbase->autoace;

$log->write_to("$runtime: Copying ontology files\n");

my $ace_dir = $wormbase->autoace;
my $obo_dir = $wormbase->primaries . "/citace/temp_unpack_dir/home/citace/Data_for_${WS_version_name}/Data_for_Ontology/";
my $ace_ontology_dir = "$ace_dir/ONTOLOGY";
my $ftp_ontology_dir = "$targetdir/ONTOLOGY";

#print "$ace_dir $obo_dir $ace_ontology_dir $ftp_ontology_dir\n";

mkpath($ace_ontology_dir,1,0775);
mkpath($ftp_ontology_dir,1,0775);


# run through all possible organisms
my %accessors = ($wormbase->species_accessors);
$accessors{$wormbase->species} = $wormbase;
my %species_name_map;
while(my ($species, $wb) = each %accessors) {
$species_name_map{$wb->full_name('-g_species' => 1)} = $species;
}
$accessors{elegans} = $wormbase;

$wormbase->run_command("cp -f $obo_dir/*.obo $ace_ontology_dir/", $log);
foreach my $file (glob("$ace_ontology_dir/*.*")) {
my ($filestem, $suffix) = $file =~ /\/([^\/]+)\.([^\.\/]+)$/;
if (exists $species_name_map{$suffix}) {
my ($filetype, $release, $extension) = $filestem =~ /^([^\.]+)\.(WS\d+)\.([^\.]+)$/;
my $wb = $accessors{$species_name_map{$suffix}};
my $bioproj = $wb->ncbi_bioproject;
my $new_filename = join('.', $suffix, $bioproj, $release, $filtype, $extension);
my $species_dir = "$targetdir/species/$species/$bioproj";
mkpath($species_dir,1,0775);
$wormbase->run_command("cp -f $file ${species_dir}/${new_filename}", $log);
}
else {
$wormbase->run_command("cp -f $file $ftp_ontology_dir/", $log);
}
}
my %copied;

foreach my $wb (values %accessors) {
next if exists $skip_species{$wb->species};
next if @only_species and not exists $only_species{$wb->species};

my $gspecies = $wb->full_name('-g_species' => 1);
my $bioproj = $wb->ncbi_bioproject;
my $prefix = $wb->pepdir_prefix;

#print "$gspecies $bioproj $prefix \n";


# Copy to species folders
foreach my $file (glob("$ace_ontology_dir/*.*")) {
my @a = split(/\./, $file);
my @b = split(/\//, $a[0]);
# If is daf file
if ($file=~/$gspecies/ and $file=~/daf\.txt/) {
my $species_dir = "$targetdir/species/$gspecies/$bioproj";
#print "cp -f $file $targetdir/species/$gspecies/$bioproj/annotation/$gspecies\.$bioproj\.$WS_version_name\.$b[-1]\.daf\n ";
$wormbase->run_command("cat $file | gzip -n -9 > $targetdir/species/$gspecies/$bioproj/annotation/$gspecies\.$bioproj\.$WS_version_name\.$b[-1]\.daf.gz", $log);
$copied{$file}=1;
}
elsif ($file=~/$gspecies/ and $file=~/rnai_phenotypes/) {
my $species_dir = "$targetdir/species/$gspecies/$bioproj";
#print "cp -f $file $targetdir/species/$gspecies/$bioproj/annotation/$gspecies\.$bioproj\.$WS_version_name\.$b[-1]\.wb\n ";
$wormbase->run_command("cat $file | gzip -n -9 > $targetdir/species/$gspecies/$bioproj/annotation/$gspecies\.$bioproj\.$WS_version_name\.$b[-1]\.wb.gz", $log);
$copied{$file}=1;
}
elsif ($file=~/$gspecies/) {
my $species_dir = "$targetdir/species/$gspecies/$bioproj";
#print "cp -f $file $targetdir/species/$gspecies/$bioproj/annotation/$gspecies\.$bioproj\.$WS_version_name\.$b[-1]\.gaf\n ";
$wormbase->run_command("cat $file | gzip -n -9 > $targetdir/species/$gspecies/$bioproj/annotation/$gspecies\.$bioproj\.$WS_version_name\.$b[-1]\.gaf.gz", $log);
$copied{$file}=1;
}
else {
}
}
}

# Copy the rest of the files without species name to the ontology folder
foreach my $file (glob("$ace_ontology_dir/*.*")) {
my @a = split(/\./, $file);
my @b = split(/\//, $a[0]);
if (exists $copied{$file}) {
#print "Copied $file\n";
}
else {
if ($file=~/daf\.txt/) {
#print "cp -f $file $targetdir/ONTOLOGY/$b[-1]\.$WS_version_name\.daf\n ";
$wormbase->run_command("cp -f $file $targetdir/ONTOLOGY/$b[-1]\.$WS_version_name\.daf", $log);
}
elsif ($file=~/rnai_phenotypes/) {
#print "cp -f $file $targetdir/ONTOLOGY/$b[-1]\.$WS_version_name\.wb\n ";
$wormbase->run_command("cp -f $file $targetdir/ONTOLOGY/$b[-1]\.$WS_version_name\.wb", $log);
}
else {
#print "cp -f $file $targetdir/ONTOLOGY/$b[-1]\.$WS_version_name\.gaf\n ";
$wormbase->run_command("cp -f $file $targetdir/ONTOLOGY/$b[-1]\.$WS_version_name\.gaf", $log);
}

}

}


$runtime = $wormbase->runtime;
$log->write_to("$runtime: Finished copying ontology files\n\n");
$runtime = $wormbase->runtime;
$log->write_to("$runtime: Finished copying ontology files!\n\n");

}


Expand Down
1 change: 1 addition & 0 deletions scripts/make_life_stage_expression.pl
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,7 @@
} else {
my $iterator = $db->fetch_many(-query => 'Find Gene where Status = "Live" AND Species = "Caenorhabditis elegans"');
while ($gene = $iterator->next) {
unless ($gene=~/WBGene/) {next}; # Make sure not to read VC2010 genes
print "Doing $gene ...\n";
# if ($gene->name eq 'WBGene00001495') {
# next; # lacks a Public_name in WS273
Expand Down
Empty file modified scripts/next_builder_checks.pl
100755 → 100644
Empty file.
2 changes: 1 addition & 1 deletion scripts/preFTPdumps/perSpecies/pre_ftp_dumps.pl
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
{ script => 'dump_geneid_list.pl', output => 'geneIDs.txt', all => 1, mem => 12000 },
{ script => 'dump_molecules.pl', output => 'molecules.ace', mem => 12000 },
{ script => 'dump_geneid_list.pl', output => 'geneOtherIDs.txt', options => '-other', all=>1, mem => 12000 },
{ script => 'uniprotxrefs.pl', output => 'uniprot_papers.txt', all=>1, mem => 12000 },
{ script => 'uniprotxrefs.pl', output => 'uniprot_papers.txt', all=>1, mem => 20000 },
);


Expand Down
6 changes: 4 additions & 2 deletions scripts/update_Common_data.pl
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@
if (
$arg eq 'est2feature' or
$arg eq 'gene_id' or
$arg eq 'worm_gene2cgc' or
$arg eq 'worm_gene2geneID' or
$arg eq 'est' or
$arg eq 'cds2wormpep') {
push @bsub_options, (
Expand All @@ -181,8 +183,8 @@
-J => $job_name);
} else {
push @bsub_options, (
-M => "3000",
-R => "\"select[mem>3000] rusage[mem=3000]\"",
-M => "6000",
-R => "\"select[mem>6000] rusage[mem=6000]\"",
-J => $job_name);
}
my $cmd = "update_Common_data.pl -${arg}";
Expand Down

0 comments on commit 5914ac0

Please sign in to comment.