Skip to content

Commit

Permalink
Added code to dump and incorporate a Transposon_CDS peptide file into…
Browse files Browse the repository at this point in the history
… the release.
  • Loading branch information
Paul-Davis committed May 19, 2020
1 parent ca9a5ee commit 534434a
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 5 deletions.
1 change: 1 addition & 0 deletions scripts/autoace_builder.pl
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@
my @options = "-classmethod CDS:Transposon_CDS:Transposon-mRNA -classmethod Pseudogene:Transposon_Pseudogene:Transposon-pseudogenic_transcript -classmethod Transcript:Transposon_ncRNA:Transposon-non-coding_transcript";
$wormbase->run_script( "fasta_dumper.pl @options -output $seqdir/transposon_transcripts.dna", $log);
$wormbase->run_script( "fasta_dumper.pl -classmethod Transposon:Transposon -output $seqdir/transposons.dna", $log);
$wormbase->run_script( "fasta_dumper.pl -classmethod CDS:Transposon_cds -pep -output $seqdir/transposon_cds.pep", $log);
}
}

Expand Down
11 changes: 9 additions & 2 deletions scripts/fasta_dumper.pl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
use Bio::SeqIO;

my ($debug, $store, $verbose, $database, $test, $wormbase, $species,
@classmethodlabel, $out, @seqs);
@classmethodlabel, $out, @seqs, $pep);

GetOptions ( "debug:s" => \$debug,
"verbose" => \$verbose, #verbose produces a little more info to screen
Expand All @@ -29,6 +29,7 @@
"test" => \$test, #invoke test env
"store:s" => \$store, #supply a storable
"species:s" => \$species, #needed to work out what species is being processed
"pep" => \$pep #peptide dump
) ;


Expand Down Expand Up @@ -68,7 +69,13 @@

my $object_it = $connection->fetch_many(-query => $query);
while(my $object = $object_it->next){
my $dna = $object->asDNA();
my $dna;
if ($pep) {
$dna = $object->asPeptide();
}
else {
$dna = $object->asDNA();
}
my @dna = split(/\n/, $dna);
shift @dna;
$dna = join("", @dna);
Expand Down
13 changes: 10 additions & 3 deletions scripts/make_FTP_sites.pl
Original file line number Diff line number Diff line change
Expand Up @@ -345,10 +345,16 @@ sub copy_dna_files{
map { $copied_files{$_} = 1 } ($dna_file, $masked_file, $soft_file);

# copy over outstanding dna files
foreach my $dna_file (glob("$seqdir/*.dna.gz"), glob("$seqdir/*.dna")) {
foreach my $dna_file (glob("$seqdir/*.dna.gz"), glob("$seqdir/*.dna"), glob("$seqdir/*.pep")) {
if (not exists $copied_files{$dna_file}) {
my ($prefix) = $dna_file =~ /$seqdir\/(\S+)\.dna/;
my $target = "$dna_dir/${gspecies}.${bioproj}.${WS_version_name}.$prefix.fa.gz";
my ($prefix) = $dna_file =~ /$seqdir\/(\S+)\./;
my $target;
if ($dna_file =~ /pep/) {
$target = "$dna_dir/${gspecies}.${bioproj}.${WS_version_name}.$prefix.pep.gz";
}
else {
$target = "$dna_dir/${gspecies}.${bioproj}.${WS_version_name}.$prefix.fa.gz";
}
if ($dna_file =~ /\.gz$/) {
$wormbase->run_command("cp -f $dna_file $target", $log);
} else {
Expand Down Expand Up @@ -1379,6 +1385,7 @@ sub checkfile {
GSPECIES.BIOPROJ.WSREL.wormpep_package.tar.gz
GSPECIES.BIOPROJ.WSREL.transposon_transcripts.fa.gz
GSPECIES.BIOPROJ.WSREL.transposons.fa.gz
GSPECIES.BIOPROJ.WSREL.transposon_cds.pep.gz
[CORE]species/GSPECIES/BIOPROJ/annotation
GSPECIES.BIOPROJ.WSREL.functional_descriptions.txt.gz
Expand Down
1 change: 1 addition & 0 deletions scripts/release_letter.pl
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@
printf $rlfh " - G_SPECIES.BIOPROJECT.WS$ver.pseudogenic_transcripts.fa.gz - Spliced cDNA sequence for pseudogenic transcripts\n";
printf $rlfh " - G_SPECIES.BIOPROJECT.WS$ver.transposon_transcripts.fa.gz - Spliced cDNA sequence for mRNAs and pseudogenes located in Transposons\n";
printf $rlfh " - G_SPECIES.BIOPROJECT.WS$ver.transposons.fa.gz - DNA sequence of curated and predicted Transposons\n";
printf $rlfh " - G_SPECIES.BIOPROJECT.WS$ver.transposon_cds.pep.gz - Protein sequence of curated CDSs associated with Transposons\n";
printf $rlfh " - G_SPECIES.BIOPROJECT.WS$ver.intergenic_sequences.fa.gz - DNA sequence between pairs of adjacent genes\n";
printf $rlfh " - G_SPECIES.BIOPROJECT.WS$ver.annotations.gff[2|3].gz - Sequence features in either GFF2 or GFF3 format\n";
printf $rlfh " - G_SPECIES.BIOPROJECT.WS$ver.protein_annotation.gff3.gz - Sequence features in proteins in GFF3 format\n";
Expand Down

0 comments on commit 534434a

Please sign in to comment.