Skip to content

Commit

Permalink
Merge branch 'master' into mafft
Browse files Browse the repository at this point in the history
  • Loading branch information
elischberg authored Nov 6, 2023
2 parents 53e9de6 + 3001281 commit 3760169
Show file tree
Hide file tree
Showing 335 changed files with 33,307 additions and 395,148 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
import json
import os
import shutil
import subprocess
import tarfile

import requests
import subprocess


# Utility functions for interacting with Galaxy JSON
Expand Down Expand Up @@ -140,13 +141,15 @@ def move_index_files(archive_content_path, target_dir, data_tables, version):
command = "indexdb_rna --ref %s,%s" % (
fasta_filepath,
indexed_filepath)
process = subprocess.call(command, shell=True )
returncode = subprocess.call(command, shell=True)
if returncode:
exit(f"`{command}` exited with exit code {returncode}")
# Add entry in the data table
add_data_table_entry(
data_tables,
"rRNA_databases",
dict(
value="%s-%s" %(version, db_name),
value="%s-%s" % (version, db_name),
name=db_name,
path=filedir))

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="data_manager_sortmerna_download" name="Download SortMeRNA" version="@[email protected]" tool_type="manage_data">
<tool id="data_manager_sortmerna_download" name="Download SortMeRNA" version="@[email protected]" tool_type="manage_data" profile="19.05">
<description>reference databases</description>
<macros>
<import>macros.xml</import>
Expand All @@ -22,12 +22,20 @@
<data name="out_file" format="data_manager_json" label="${tool.name}"/>
</outputs>
<tests>
<test>
<output name="out_file" ftype="data_manager_json">
<assert_contents>
<has_text text="silva-bac-16s-id90"/>
<has_text text="silva-bac-23s-id98"/>
<has_text text="&quot;name&quot;" n="8"/>
</assert_contents>
</output>
</test>
</tests>
<help><![CDATA[
This tool downloads the reference databases for SortMeRNA and indexes them
]]></help>
<citations>
<citation type="doi">10.1093/bioinformatics/bts611</citation>
<yield />
</citations>
</tool>
150 changes: 125 additions & 25 deletions tools/agat/agat.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,30 @@
<command detect_errors="exit_code"><![CDATA[
#if $tool.selector == 'fix'
@input_annotation_single@
agat_convert_sp_gxf2gxf.pl -gff $input_annotation --output 'output.gff' &&
cat 'output.gff' > '${annotation_gff}'
agat_convert_sp_gxf2gxf.pl
--gxf $input_annotation
--config $agat_configfile
--output 'output' &&
cat 'output' > '${annotation}'
#else if $tool.selector == 'convert_GFF2GTF'
@input_annotation_single@
agat_convert_sp_gff2gtf.pl --gff $input_annotation --gtf_version $tool.gtf_version --output 'output.gtf' &&
agat_convert_sp_gff2gtf.pl
--gff $input_annotation
--gtf_version $tool.gtf_version
--output 'output.gtf' &&
cat 'output.gtf' > '${annotation_gtf}'
#else if $tool.selector == 'convert_GTF2GFF'
@input_annotation_single@
agat_convert_sp_gxf2gxf.pl --gff $input_annotation --output 'output.gff' &&
agat_convert_sp_gxf2gxf.pl
--gff $input_annotation
--output 'output.gff' &&
cat 'output.gff' > '${annotation_gff}'
#else if $tool.selector == 'compare'
@input_annotation_double@
agat_sp_compare_two_annotations.pl --gff1 $input1 --gff2 $input2 --output 'temp_output' &&
agat_sp_compare_two_annotations.pl
--gff1 $input1
--gff2 $input2
--output 'temp_output' &&
cat 'temp_output' > '${stats_output}'
#else if $tool.selector == 'extract'
@input_annotation_single@
Expand Down Expand Up @@ -56,35 +67,100 @@
@input_annotation_single@
@input_reference@
mkdir -p './statistics' &&
agat_sp_statistics.pl
agat_sp_functional_statistics.pl
--gff $input_annotation
--gs $ref_genome
--output 'temp_output' &&
cat 'temp_output' > '$stats_output'
cat 'temp_output/gene@transcript/table_per_feature_type.txt' > '$stats_output'
#else if $tool.selector == 'merge_annotations'
@input_annotation_double@
agat_sp_merge_annotations.pl -gff $input1 --gff $input2 --output 'temp_output' &&
cat 'temp_output' > '${annotation_gff}'
agat_sp_merge_annotations.pl
--gff $input1
--gff $input2
--config $agat_configfile
--output 'output' &&
cat 'output' > '${annotation}'
#else if $tool.selector == 'annotation_statistics'
@input_annotation_single@
@input_reference@
agat_sp_statistics.pl --gff $input_annotation --gs $ref_genome -d --output 'temp_output' &&
agat_sp_statistics.pl
--gff $input_annotation
--gs $ref_genome
-d
--output 'temp_output' &&
cat 'temp_output' > '$stats_output'
#else if $tool.selector == 'filter_feature_fasta'
@input_annotation_single@
@input_reference@
agat_sq_filter_feature_from_fasta.pl --gff $input_annotation --fasta $ref_genome --output 'temp_output' &&
cat 'temp_output' > '${features_filtered}'
agat_sq_filter_feature_from_fasta.pl
--gff $input_annotation
--fasta $ref_genome
--config $agat_configfile
--output 'output' &&
cat 'output' > '${annotation}'
#else if $tool.selector == 'complement'
@input_annotation_double@
agat_sp_complement_annotations.pl --ref $input1 --add $input2 --size_min $tool.size_min --output 'temp_output' &&
cat 'temp_output' > '${annotation_gff}'
agat_sp_complement_annotations.pl
--ref $input1
--add $input2
--size_min $tool.size_min
--config $agat_configfile
--output 'temp_output' &&
cat 'temp_output' > '${annotation}'
#else if $tool.selector == 'splice_sites'
@input_annotation_single@
agat_sp_add_splice_sites.pl
--gff $input_annotation
--config $agat_configfile
--output 'output' &&
cat 'output' > '${annotation}'
#end if
]]>
</command>
<configfiles>
<configfile name="agat_configfile"><![CDATA[
#if $tool.selector in ['fix','merge_annotations','complement','splice_sites','filter_feature_fasta']
---
output_format: $tool.output_format.selector
#if $tool.output_format.selector == "GFF"
gff_output_version: $tool.output_format.version
gtf_output_version: relax
#else
gff_output_version: 3
gtf_output_version: $tool.output_format.version
#end if
verbose: 1
progress_bar: true
log: true
debug: false
tabix: false
merge_loci: $tool.merge_loci
throw_fasta: false
force_gff_input_version: 0
create_l3_for_l2_orphan: $tool.create_exon
locus_tag:
- locus_tag
- gene_id
prefix_new_id: nbis
check_sequential: true
check_l2_linked_to_l3: true
check_l1_linked_to_l2: true
remove_orphan_l1: true
check_all_level3_locations: true
check_cds: true
check_exons: true
check_utrs: true
check_all_level2_locations: true
check_all_level1_locations: true
check_identical_isoforms: true
#end if
]]></configfile>
</configfiles>
<inputs>
<conditional name="tool">
<param name="selector" type="select" label="AGAT tool selector" help="As AGAT is a toolkit, it contains a lot of tools. If any of them is missing, please contact the server admin.">
<option value="splice_sites">Add splice sites</option>
<option value="annotation_statistics">Annotation statistics (agat_sp_statistics.pl)</option>
<option value="compare">Compare annotation files (agat_sp_compare_two_annotations.pl)</option>
<option value="complement">Complement annotation file (agat_sp_complement_annotations.pl)</option>
Expand Down Expand Up @@ -113,8 +189,8 @@
<option value="exon">Exon</option>
<option value="cds">CDS</option>
<option value="trna">tRNA</option>
<option value="three_prime_utr">3' UTR</option>
<option value="five_prime_utr">5' UTR</option>
<option value="three_prime_utr">3 UTR</option>
<option value="five_prime_utr">5 UTR</option>
</param>
<param argument="--mrna" type="boolean" truevalue="--mrna" falsevalue="" checked="false" label="Extract mRNA sequences" help=" This extract the mrna
sequence (i.e transcribed sequence (devoid of introns, but containing untranslated exons))." />
Expand All @@ -127,7 +203,7 @@
<param argument="--clean_internal_stop" type="boolean" truevalue="--clean_internal_stop" falsevalue="" checked="false" label="Clean internal
stop codons" help="The Clean Internal Stop option allows replacing the translation of the stop codons present among the sequence that is
represented by the '*' character by . This character can be disturbing for many programs (e.g interproscan)" />
<param argument="--upstream" type="integer" min="0" value="" optional="true" label="Upstream nucleotides" help="It will take that number of nucleotide in more at the 5' extremity." />
<param argument="--upstream" type="integer" min="0" value="" optional="true" label="Upstream nucleotides" help="It will take that number of nucleotide in more at the 5 extremity." />
<param argument="--downstream" type="integer" min="0" value="" optional="true" label="Downstream nucleotides" help="It will take that number of downstream nucleotides." />
<param argument="--full" type="boolean" truevalue="--full" falsevalue="" checked="false" label="Full" help="This option allows dealing
with feature that may span over several locations like CDS or exon, in order to extract the full sequence from the start extremity
Expand Down Expand Up @@ -171,9 +247,11 @@
<when value="filter_feature_fasta">
<expand macro="ANNOTATION_INPUT" />
<expand macro="REFERENCE_FASTA"/>
<expand macro="AGAT_CONFIG"/>
</when>
<when value="fix">
<expand macro="ANNOTATION_INPUT" format="gff,gff3,gff3.gz"/>
<expand macro="AGAT_CONFIG"/>
</when>
<when value="functional_analysis">
<expand macro="ANNOTATION_INPUT" format="gff,gtf,gff3,gff3.gz"/>
Expand All @@ -182,24 +260,33 @@
<when value="merge_annotations">
<param argument="--gff1" name="input_annotation1" type="data" format="gff,gtf,gff3,gff3.gz" label="Annotation file 1" help="Input GTF/GFF file" />
<param argument="--gff2" name="input_annotation2" type="data" format="gff,gtf,gff3,gff3.gz" label="Annotation file 2" help="Input GTF/GFF file" />
<expand macro="AGAT_CONFIG"/>
</when>
<when value="complement">
<!-- Inputs for agat_sp_complement_annotations.pl: the reference annotation, the annotation used to complement it, the minimum CDS size, and the shared AGAT config options -->
<param argument="--ref" name="input_annotation1" type="data" format="gff,gtf,gff3,gff3.gz" label="Reference annotation" help="Reference GTF/GFF file" />
<param argument="--add" name="input_annotation2" type="data" format="gff,gtf,gff3,gff3.gz" label="Annotation to complement" help="Annotation file you would like to use to complement the reference annotation." />
<param argument="--size_min" type="integer" min="0" value="0" label="Minimum CDS size" help="Option to keep the non-overlapping genes only if the CDS size (in nucleotides) is over the minimum size defined. Default = 0, which means all of them are kept." />
<expand macro="AGAT_CONFIG"/>
</when>
<when value="splice_sites">
<expand macro="ANNOTATION_INPUT" format="gff,gff3,gff3.gz"/>
<expand macro="AGAT_CONFIG"/>
</when>
</conditional>
</inputs>
<outputs>
<data name="annotation_gff" format="gff" label="${tool.name} on ${on_string}: annotation file (GFF)">
<filter>tool['selector'] not in ['annotation_statistics','extract','functional_analysis','compare','convert_GFF2GTF','filter_feature_fasta']</filter>
<filter>tool['selector'] == 'convert_GTF2GFF'</filter>
</data>
<data name="annotation_gtf" format="gtf" label="${tool.name} on ${on_string}: annotation file (GTF)">
<filter>tool['selector'] == 'convert_GFF2GTF'</filter>
</data>
<data name="features_filtered" format="tabular" label="${tool.name} on ${on_string}: filtered results">
<filter>tool['selector'] == 'filter_feature_fasta'</filter>
<data name="annotation" format="gff" label="${tool.name} on ${on_string}: annotation file">
<filter>tool['selector'] in ['fix','merge_annotations','complement','filter_feature_fasta','splice_sites','bam2gff']</filter>
<change_format>
<when input="output_format.selector" value="GTF" format="gtf" />
</change_format>
</data>
<data name="sequence_output" format="fasta" label="${tool.name} on ${on_string}: FASTA file">
<filter>tool['selector'] =='extract'</filter>
Expand Down Expand Up @@ -228,9 +315,6 @@
</conditional>
</conditional>
<output name="stats_output" file="test01_stats.txt" ftype="txt"/>
<output_collection name="distribution_plots_woiso" type="list" count="4">
<element name="transcriptClass_cds" file="test01_plot2.pdf" ftype="pdf" compare="sim_size" delta="100"/>
</output_collection>
<output_collection name="distribution_plots_wiso" type="list" count="4">
<element name="transcriptClass_cds" file="test01_plot1.pdf" ftype="pdf" compare="sim_size" delta="100"/>
</output_collection>
Expand Down Expand Up @@ -259,13 +343,17 @@
</conditional>
<output name="stats_output" file="test03.txt" ftype="txt" lines_diff="2"/>
</test>
<!-- Test 04: comlement annotation -->
<!-- Test 04: complement annotation -->
<test expect_num_outputs="1">
<!-- The complement command writes its result to the "annotation" output, and the output_format selector only declares the upper-case option values GFF and GTF -->
<conditional name="tool">
<param name="selector" value="complement"/>
<param name="input_annotation1" value="annotation_small.gtf" ftype="gtf"/>
<param name="input_annotation2" value="annotation_unique.gtf" ftype="gtf"/>
<param name="size_min" value="10"/>
<conditional name="output_format">
<param name="selector" value="GFF"/>
<param name="version" value="3"/>
</conditional>
</conditional>
<output name="annotation" file="test04.gff" ftype="gff"/>
</test>
Expand Down Expand Up @@ -296,7 +384,7 @@
<param name="history_item" value="genome.fasta.gz"/>
</conditional>
</conditional>
<output name="features_filtered" file="test07.tabular" ftype="tabular"/>
<output name="annotation" file="test07.gff" ftype="gff"/>
</test>
<!-- Test 08: Fix annotation file -->
<test expect_num_outputs="1">
Expand Down Expand Up @@ -328,6 +416,10 @@
<param name="input_annotation1" value="annotation_small.gtf"/>
<param name="input_annotation2" value="annotation_unique.gtf"/>
</conditional>
<conditional name="output_format">
<param name="selector" value="gff"/>
<param name="version" value="3"/>
</conditional>
<output name="annotation_gff" file="test10.gff" ftype="gff"/>
</test>
<!-- Test 11: Test compressed files -->
Expand Down Expand Up @@ -356,6 +448,14 @@
<has_text text="Job done" />
</assert_stdout>
</test>
<!-- Test 13: Add splicing sites -->
<test expect_num_outputs="1">
<conditional name="tool">
<param name="selector" value="splice_sites"/>
<param name="gff" value="test04.gff" ftype="gff"/>
</conditional>
<output name="annotation" file="test13.gff" ftype="gff"/>
</test>
</tests>
<help><![CDATA[
Expand Down
35 changes: 32 additions & 3 deletions tools/agat/macros.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<macros>
<token name="@TOOL_VERSION@">1.1.0</token>
<token name="@VERSION_SUFFIX@">1</token>
<token name="@TOOL_VERSION@">1.2.0</token>
<token name="@VERSION_SUFFIX@">0</token>
<xml name="requirements">
<requirements>
<requirement type="package" version="@TOOL_VERSION@">agat</requirement>
Expand All @@ -19,7 +19,36 @@
<xml name="ANNOTATION_INPUT" token_format="gff,gtf,gff3,gff3.gz">
<param argument="--gff" type="data" format="@FORMAT@" label="Annotation file" help="Input GTF/GFF file" />
</xml>

<!-- AGAT_CONFIG: shared output options for the AGAT sub-tools that expand this macro.
     It declares the output format (GFF or GTF) with a format version, plus the merge_loci and create_exon switches.
     The agat_configfile template in agat.xml reads them as $tool.output_format.selector,
     $tool.output_format.version, $tool.merge_loci and $tool.create_exon. -->
<xml name="AGAT_CONFIG">
<conditional name="output_format">
<param name="selector" type="select" label="Output format">
<option value="GFF">GFF</option>
<option value="GTF">GTF</option>
</param>
<!-- GFF output: version 3 is preselected -->
<when value="GFF">
<param name="version" type="select" label="Format version">
<option value="1">1</option>
<option value="2">2</option>
<option value="2.5">2.5</option>
<option value="3" selected="true">3</option>
</param>
</when>
<!-- GTF output: each option label lists the feature types for that version; "relax" is preselected -->
<when value="GTF">
<param name="version" type="select" label="Format version">
<option value="1">1 = ("CDS", "start_codon", "stop_codon", "exon", "intron")</option>
<option value="2">2 = ("CDS", "start_codon", "stop_codon", "exon")</option>
<option value="2.1">2.1 = ("CDS", "start_codon", "stop_codon", "exon", "5UTR", "3UTR")</option>
<option value="2.2">2.2 = ("CDS", "start_codon", "stop_codon", "5UTR", "3UTR", "inter", "inter_CNS", "intron_CNS", "exon")</option>
<option value="2.5">2.5 = ("gene", "transcript", "exon", "CDS", "UTR", "start_codon", "stop_codon", "Selenocysteine")</option>
<option value="3">3 = ("gene", "transcript", "exon", "CDS", "Selenocysteine", "start_codon", "stop_codon", "three_prime_utr", "five_prime_utr")</option>
<option value="relax" selected="true">Relax = All feature types will be accepted</option>
</param>
</when>
</conditional>
<!-- The boolean values are the literal strings true/false; the config template in agat.xml substitutes them directly into merge_loci and create_l3_for_l2_orphan -->
<param name="merge_loci" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Merge loci" help="Should overlapping loci (at CDS level) be merged in a single locus. Only one gene is kept, and the mRNA features become isoforms." />
<param name="create_exon" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Create exon when l2 do not have children"/>
</xml>

<xml name="REFERENCE_FASTA">
<conditional name="reference_genome">
<param name="source" type="select" label="Source for the reference genome" help="Built-in references were created using default options.">
Expand Down
1 change: 1 addition & 0 deletions tools/agat/test-data/region.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
K03455 1 2669
Binary file modified tools/agat/test-data/test01_plot1.pdf
Binary file not shown.
Binary file removed tools/agat/test-data/test01_plot2.pdf
Binary file not shown.
Loading

0 comments on commit 3760169

Please sign in to comment.