diff --git a/CHANGELOG.md b/CHANGELOG.md index fce07ec..8801613 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v0.5.0dev - [07-Nov-2024] +## v0.5.0dev - [11-Nov-2024] ### `Added` @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 2. Updated nf-core template to 3.0.2 [#66](https://github.com/PlantandFoodResearch/genepal/issues/66) 3. Integrated nf-test into pipeline CI [#68](https://github.com/PlantandFoodResearch/genepal/issues/68) 4. Updated the flowchart [#87](https://github.com/PlantandFoodResearch/genepal/issues/87) +5. Added a large test dataset for the `test_full` profile [#90](https://github.com/PlantandFoodResearch/genepal/issues/90) +6. Now `.gff.gz` and `.gff3.gz` inputs are also allowed for the `benchmark` column in `--input` +7. Now removing liftoff genes with any intron shorter than 10bp [#89](https://github.com/Plant-Food-Research-Open/genepal/issues/89) +8. 
Now also removing `rRNA` and `tRNA` after liftoff as the downstream logic in the pipeline cannot correctly handle these ### `Fixed` diff --git a/README.md b/README.md index ab1527f..8d35f3d 100644 --- a/README.md +++ b/README.md @@ -29,16 +29,16 @@ - With protein evidence alone, [BRAKER workflow C](https://github.com/Gaius-Augustus/BRAKER/tree/f58479fe5bb13a9e51c3ca09cb9e137cab3b8471?tab=readme-ov-file#overview-of-modes-for-running-braker) is executed - With protein plus RNASeq evidence, [BRAKER workflow D](https://github.com/Gaius-Augustus/BRAKER/tree/f58479fe5bb13a9e51c3ca09cb9e137cab3b8471?tab=readme-ov-file#overview-of-modes-for-running-braker) is executed - [Liftoff](https://github.com/agshumate/Liftoff): Optionally, liftoff annotations from reference genome FASTA/GFF -- [TSEBRA](https://github.com/Gaius-Augustus/TSEBRA) - - Ensure that each BRAKER model has [full intron support](./docs/usage.md#iso-forms-and-full-intron-support) - - Optionally, ensure that each Liftoff model has full intron support +- [TSEBRA](https://github.com/Gaius-Augustus/TSEBRA): Optionally, ensure that BRAKER models, or both BRAKER and Liftoff models, have [full intron support](./docs/usage.md#iso-forms-and-full-intron-support) - [AGAT](https://github.com/NBISweden/AGAT) - Merge multi-reference liftoffs - Remove liftoff transcripts marked by _valid_ORF=False_ + - Remove liftoff genes with any intron shorter than 10 bp + - Remove rRNA and tRNA from liftoff - Optionally, allow or remove iso-forms - Remove BRAKER models from Liftoff loci - - Optionally, remove models without any EggNOG-mapper hits - Merge Liftoff and BRAKER models + - Optionally, remove models without any EggNOG-mapper hits - [EggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper): Add functional annotation to gff - [GenomeTools](https://github.com/genometools/genometools): GFF format validation - [GffRead](https://github.com/gpertea/gffread): Extraction of protein sequences diff --git a/assets/schema_input.json 
b/assets/schema_input.json index a1e66c9..ebc3ffb 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -64,14 +64,14 @@ "anyOf": [ { "type": "string", - "pattern": "^\\S+\\.gff(3)?$" + "pattern": "^\\S+\\.gff(3)?(\\.gz)?$" }, { "type": "string", "maxLength": 0 } ], - "errorMessage": "GFF/GFF3 file for benchmarking cannot contain spaces and must have extension '.gff' or '.gff3'" + "errorMessage": "GFF/GFF3 file for benchmarking cannot contain spaces and must have extension '.gff.gz', '.gff3.gz', '.gff' or '.gff3'" } }, "type": "object", diff --git a/conf/test_full.config b/conf/test_full.config index 05d556a..979ed52 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -10,26 +10,22 @@ ---------------------------------------------------------------------------------------- */ -process { - resourceLimits = [ - cpus: 10, - memory: '32.GB', - time: '6.h' - ] -} - params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' - // Input data - input = "${projectDir}/tests/minimal/assemblysheet.csv" - protein_evidence = 'https://raw.githubusercontent.com/Gaius-Augustus/BRAKER/f58479fe5bb13a9e51c3ca09cb9e137cab3b8471/example/proteins.fa' + input = "${projectDir}/tests/full/assemblysheet.csv" + protein_evidence = 'https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/214/015/GCF_000214015.3_version_140606/GCF_000214015.3_version_140606_protein.faa.gz' + eggnogmapper_tax_scope = 33090 + rna_evidence = "${projectDir}/tests/full/rnasheet.csv" + liftoff_annotations = "${projectDir}/tests/full/liftoffannotations.csv" + orthofinder_annotations = "${projectDir}/tests/full/orthofinderannotations.csv" + + star_max_intron_length = 5000 - // Braker options for faster test execution! - // WARNING: Do not use with actual data! - braker_extra_args = '--gm_max_intergenic 10000 --skipOptimize' // Added for faster test execution! Do not use with actual data! 
+ busco_lineage_datasets = 'chlorophyta_odb10 eukaryota_odb10' - // BUSCO lineage - busco_lineage_datasets = 'eudicots_odb10' + // Relaxed filtering due to limited evidence + enforce_full_intron_support = false + filter_liftoff_by_hints = false } diff --git a/docs/img/genepal.png b/docs/img/genepal.png index 69668d2..d03c8b6 100644 Binary files a/docs/img/genepal.png and b/docs/img/genepal.png differ diff --git a/docs/output.md b/docs/output.md index e1ce75a..8bd504c 100644 --- a/docs/output.md +++ b/docs/output.md @@ -119,7 +119,7 @@ RNASeq alignment is performed with [STAR](https://github.com/alexdobin/STAR). Al > > BRAKER outputs are not the final outputs from the pipeline and that's why they are not stored by default. These are only intermediary files. > -> The pipeline further processes the BRAKER predictions and stores the final validated outputs in the `annotations` directory. The `braker_save_outputs` option is only provided to allow a manual resume of the pipeline for advanced use cases. +> The pipeline further processes the BRAKER predictions and stores the final validated outputs in the `annotations` directory. The `braker_save_outputs` option is only provided to allow a manual resume of the pipeline for advanced use cases. See [Advanced inputs for manual resume](./usage.md#advanced-inputs-for-manual-resume) in the [usage doc](./usage.md). ### Annotation with Liftoff diff --git a/docs/usage.md b/docs/usage.md index b6ba3c4..1b84b09 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -5,6 +5,7 @@ > This document does not describe every pipeline parameter. For an exhaustive list of parameters, see [parameters.md](./parameters.md). 
- [Assemblysheet input](#assemblysheet-input) + - [Advanced inputs for manual resume](#advanced-inputs-for-manual-resume) - [Protein evidence](#protein-evidence) - [BRAKER workflow](#braker-workflow) - [RNASeq evidence](#rnaseq-evidence) @@ -39,7 +40,17 @@ You will need to create an assemblysheet with information about the genome assem - `tag:` A unique tag which represents the target assembly throughout the pipeline. The `tag` and `fasta` file name should not be same, such as `tag.fasta`. This can create file name collisions in the pipeline or result in file overwrite. It is also a good-practice to make all the input files read-only. - `fasta:` FASTA file for the genome - `is_masked:` Whether the FASTA is masked or not? Use yes/no to indicate the masking. If the assembly is not masked. The pipeline will soft mask it before annotating it. -- `te_lib [Optional]`: If an assembly is not masked and a TE library is available which cna be used to mask the assembly, the path of the TE library FASTA file can be provided here. If this column is absent and the assembly is not masked, the pipeline will first create a TE library so that it can soft mask the assembly. +- `te_lib [Optional]:` If an assembly is not masked and a TE library is available which can be used to mask the assembly, the path of the TE library FASTA file can be provided here. If this column is absent and the assembly is not masked, the pipeline will first create a TE library so that it can soft mask the assembly. +- `benchmark [Optional]:` A GFF3 file which can be used to benchmark or compare the results of the pipeline against an existing annotation. + +### Advanced inputs for manual resume + +If the pipeline fails while processing large datasets, it is advisable to back up the repeat-masked genomes and the BRAKER outputs before attempting a [Nextflow resume](https://www.nextflow.io/docs/latest/cache-and-resume.html#caching-and-resuming). 
If the resume fails, these outputs from the first pipeline run can be used to set up a manual resume. This can be achieved by providing the repeat-masked genomes under the `fasta` column along with the `is_masked` column set to `yes`. The BRAKER outputs can be provided under the following columns: + +- `braker_gff3 [Optional]:` BRAKER GFF3 file +- `braker_hints [Optional]:` BRAKER hints file in GFF3 format + +The pipeline will automatically skip the repeat modelling, masking and BRAKER steps. It will still perform these steps for those genomes for which these files are not provided. These files are not saved by the pipeline by default. To save the files, set the `repeatmasker_save_outputs` and `braker_save_outputs` parameters to `true`. ## Protein evidence diff --git a/local_genepal b/local_genepal index ab6d774..92a0b5d 100755 --- a/local_genepal +++ b/local_genepal @@ -17,6 +17,6 @@ nextflow run \ -profile docker,test_full \ -resume \ $stub \ - --eggnogmapper_tax_scope 33090 \ + -c ../nxf-config/resources.config \ --eggnogmapper_db_dir ../dbs/emapperdb/5.0.2 \ --outdir results diff --git a/modules.json b/modules.json index 1edf67b..4e5a485 100644 --- a/modules.json +++ b/modules.json @@ -15,6 +15,11 @@ "git_sha": "a8939d36280e7d9037c7cf164eeede19e46546a4", "installed_by": ["gxf_fasta_agat_spaddintrons_spextractsequences"] }, + "agat/spflagshortintrons": { + "branch": "main", + "git_sha": "d8f08700c82a3bd14811a3dfe7e7d63838130693", + "installed_by": ["modules"] + }, "braker3": { "branch": "main", "git_sha": "a8939d36280e7d9037c7cf164eeede19e46546a4", diff --git a/modules/gallvp/agat/spflagshortintrons/environment.yml b/modules/gallvp/agat/spflagshortintrons/environment.yml new file mode 100644 index 0000000..647ad5e --- /dev/null +++ b/modules/gallvp/agat/spflagshortintrons/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda 
+dependencies: + - "bioconda::agat=1.4.1" diff --git a/modules/gallvp/agat/spflagshortintrons/main.nf b/modules/gallvp/agat/spflagshortintrons/main.nf new file mode 100644 index 0000000..f6c4c0e --- /dev/null +++ b/modules/gallvp/agat/spflagshortintrons/main.nf @@ -0,0 +1,50 @@ +process AGAT_SPFLAGSHORTINTRONS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/agat:1.4.1--pl5321hdfd78af_0': + 'biocontainers/agat:1.4.1--pl5321hdfd78af_0' }" + + input: + tuple val(meta), path(gxf) + path config + + output: + tuple val(meta), path("*.gff") , emit: gff + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def config_arg = config ? "-c $config" : '' + if( "$gxf" == "${prefix}.gff" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + agat_sp_flag_short_introns.pl \\ + $args \\ + -g $gxf \\ + $config_arg \\ + -o ${prefix}.gff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_sp_flag_short_introns.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + if( "$gxf" == "${prefix}.gff" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + touch ${prefix}.gff + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + agat: \$(agat_sp_flag_short_introns.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p') + END_VERSIONS + """ +} diff --git a/modules/gallvp/agat/spflagshortintrons/meta.yml b/modules/gallvp/agat/spflagshortintrons/meta.yml new file mode 100644 index 0000000..31058d0 --- /dev/null +++ b/modules/gallvp/agat/spflagshortintrons/meta.yml @@ -0,0 +1,60 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "agat_spflagshortintrons" +description: | + The script flags the short introns with the attribute . Is is usefull to avoid ERROR when submiting the data to EBI. + (Typical EBI error message: ********ERROR: Intron usually expected to be at least 10 nt long. Please check the accuracy) +keywords: + - genomics + - gtf + - gff + - intron + - short + - annotation +tools: + - "agat": + description: "Another Gff Analysis Toolkit (AGAT). Suite of tools to handle gene annotations in any GTF/GFF format." + homepage: "https://agat.readthedocs.io/en/latest/" + documentation: "https://agat.readthedocs.io/en/latest/" + tool_dev_url: "https://github.com/NBISweden/AGAT" + doi: "10.5281/zenodo.3552717" + licence: ["GPL v3"] + identifier: biotools:AGAT +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - gxf: + type: file + description: Input GFF3/GTF file + pattern: "*.{gff,gff3,gtf}" + - - config: + type: file + description: | + Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". + The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). 
+ pattern: "*.yaml" +output: + - gff: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ] + - "*.gff": + type: file + description: Output GFF file. + pattern: "*.gff" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/gallvp/agat/spflagshortintrons/tests/main.nf.test b/modules/gallvp/agat/spflagshortintrons/tests/main.nf.test new file mode 100644 index 0000000..bc07077 --- /dev/null +++ b/modules/gallvp/agat/spflagshortintrons/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process AGAT_SPFLAGSHORTINTRONS" + script "../main.nf" + process "AGAT_SPFLAGSHORTINTRONS" + + tag "modules" + tag "modules_gallvp" + tag "agat" + tag "agat/spflagshortintrons" + + test("homo_sapiens - genome - gtf") { + + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr1/genome.gtf', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - genome - gtf - stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr1/genome.gtf', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/gallvp/agat/spflagshortintrons/tests/main.nf.test.snap b/modules/gallvp/agat/spflagshortintrons/tests/main.nf.test.snap new file mode 100644 index 0000000..e1de4ea --- /dev/null +++ b/modules/gallvp/agat/spflagshortintrons/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + 
"homo_sapiens - genome - gtf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,d50eba23d96e28146030720912e12709" + ], + "gff": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,d50eba23d96e28146030720912e12709" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-11-10T10:06:15.615025" + }, + "homo_sapiens - genome - gtf": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff:md5,274fe888f5cdf138cc059bcec2b75b1c" + ] + ], + "1": [ + "versions.yml:md5,d50eba23d96e28146030720912e12709" + ], + "gff": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gff:md5,274fe888f5cdf138cc059bcec2b75b1c" + ] + ], + "versions": [ + "versions.yml:md5,d50eba23d96e28146030720912e12709" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-11-10T10:06:09.863425" + } +} \ No newline at end of file diff --git a/subworkflows/local/fasta_liftoff.nf b/subworkflows/local/fasta_liftoff.nf index a46beb3..0da2949 100644 --- a/subworkflows/local/fasta_liftoff.nf +++ b/subworkflows/local/fasta_liftoff.nf @@ -3,6 +3,7 @@ include { GUNZIP as GUNZIP_GFF } from '../../mo include { GFFREAD as GFFREAD_BEFORE_LIFTOFF } from '../../modules/nf-core/gffread/main' include { LIFTOFF } from '../../modules/nf-core/liftoff/main' include { AGAT_SPMERGEANNOTATIONS as MERGE_LIFTOFF_ANNOTATIONS } from '../../modules/nf-core/agat/spmergeannotations/main' +include { AGAT_SPFLAGSHORTINTRONS } from '../../modules/gallvp/agat/spflagshortintrons/main' include { AGAT_SPFILTERFEATUREFROMKILLLIST } from '../../modules/nf-core/agat/spfilterfeaturefromkilllist/main' include { GFFREAD as GFFREAD_AFTER_LIFTOFF } from '../../modules/nf-core/gffread/main' include { 
GFF_TSEBRA_SPFILTERFEATUREFROMKILLLIST } from '../../subworkflows/local/gff_tsebra_spfilterfeaturefromkilllist' @@ -112,8 +113,16 @@ workflow FASTA_LIFTOFF { ) ch_versions = ch_versions.mix(MERGE_LIFTOFF_ANNOTATIONS.out.versions.first()) + // MODULE: AGAT_SPFLAGSHORTINTRONS + AGAT_SPFLAGSHORTINTRONS ( ch_merged_gff, [] ) + + ch_flagged_gff = AGAT_SPFLAGSHORTINTRONS.out.gff + ch_versions = ch_versions.mix(AGAT_SPFLAGSHORTINTRONS.out.versions.first()) + // COLLECTFILE: Kill list for valid_ORF=False transcripts - ch_kill_list = ch_merged_gff + // tRNA, rRNA + // gene with any intron marked as 'pseudo=' by AGAT/SPFLAGSHORTINTRONS + ch_kill_list = ch_flagged_gff | map { meta, gff -> def tx_from_gff = gff.readLines() @@ -122,10 +131,12 @@ workflow FASTA_LIFTOFF { def cols = it.split('\t') def feat = cols[2] - if ( ! ( feat == 'transcript' || feat == 'mRNA' ) ) { return false } + + if ( feat in [ 'tRNA', 'rRNA' ] ) { return true } + if ( feat !in [ 'transcript', 'mRNA', 'gene' ] ) { return false } def attrs = cols[8] - attrs.contains('valid_ORF=False') + ( attrs.contains('valid_ORF=False') || attrs.contains('pseudo=') ) } .collect { def cols = it.split('\t') @@ -144,7 +155,7 @@ workflow FASTA_LIFTOFF { } // MODULE: AGAT_SPFILTERFEATUREFROMKILLLIST - ch_agat_kill_inputs = ch_merged_gff + ch_agat_kill_inputs = ch_flagged_gff | join(ch_kill_list) diff --git a/tests/full/assemblysheet.csv b/tests/full/assemblysheet.csv new file mode 100644 index 0000000..58be184 --- /dev/null +++ b/tests/full/assemblysheet.csv @@ -0,0 +1,4 @@ +tag ,fasta ,is_masked ,benchmark +Ostta ,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/158/475/GCA_002158475.1_Ostta1115_2/GCA_002158475.1_Ostta1115_2_genomic.fna.gz ,yes ,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/002/158/475/GCA_002158475.1_Ostta1115_2/GCA_002158475.1_Ostta1115_2_genomic.gff.gz +RCC7079 ,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/905/339/685/GCA_905339685.1_AP0985_ms/GCA_905339685.1_AP0985_ms_genomic.fna.gz ,yes +RCC7080 
,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/905/339/615/GCA_905339615.1_AP0986_ms/GCA_905339615.1_AP0986_ms_genomic.fna.gz ,yes diff --git a/tests/full/liftoffannotations.csv b/tests/full/liftoffannotations.csv new file mode 100644 index 0000000..5c4f7b6 --- /dev/null +++ b/tests/full/liftoffannotations.csv @@ -0,0 +1,2 @@ +fasta ,gff3 +https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/214/015/GCF_000214015.3_version_140606/GCF_000214015.3_version_140606_genomic.fna.gz ,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/214/015/GCF_000214015.3_version_140606/GCF_000214015.3_version_140606_genomic.gff.gz diff --git a/tests/full/orthofinderannotations.csv b/tests/full/orthofinderannotations.csv new file mode 100644 index 0000000..c8bc463 --- /dev/null +++ b/tests/full/orthofinderannotations.csv @@ -0,0 +1,2 @@ +tag,fasta +RCC4221,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/214/015/GCF_000214015.3_version_140606/GCF_000214015.3_version_140606_protein.faa.gz diff --git a/tests/full/rnasheet.csv b/tests/full/rnasheet.csv new file mode 100644 index 0000000..31ef837 --- /dev/null +++ b/tests/full/rnasheet.csv @@ -0,0 +1,3 @@ +sample ,file_1 ,file_2 ,target_assemblies +SRR15069948 ,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR150/048/SRR15069948/SRR15069948.fastq.gz , ,Ostta;RCC7079;RCC7080 +SRR7121164 ,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR712/004/SRR7121164/SRR7121164_1.fastq.gz ,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR712/004/SRR7121164/SRR7121164_2.fastq.gz ,Ostta;RCC7079;RCC7080 diff --git a/tests/stub/main.nf.test.snap b/tests/stub/main.nf.test.snap index 1a7b3af..1c2a70a 100644 --- a/tests/stub/main.nf.test.snap +++ b/tests/stub/main.nf.test.snap @@ -2,7 +2,7 @@ "full - stub": { "content": [ { - "successful tasks": 148, + "successful tasks": 152, "versions": { "AGAT_CONVERTSPGFF2GTF": { "agat": "v1.4.0" @@ -19,6 +19,9 @@ "AGAT_SPFILTERFEATUREFROMKILLLIST": { "agat": "v1.4.0" }, + "AGAT_SPFLAGSHORTINTRONS": { + "agat": "v1.4.1" + }, "AGAT_SPMERGEANNOTATIONS": { "agat": 
"v1.4.0" }, @@ -184,6 +187,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-22T13:39:38.456892" + "timestamp": "2024-11-11T17:57:02.994671" } } \ No newline at end of file diff --git a/workflows/genepal.nf b/workflows/genepal.nf index 13a8cab..2769013 100644 --- a/workflows/genepal.nf +++ b/workflows/genepal.nf @@ -24,6 +24,7 @@ include { GXF_FASTA_AGAT_SPADDINTRONS_SPEXTRACTSEQUENCES } from '../subworkflows include { CAT_CAT as SAVE_MARKED_GFF3 } from '../modules/nf-core/cat/cat/main' include { GFFCOMPARE as BENCHMARK } from '../modules/nf-core/gffcompare/main' +include { FILE_GUNZIP as BENCHMARK_GFF3_GUNZIP } from '../subworkflows/local/file_gunzip' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_genepal_pipeline' @@ -247,10 +248,15 @@ workflow GENEPAL { // MODULE: CAT_CAT as SAVE_MARKED_GFF3 SAVE_MARKED_GFF3 ( ch_splicing_marked_gff3 ) + // SUBWORKFLOW: FILE_GUNZIP as BENCHMARK_GFF3_GUNZIP + BENCHMARK_GFF3_GUNZIP ( ch_benchmark_gff ) + ch_benchmark_gunzip_gff = BENCHMARK_GFF3_GUNZIP.out.gunzip + ch_versions = ch_versions.mix(BENCHMARK_GFF3_GUNZIP.out.versions) + // MODULE: GFFCOMPARE as BENCHMARK ch_benchmark_inputs = ch_final_gff | join ( ch_valid_target_assembly ) - | join ( ch_benchmark_gff ) + | join ( ch_benchmark_gunzip_gff ) BENCHMARK ( ch_benchmark_inputs.map { meta, gff, fasta, ref_gff -> [ meta, gff ] },