diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7a620df22..0d3cb8255 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -51,7 +51,6 @@ jobs: matrix: NXF_VER: - "24.04.2" - - "latest-everything" nf_test_files: ["${{ fromJson(needs.nf-test-changes.outputs.nf_test_files) }}"] profile: - "docker" diff --git a/CHANGELOG.md b/CHANGELOG.md index 83e2bc3b4..a9c12a685 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [PR #1480](https://github.com/nf-core/rnaseq/pull/1480) - Bump version after release 3.18.0 - [PR #1482](https://github.com/nf-core/rnaseq/pull/1482) - Update trimgalore module for save_unpaired fix -- [pR #1486](https://github.com/nf-core/rnaseq/pull/1486) - Bump STAR build for multiprocessing fix +- [PR #1486](https://github.com/nf-core/rnaseq/pull/1486) - Bump STAR build for multiprocessing fix +- [PR #1490](https://github.com/nf-core/rnaseq/pull/1490) - Make genomic FASTA input optional # 3.18.0 - 2024-12-19 diff --git a/bin/filter_gtf.py b/bin/filter_gtf.py index b2215fde6..4393fdc9e 100755 --- a/bin/filter_gtf.py +++ b/bin/filter_gtf.py @@ -6,7 +6,7 @@ import argparse import re import statistics -from typing import Set +from typing import Optional, Set # Create a logger logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") @@ -27,14 +27,15 @@ def tab_delimited(file: str) -> float: return statistics.median(line.count("\t") for line in data.split("\n")) -def filter_gtf(fasta: str, gtf_in: str, filtered_gtf_out: str, skip_transcript_id_check: bool) -> None: +def filter_gtf(fasta: Optional[str], gtf_in: str, filtered_gtf_out: str, skip_transcript_id_check: bool) -> None: """Filter GTF file based on FASTA sequence names.""" if tab_delimited(gtf_in) != 8: raise ValueError("Invalid GTF file: Expected 9 tab-separated columns.") - seq_names_in_genome = extract_fasta_seq_names(fasta) - 
logger.info(f"Extracted chromosome sequence names from {fasta}") - logger.debug("All sequence IDs from FASTA: " + ", ".join(sorted(seq_names_in_genome))) + if (fasta is not None): + seq_names_in_genome = extract_fasta_seq_names(fasta) + logger.info(f"Extracted chromosome sequence names from {fasta}") + logger.debug("All sequence IDs from FASTA: " + ", ".join(sorted(seq_names_in_genome))) seq_names_in_gtf = set() try: @@ -44,7 +45,7 @@ def filter_gtf(fasta: str, gtf_in: str, filtered_gtf_out: str, skip_transcript_i seq_name = line.split("\t")[0] seq_names_in_gtf.add(seq_name) # Add sequence name to the set - if seq_name in seq_names_in_genome: + if fasta is None or seq_name in seq_names_in_genome: if skip_transcript_id_check or re.search(r'transcript_id "([^"]+)"', line): out.write(line) line_count += 1 @@ -63,7 +64,7 @@ def filter_gtf(fasta: str, gtf_in: str, filtered_gtf_out: str, skip_transcript_i if __name__ == "__main__": parser = argparse.ArgumentParser(description="Filters a GTF file based on sequence names in a FASTA file.") parser.add_argument("--gtf", type=str, required=True, help="GTF file") - parser.add_argument("--fasta", type=str, required=True, help="Genome fasta file") + parser.add_argument("--fasta", type=str, required=False, help="Genome fasta file") parser.add_argument("--prefix", dest="prefix", default="genes", type=str, help="Prefix for output GTF files") parser.add_argument( "--skip_transcript_id_check", action="store_true", help="Skip checking for transcript IDs in the GTF file" diff --git a/docs/usage.md b/docs/usage.md index 48cafce85..74fd92290 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -132,7 +132,7 @@ You also have the option to pseudoalign and quantify your data directly with [Sa The library preparation protocol (library type) used by Salmon quantification is inferred by the pipeline based on the information provided in the samplesheet, however, you can override it using the `--salmon_quant_libtype` parameter. 
You can find the available options in the [Salmon documentation](https://salmon.readthedocs.io/en/latest/library_type.html). Similarly, strandedness is taken from the sample sheet or calculated automatically, and passed to Kallisto on a per-library basis, but you can apply a global override by setting the Kallisto strandedness parameters in `--extra_kallisto_quant_args` like `--extra_kallisto_quant_args '--fr-stranded'` see the [Kallisto documentation](https://pachterlab.github.io/kallisto/manual). -When running Salmon in mapping-based mode via `--pseudo_aligner salmon` the entire genome of the organism is used by default for the decoy-aware transcriptome when creating the indices (see second bulleted option in [Salmon documentation](https://salmon.readthedocs.io/en/latest/salmon.html#preparing-transcriptome-indices-mapping-based-mode)). +When running Salmon in mapping-based mode via `--pseudo_aligner salmon`, supplying a genome fasta via `--fasta` and not supplying a Salmon index, the entire genome of the organism is used by default for the decoy-aware transcriptome when creating the indices, as is recommended (see second bulleted option in [Salmon documentation](https://salmon.readthedocs.io/en/latest/salmon.html#preparing-transcriptome-indices-mapping-based-mode)). If you supply neither a genome FASTA file nor a Salmon index, Salmon will index without those decoys, using only transcript sequences in the index. This transcript-only indexing is not usually recommended, but may be useful in limited circumstances. Note that Kallisto does not index with genomic sequences. Two additional parameters `--extra_star_align_args` and `--extra_salmon_quant_args` were added in v3.10 of the pipeline that allow you to append any custom parameters to the STAR align and Salmon quant commands, respectively. Note, the `--seqBias` and `--gcBias` are not provided to Salmon quant by default so you can provide these via `--extra_salmon_quant_args '--seqBias --gcBias'` if required.
You can now also supply additional arguments to Kallisto via `--extra_kallisto_quant_args`. @@ -209,7 +209,7 @@ When supplying reference files as discussed below, it is important to be consist ### Explicit reference file specification (recommended) -The minimum reference genome requirements for this pipeline are a FASTA and GTF file, all other files required to run the pipeline can be generated from these files. For example, the latest reference files for human can be derived from Ensembl like: +The minimum reference genome requirements for this pipeline are a FASTA file (genome and/or transcriptome) and a GTF file; all other files required to run the pipeline can be generated from these files. For example, the latest reference files for human can be derived from Ensembl like: ``` latest_release=$(curl -s 'http://rest.ensembl.org/info/software?content-type=application/json' | grep -o '"release":[0-9]*' | cut -d: -f2) @@ -227,6 +227,7 @@ Notes: - If `--gene_bed` is not provided then it will be generated from the GTF file. - If `--additional_fasta` is provided then the features in this file (e.g. ERCC spike-ins) will be automatically concatenated onto both the reference FASTA file as well as the GTF annotation before building the appropriate indices. - When using `--aligner star_rsem`, both the STAR and RSEM indices should be present in the path specified by `--rsem_index` (see [#568](https://github.com/nf-core/rnaseq/issues/568)). +- If the `--skip_alignment` option is used along with `--transcript_fasta`, the pipeline can technically run without providing the genomic FASTA (`--fasta`). However, this approach is **not recommended** with `--pseudo_aligner salmon`, as any dynamically generated Salmon index will lack decoys. To ensure optimal indexing with decoys, it is **highly recommended** to include the genomic FASTA (`--fasta`) with Salmon, unless a pre-existing decoy-aware Salmon index is supplied.
For more details on the benefits of decoy-aware indexing, refer to the [Salmon documentation](https://salmon.readthedocs.io/en/latest/salmon.html#preparing-transcriptome-indices-mapping-based-mode). #### Reference genome @@ -304,7 +305,7 @@ Notes: ### GTF filtering -By default, the input GTF file will be filtered to ensure that sequence names correspond to those in the genome fasta file, and to remove rows with empty transcript identifiers. Filtering can be bypassed completely where you are confident it is not necessary, using the `--skip_gtf_filter` parameter. If you just want to skip the 'transcript_id' checking component of the GTF filtering script used in the pipeline this can be disabled specifically using the `--skip_gtf_transcript_filter` parameter. +By default, the input GTF file will be filtered to ensure that sequence names correspond to those in the genome fasta file (where supplied), and to remove rows with empty transcript identifiers. Filtering can be bypassed completely where you are confident it is not necessary, using the `--skip_gtf_filter` parameter. If you just want to skip the 'transcript_id' checking component of the GTF filtering script used in the pipeline this can be disabled specifically using the `--skip_gtf_transcript_filter` parameter. ## Contamination screening options @@ -332,6 +333,21 @@ nextflow run \ -profile docker ``` +You can also run without a genomic FASTA file, provided you skip the alignment step and provide a transcriptome FASTA directly: + +```bash +nextflow run \ + nf-core/rnaseq \ + --input <SAMPLESHEET> \ + --outdir <OUTDIR> \ + --gtf <GTF> \ + --transcript_fasta <TRANSCRIPTOME FASTA> \ + --skip_alignment \ + -profile docker +``` + +This is not usually recommended with Salmon unless you also supply a previously generated decoy-aware Salmon transcriptome index. + > **NB:** Loading iGenomes configuration remains the default for reasons of consistency with other workflows, but should be disabled when not using iGenomes, applying the recommended usage above.
This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. diff --git a/modules.json b/modules.json index 1c8fbb6f6..320f2c245 100644 --- a/modules.json +++ b/modules.json @@ -181,7 +181,7 @@ }, "salmon/index": { "branch": "master", - "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "git_sha": "25ddc0bb25292280923eed07e6351789a671e86a", "installed_by": ["fastq_subsample_fq_salmon"] }, "salmon/quant": { diff --git a/modules/local/gtf_filter/main.nf b/modules/local/gtf_filter/main.nf index 60eb9a9bd..bba995475 100644 --- a/modules/local/gtf_filter/main.nf +++ b/modules/local/gtf_filter/main.nf @@ -18,11 +18,15 @@ process GTF_FILTER { task.ext.when == null || task.ext.when script: // filter_gtf.py is bundled with the pipeline, in nf-core/rnaseq/bin/ + fasta_text='' + if (fasta){ + fasta_text="--fasta $fasta" + } """ filter_gtf.py \\ --gtf $gtf \\ - --fasta $fasta \\ - --prefix ${fasta.baseName} + $fasta_text \\ + --prefix ${gtf.baseName} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/salmon/index/main.nf b/modules/nf-core/salmon/index/main.nf index 3d653c0d0..2e9c6224c 100644 --- a/modules/nf-core/salmon/index/main.nf +++ b/modules/nf-core/salmon/index/main.nf @@ -20,22 +20,29 @@ process SALMON_INDEX { script: def args = task.ext.args ?: '' - def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 > decoys.txt" - def gentrome = "gentrome.fa" - if (genome_fasta.endsWith('.gz')) { - get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 > decoys.txt" - gentrome = "gentrome.fa.gz" + def decoys = '' + def fasta = transcript_fasta + if (genome_fasta){ + if (genome_fasta.endsWith('.gz')) { + genome_fasta = "<(gunzip -c $genome_fasta)" + } + decoys='-d decoys.txt' + fasta='gentrome.fa' + } + if (transcript_fasta.endsWith('.gz')) { + transcript_fasta = "<(gunzip -c $transcript_fasta)" } """ - $get_decoy_ids 
- sed -i.bak -e 's/>//g' decoys.txt - cat $transcript_fasta $genome_fasta > $gentrome + if [ -n '$genome_fasta' ]; then + grep '^>' $genome_fasta | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 | sed 's/>//g' > decoys.txt + cat $transcript_fasta $genome_fasta > $fasta + fi salmon \\ index \\ --threads $task.cpus \\ - -t $gentrome \\ - -d decoys.txt \\ + -t $fasta \\ + $decoys \\ $args \\ -i salmon diff --git a/modules/nf-core/salmon/index/tests/main.nf.test b/modules/nf-core/salmon/index/tests/main.nf.test index 0caf30d3e..30b7359b9 100644 --- a/modules/nf-core/salmon/index/tests/main.nf.test +++ b/modules/nf-core/salmon/index/tests/main.nf.test @@ -3,6 +3,10 @@ nextflow_process { name "Test Process SALMON_INDEX" script "../main.nf" process "SALMON_INDEX" + tag "modules" + tag "modules_nfcore" + tag "salmon" + tag "salmon/index" test("sarscov2") { @@ -22,13 +26,43 @@ nextflow_process { assertAll( { assert process.success }, { assert path(process.out.index.get(0)).exists() }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot( + file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(), + process.out.versions + ).match()} ) } } - test("sarscov2 stub") { + test("sarscov2 transcriptome only") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([]) + input[1] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.index.get(0)).exists() }, + { assert snapshot( + file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(), + process.out.versions + ).match()} + ) + } + + } + + test("sarscov2 stub") { options "-stub" when { params { @@ -46,7 +80,10 @@ nextflow_process { assertAll( { assert process.success }, { assert path(process.out.index.get(0)).exists() }, - { assert 
snapshot(process.out.versions).match("versions stub") } + { assert snapshot( + file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(), + process.out.versions + ).match()} ) } diff --git a/modules/nf-core/salmon/index/tests/main.nf.test.snap b/modules/nf-core/salmon/index/tests/main.nf.test.snap index e5899b511..f8ed44d7d 100644 --- a/modules/nf-core/salmon/index/tests/main.nf.test.snap +++ b/modules/nf-core/salmon/index/tests/main.nf.test.snap @@ -1,26 +1,41 @@ { - "versions": { + "sarscov2 stub": { "content": [ + "[complete_ref_lens.bin, ctable.bin, ctg_offsets.bin, duplicate_clusters.tsv, info.json, mphf.bin, pos.bin, pre_indexing.log, rank.bin, refAccumLengths.bin, ref_indexing.log, reflengths.bin, refseq.bin, seq.bin, versionInfo.json]", [ "versions.yml:md5,85337fa0a286ea35073ee5260974e307" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.2" }, - "timestamp": "2024-10-18T10:00:47.087293189" + "timestamp": "2025-01-20T12:57:51.498323" }, - "versions stub": { + "sarscov2": { "content": [ + "[complete_ref_lens.bin, ctable.bin, ctg_offsets.bin, duplicate_clusters.tsv, info.json, mphf.bin, pos.bin, pre_indexing.log, rank.bin, refAccumLengths.bin, ref_indexing.log, reflengths.bin, refseq.bin, seq.bin, versionInfo.json]", [ "versions.yml:md5,85337fa0a286ea35073ee5260974e307" ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.2" }, - "timestamp": "2024-10-18T10:01:03.89824494" + "timestamp": "2025-01-20T12:57:33.474302" + }, + "sarscov2 transcriptome only": { + "content": [ + "[complete_ref_lens.bin, ctable.bin, ctg_offsets.bin, duplicate_clusters.tsv, info.json, mphf.bin, pos.bin, pre_indexing.log, rank.bin, refAccumLengths.bin, ref_indexing.log, reflengths.bin, refseq.bin, seq.bin, versionInfo.json]", + [ + "versions.yml:md5,85337fa0a286ea35073ee5260974e307" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + 
"timestamp": "2025-01-20T12:57:42.420247" } } \ No newline at end of file diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index c1b8b4c66..f4c82778e 100644 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -36,16 +36,17 @@ include { GTF_FILTER } from '../../../modules/local/gt include { STAR_GENOMEGENERATE_IGENOMES } from '../../../modules/local/star_genomegenerate_igenomes' workflow PREPARE_GENOME { + take: - fasta // file: /path/to/genome.fasta - gtf // file: /path/to/genome.gtf - gff // file: /path/to/genome.gff - additional_fasta // file: /path/to/additional.fasta - transcript_fasta // file: /path/to/transcript.fasta - gene_bed // file: /path/to/gene.bed - splicesites // file: /path/to/splicesites.txt - bbsplit_fasta_list // file: /path/to/bbsplit_fasta_list.txt - sortmerna_fasta_list // file: /path/to/sortmerna_fasta_list.txt + fasta // file: /path/to/genome.fasta (optional!) + gtf // file: /path/to/genome.gtf + gff // file: /path/to/genome.gff + additional_fasta // file: /path/to/additional.fasta + transcript_fasta // file: /path/to/transcript.fasta + gene_bed // file: /path/to/gene.bed + splicesites // file: /path/to/splicesites.txt + bbsplit_fasta_list // file: /path/to/bbsplit_fasta_list.txt + sortmerna_fasta_list // file: /path/to/sortmerna_fasta_list.txt star_index // directory: /path/to/star/index/ rsem_index // directory: /path/to/rsem/index/ salmon_index // directory: /path/to/salmon/index/ @@ -53,285 +54,314 @@ workflow PREPARE_GENOME { hisat2_index // directory: /path/to/hisat2/index/ bbsplit_index // directory: /path/to/bbsplit/index/ sortmerna_index // directory: /path/to/sortmerna/index/ - gencode // boolean: whether the genome is from GENCODE - featurecounts_group_type // string: The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts - aligner // string: Specifies the alignment algorithm 
to use - available options are 'star_salmon', 'star_rsem' and 'hisat2' - pseudo_aligner // string: Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner' - skip_gtf_filter // boolean: Skip filtering of GTF for valid scaffolds and/ or transcript IDs - skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads - skip_sortmerna // boolean: Skip sortmerna for removal of reads mapping to sequences in sortmerna_fasta_list - skip_alignment // boolean: Skip all of the alignment-based processes within the pipeline - skip_pseudo_alignment // boolean: Skip all of the pseudoalignment-based processes within the pipeline + gencode // boolean: whether the genome is from GENCODE + featurecounts_group_type // string: The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts + aligner // string: Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2' + pseudo_aligner // string: Specifies the pseudo aligner to use - available options are 'salmon'. 
Runs in addition to '--aligner' + skip_gtf_filter // boolean: Skip filtering of GTF for valid scaffolds and/ or transcript IDs + skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads + skip_sortmerna // boolean: Skip sortmerna for removal of reads mapping to sequences in sortmerna_fasta_list + skip_alignment // boolean: Skip all of the alignment-based processes within the pipeline + skip_pseudo_alignment // boolean: Skip all of the pseudoalignment-based processes within the pipeline main: + // Versions collector ch_versions = Channel.empty() - // - // Uncompress genome fasta file if required - // - if (fasta.endsWith('.gz')) { - ch_fasta = GUNZIP_FASTA ( [ [:], file(fasta, checkIfExists: true) ] ).gunzip.map { it[1] } - ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) - } else { - ch_fasta = Channel.value(file(fasta, checkIfExists: true)) + //--------------------------- + // 1) Uncompress GTF or GFF -> GTF + //--------------------------- + ch_gtf = Channel.empty() + if (gtf) { + if (gtf.endsWith('.gz')) { + ch_gtf = GUNZIP_GTF ([ [:], file(gtf, checkIfExists: true) ]).gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) + } else { + ch_gtf = Channel.value(file(gtf, checkIfExists: true)) + } + } else if (gff) { + def ch_gff + if (gff.endsWith('.gz')) { + ch_gff = GUNZIP_GFF ([ [:], file(gff, checkIfExists: true) ]).gunzip + ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) + } else { + ch_gff = Channel.value(file(gff, checkIfExists: true)).map { [ [:], it ] } + } + ch_gtf = GFFREAD(ch_gff, []).gtf.map { it[1] } + ch_versions = ch_versions.mix(GFFREAD.out.versions) } - // - // Uncompress GTF annotation file or create from GFF3 if required - // - if (gtf || gff) { - if (gtf) { - if (gtf.endsWith('.gz')) { - ch_gtf = GUNZIP_GTF ( [ [:], file(gtf, checkIfExists: true) ] ).gunzip.map { it[1] } - ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) - } else { - ch_gtf = Channel.value(file(gtf, 
checkIfExists: true)) - } - } else if (gff) { - if (gff.endsWith('.gz')) { - ch_gff = GUNZIP_GFF ( [ [:], file(gff, checkIfExists: true) ] ).gunzip - ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) - } else { - ch_gff = Channel.value(file(gff, checkIfExists: true)).map { [ [:], it ] } - } - ch_gtf = GFFREAD ( ch_gff, [] ).gtf.map { it[1] } - ch_versions = ch_versions.mix(GFFREAD.out.versions) - } + //------------------------------------- + // 2) Check if we actually have a FASTA + //------------------------------------- + def fasta_provided = (fasta ? true : false) - // Determine whether to filter the GTF or not - def filter_gtf = - (( - // Condition 1: Alignment is required and aligner is set - !skip_alignment && aligner - ) || - ( - // Condition 2: Pseudoalignment is required and pseudoaligner is set - !skip_pseudo_alignment && pseudo_aligner - ) || - ( - // Condition 3: Transcript FASTA file is not provided - !transcript_fasta - )) && - ( - // Condition 4: --skip_gtf_filter is not provided - !skip_gtf_filter - ) - if (filter_gtf) { - GTF_FILTER ( ch_fasta, ch_gtf ) - ch_gtf = GTF_FILTER.out.genome_gtf - ch_versions = ch_versions.mix(GTF_FILTER.out.versions) + ch_fasta = Channel.of([]) + if (fasta_provided) { + // Uncompress FASTA if needed + if (fasta.endsWith('.gz')) { + ch_fasta = GUNZIP_FASTA ([ [:], file(fasta, checkIfExists: true) ]).gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) + } else { + ch_fasta = Channel.value(file(fasta, checkIfExists: true)) } } - // - // Uncompress additional fasta file and concatenate with reference fasta and gtf files - // - def biotype = gencode ? 
"gene_type" : featurecounts_group_type - if (additional_fasta) { + //---------------------------------------- + // 3) Filter GTF if needed & FASTA present + //---------------------------------------- + def filter_gtf_needed = ( + (!skip_alignment && aligner) || + (!skip_pseudo_alignment && pseudo_aligner) || + (!transcript_fasta) + ) && !skip_gtf_filter + + if (filter_gtf_needed) { + GTF_FILTER(ch_fasta, ch_gtf) + ch_gtf = GTF_FILTER.out.genome_gtf.first() + ch_versions = ch_versions.mix(GTF_FILTER.out.versions) + } + + //--------------------------------------------------- + // 4) Concatenate additional FASTA (if both are given) + //--------------------------------------------------- + ch_add_fasta = Channel.empty() + if (fasta_provided && additional_fasta) { if (additional_fasta.endsWith('.gz')) { - ch_add_fasta = GUNZIP_ADDITIONAL_FASTA ( [ [:], file(additional_fasta, checkIfExists: true) ] ).gunzip.map { it[1] } + ch_add_fasta = GUNZIP_ADDITIONAL_FASTA([ [:], file(additional_fasta, checkIfExists: true) ]).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_ADDITIONAL_FASTA.out.versions) } else { ch_add_fasta = Channel.value(file(additional_fasta, checkIfExists: true)) } - CUSTOM_CATADDITIONALFASTA ( + CUSTOM_CATADDITIONALFASTA( ch_fasta.combine(ch_gtf).map { fasta, gtf -> [ [:], fasta, gtf ] }, ch_add_fasta.map { [ [:], it ] }, - biotype + gencode ? 
"gene_type" : featurecounts_group_type ) ch_fasta = CUSTOM_CATADDITIONALFASTA.out.fasta.map { it[1] }.first() ch_gtf = CUSTOM_CATADDITIONALFASTA.out.gtf.map { it[1] }.first() ch_versions = ch_versions.mix(CUSTOM_CATADDITIONALFASTA.out.versions) } - // - // Uncompress gene BED annotation file or create from GTF if required - // + //------------------------------------------------------ + // 5) Uncompress gene BED or create from GTF if not given + //------------------------------------------------------ + ch_gene_bed = Channel.empty() if (gene_bed) { if (gene_bed.endsWith('.gz')) { - ch_gene_bed = GUNZIP_GENE_BED ( [ [:], file(gene_bed, checkIfExists: true) ] ).gunzip.map { it[1] } + ch_gene_bed = GUNZIP_GENE_BED ([ [:], file(gene_bed, checkIfExists: true) ]).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions) } else { ch_gene_bed = Channel.value(file(gene_bed, checkIfExists: true)) } } else { - ch_gene_bed = GTF2BED ( ch_gtf ).bed + ch_gene_bed = GTF2BED(ch_gtf).bed ch_versions = ch_versions.mix(GTF2BED.out.versions) } - // - // Uncompress transcript fasta file / create if required - // + //---------------------------------------------------------------------- + // 6) Transcript FASTA: + // - If provided, decompress (optionally preprocess if GENCODE) + // - If not provided but have genome+GTF, create from them + //---------------------------------------------------------------------- + ch_transcript_fasta = Channel.empty() if (transcript_fasta) { + // Use user-provided transcript FASTA if (transcript_fasta.endsWith('.gz')) { - ch_transcript_fasta = GUNZIP_TRANSCRIPT_FASTA ( [ [:], file(transcript_fasta, checkIfExists: true) ] ).gunzip.map { it[1] } + ch_transcript_fasta = GUNZIP_TRANSCRIPT_FASTA ([ [:], file(transcript_fasta, checkIfExists: true) ]).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_TRANSCRIPT_FASTA.out.versions) } else { ch_transcript_fasta = Channel.value(file(transcript_fasta, checkIfExists: true)) } if 
(gencode) { - PREPROCESS_TRANSCRIPTS_FASTA_GENCODE ( ch_transcript_fasta ) + PREPROCESS_TRANSCRIPTS_FASTA_GENCODE(ch_transcript_fasta) ch_transcript_fasta = PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.fasta ch_versions = ch_versions.mix(PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.versions) } - } else { - ch_transcript_fasta = MAKE_TRANSCRIPTS_FASTA ( ch_fasta, ch_gtf ).transcript_fasta + } else if (fasta_provided) { + // Build transcripts from genome if we have it + ch_transcript_fasta = MAKE_TRANSCRIPTS_FASTA(ch_fasta, ch_gtf).transcript_fasta ch_versions = ch_versions.mix(MAKE_TRANSCRIPTS_FASTA.out.versions) } - // - // Create chromosome sizes file - // - CUSTOM_GETCHROMSIZES ( ch_fasta.map { [ [:], it ] } ) - ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] } - ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] } - ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) + //------------------------------------------------------- + // 7) FAI / chrom.sizes only if we actually have a genome + //------------------------------------------------------- + ch_fai = Channel.empty() + ch_chrom_sizes = Channel.empty() + if (fasta_provided) { + CUSTOM_GETCHROMSIZES(ch_fasta.map { [ [:], it ] }) + ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] } + ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] } + ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) + } - // - // Get list of indices that need to be created - // + //------------------------------------------------ + // 8) Determine which indices we actually want built + //------------------------------------------------ def prepare_tool_indices = [] - if (!skip_bbsplit) { prepare_tool_indices << 'bbsplit' } - if (!skip_sortmerna) { prepare_tool_indices << 'sortmerna' } - if (!skip_alignment) { prepare_tool_indices << aligner } - if (!skip_pseudo_alignment && pseudo_aligner) { prepare_tool_indices << pseudo_aligner } + if (!skip_bbsplit) { prepare_tool_indices << 'bbsplit' } + if 
(!skip_sortmerna) { prepare_tool_indices << 'sortmerna' } + if (!skip_alignment && aligner) { prepare_tool_indices << aligner } + if (!skip_pseudo_alignment && pseudo_aligner) { prepare_tool_indices << pseudo_aligner } - // - // Uncompress BBSplit index or generate from scratch if required - // + //--------------------------------------------------------- + // 9) BBSplit index: uses FASTA only if we generate from scratch + //--------------------------------------------------------- ch_bbsplit_index = Channel.empty() if ('bbsplit' in prepare_tool_indices) { if (bbsplit_index) { + // Use user-provided bbsplit index if (bbsplit_index.endsWith('.tar.gz')) { - ch_bbsplit_index = UNTAR_BBSPLIT_INDEX ( [ [:], bbsplit_index ] ).untar.map { it[1] } + ch_bbsplit_index = UNTAR_BBSPLIT_INDEX ([ [:], file(bbsplit_index, checkIfExists: true) ]).untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_BBSPLIT_INDEX.out.versions) } else { - ch_bbsplit_index = Channel.value(file(bbsplit_index)) + ch_bbsplit_index = Channel.value(file(bbsplit_index, checkIfExists: true)) } - } else { + } + else if (fasta_provided) { + // Build it from scratch if we have FASTA Channel - .from(file(bbsplit_fasta_list)) + .from(file(bbsplit_fasta_list, checkIfExists: true)) .splitCsv() // Read in 2 column csv file: short_name,path_to_fasta - .flatMap { id, fasta -> [ [ 'id', id ], [ 'fasta', file(fasta, checkIfExists: true) ] ] } // Flatten entries to be able to groupTuple by a common key + .flatMap { id, fafile -> [ [ 'id', id ], [ 'fasta', file(fafile, checkIfExists: true) ] ] } // Flatten entries to be able to groupTuple by a common key .groupTuple() .map { it -> it[1] } // Get rid of keys and keep grouped values .collect { [ it ] } // Collect entries as a list to pass as "tuple val(short_names), path(path_to_fasta)" to module .set { ch_bbsplit_fasta_list } - ch_bbsplit_index = BBMAP_BBSPLIT ( [ [:], [] ], [], ch_fasta, ch_bbsplit_fasta_list, true ).index - ch_versions = 
ch_versions.mix(BBMAP_BBSPLIT.out.versions) + ch_bbsplit_index = BBMAP_BBSPLIT( + [ [:], [] ], + [], + ch_fasta, + ch_bbsplit_fasta_list, + true + ).index + ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions) } + // else: no FASTA and no user-provided index -> remains empty } - // - // Uncompress sortmerna index or generate from scratch if required - // + //------------------------------------------------------------- + // 10) SortMeRNA index does not require the genome FASTA at all + //------------------------------------------------------------- ch_sortmerna_index = Channel.empty() - ch_rrna_fastas = Channel.empty() - + ch_rrna_fastas = Channel.empty() if ('sortmerna' in prepare_tool_indices) { - ribo_db = file(sortmerna_fasta_list) - - // SortMeRNA needs the rRNAs even if we're providing the index + // We always need the rRNA FASTAs + def ribo_db = file(sortmerna_fasta_list) ch_rrna_fastas = Channel.from(ribo_db.readLines()) - .map { row -> file(row, checkIfExists: true) } + .map { row -> file(row) } if (sortmerna_index) { if (sortmerna_index.endsWith('.tar.gz')) { - ch_sortmerna_index = UNTAR_SORTMERNA_INDEX ( [ [:], sortmerna_index ] ).untar.map { it[1] } - ch_versions = ch_versions.mix(UNTAR_SORTMERNA_INDEX.out.versions) + ch_sortmerna_index = UNTAR_SORTMERNA_INDEX ([ [:], file(sortmerna_index, checkIfExists: true) ]).untar.map { it[1] } + ch_versions = ch_versions.mix(UNTAR_SORTMERNA_INDEX.out.versions) } else { - ch_sortmerna_index = Channel.value([[:], file(sortmerna_index)]) + ch_sortmerna_index = Channel.value([ [:], file(sortmerna_index, checkIfExists: true) ]) } } else { - - SORTMERNA_INDEX ( - Channel.of([ [],[] ]), + // Build new SortMeRNA index from the rRNA references + SORTMERNA_INDEX( + Channel.of([ [], [] ]), ch_rrna_fastas.collect().map { [ 'rrna_refs', it ] }, - Channel.of([ [],[] ]) + Channel.of([ [], [] ]) ) ch_sortmerna_index = SORTMERNA_INDEX.out.index.first() - ch_versions = ch_versions.mix(SORTMERNA_INDEX.out.versions) + 
ch_versions = ch_versions.mix(SORTMERNA_INDEX.out.versions) } } - // - // Uncompress STAR index or generate from scratch if required - // + //---------------------------------------------------- + // 11) STAR index (e.g. for 'star_salmon') -> needs FASTA if built + //---------------------------------------------------- ch_star_index = Channel.empty() if ('star_salmon' in prepare_tool_indices) { if (star_index) { if (star_index.endsWith('.tar.gz')) { - ch_star_index = UNTAR_STAR_INDEX ( [ [:], star_index ] ).untar.map { it[1] } + ch_star_index = UNTAR_STAR_INDEX ([ [:], file(star_index, checkIfExists: true) ]).untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) } else { - ch_star_index = Channel.value(file(star_index)) + ch_star_index = Channel.value(file(star_index, checkIfExists: true)) } - } else { - // Check if an AWS iGenome has been provided to use the appropriate version of STAR + } + else if (fasta_provided) { + // Build new STAR index + // Possibly check AWS iGenome conditions def is_aws_igenome = false - if (fasta && gtf) { - if ((file(fasta).getName() - '.gz' == 'genome.fa') && (file(gtf).getName() - '.gz' == 'genes.gtf')) { - is_aws_igenome = true - } + if (file(fasta, checkIfExists: true).getName() - '.gz' == 'genome.fa' && file(gtf, checkIfExists: true).getName() - '.gz' == 'genes.gtf') { + is_aws_igenome = true } if (is_aws_igenome) { - ch_star_index = STAR_GENOMEGENERATE_IGENOMES ( ch_fasta, ch_gtf ).index + ch_star_index = STAR_GENOMEGENERATE_IGENOMES(ch_fasta, ch_gtf).index ch_versions = ch_versions.mix(STAR_GENOMEGENERATE_IGENOMES.out.versions) } else { - ch_star_index = STAR_GENOMEGENERATE ( ch_fasta.map { [ [:], it ] }, ch_gtf.map { [ [:], it ] } ).index.map { it[1] } + ch_star_index = STAR_GENOMEGENERATE( + ch_fasta.map { [ [:], it ] }, + ch_gtf.map { [ [:], it ] } + ).index.map { it[1] } ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) } } } - // - // Uncompress RSEM index or generate from scratch if 
required - // + //------------------------------------------------ + // 12) RSEM index -> needs FASTA & GTF if built + //------------------------------------------------ ch_rsem_index = Channel.empty() if ('star_rsem' in prepare_tool_indices) { if (rsem_index) { if (rsem_index.endsWith('.tar.gz')) { - ch_rsem_index = UNTAR_RSEM_INDEX ( [ [:], rsem_index ] ).untar.map { it[1] } + ch_rsem_index = UNTAR_RSEM_INDEX ([ [:], file(rsem_index, checkIfExists: true) ]).untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_RSEM_INDEX.out.versions) } else { - ch_rsem_index = Channel.value(file(rsem_index)) + ch_rsem_index = Channel.value(file(rsem_index, checkIfExists: true)) } - } else { - ch_rsem_index = RSEM_PREPAREREFERENCE_GENOME ( ch_fasta, ch_gtf ).index + } + else if (fasta_provided) { + ch_rsem_index = RSEM_PREPAREREFERENCE_GENOME(ch_fasta, ch_gtf).index ch_versions = ch_versions.mix(RSEM_PREPAREREFERENCE_GENOME.out.versions) } } - // - // Uncompress HISAT2 index or generate from scratch if required - // + //--------------------------------------------------------- + // 13) HISAT2 index -> needs FASTA & GTF if built + //--------------------------------------------------------- ch_splicesites = Channel.empty() ch_hisat2_index = Channel.empty() if ('hisat2' in prepare_tool_indices) { - if (!splicesites) { - ch_splicesites = HISAT2_EXTRACTSPLICESITES ( ch_gtf.map { [ [:], it ] } ).txt.map { it[1] } + // splicesites + if (splicesites) { + ch_splicesites = Channel.value(file(splicesites, checkIfExists: true)) + } + else if (fasta_provided) { + ch_splicesites = HISAT2_EXTRACTSPLICESITES(ch_gtf.map { [ [:], it ] }).txt.map { it[1] } ch_versions = ch_versions.mix(HISAT2_EXTRACTSPLICESITES.out.versions) - } else { - ch_splicesites = Channel.value(file(splicesites)) } + // the index if (hisat2_index) { if (hisat2_index.endsWith('.tar.gz')) { - ch_hisat2_index = UNTAR_HISAT2_INDEX ( [ [:], hisat2_index ] ).untar.map { it[1] } + ch_hisat2_index = UNTAR_HISAT2_INDEX ([ [:], 
file(hisat2_index, checkIfExists: true) ]).untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_HISAT2_INDEX.out.versions) } else { - ch_hisat2_index = Channel.value(file(hisat2_index)) + ch_hisat2_index = Channel.value(file(hisat2_index, checkIfExists: true)) } - } else { - ch_hisat2_index = HISAT2_BUILD ( ch_fasta.map { [ [:], it ] }, ch_gtf.map { [ [:], it ] }, ch_splicesites.map { [ [:], it ] } ).index.map { it[1] } - ch_versions = ch_versions.mix(HISAT2_BUILD.out.versions) + } + else if (fasta_provided) { + ch_hisat2_index = HISAT2_BUILD( + ch_fasta.map { [ [:], it ] }, + ch_gtf.map { [ [:], it ] }, + ch_splicesites.map { [ [:], it ] } + ).index.map { it[1] } + ch_versions = ch_versions.mix(HISAT2_BUILD.out.versions) } } - // - // Uncompress Salmon index or generate from scratch if required - // + //------------------------------------------------------ + // 14) Salmon index -> can skip genome if transcript_fasta is enough + //------------------------------------------------------ + ch_salmon_index = Channel.empty() if (salmon_index) { if (salmon_index.endsWith('.tar.gz')) { @@ -340,16 +370,22 @@ workflow PREPARE_GENOME { } else { ch_salmon_index = Channel.value(file(salmon_index)) } - } else { - if ('salmon' in prepare_tool_indices) { - ch_salmon_index = SALMON_INDEX ( ch_fasta, ch_transcript_fasta ).index + } else if ('salmon' in prepare_tool_indices) { + if (ch_transcript_fasta && fasta_provided) { + // build from transcript FASTA + genome FASTA + ch_salmon_index = SALMON_INDEX(ch_fasta, ch_transcript_fasta).index + ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) + } + else if (ch_transcript_fasta) { + // some Salmon module can run with just a transcript FASTA + ch_salmon_index = SALMON_INDEX([], ch_transcript_fasta).index ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) } } - // - // Uncompress Kallisto index or generate from scratch if required - // + //-------------------------------------------------- + // 15) Kallisto index -> only 
needs transcript FASTA + //-------------------------------------------------- ch_kallisto_index = Channel.empty() if (kallisto_index) { if (kallisto_index.endsWith('.tar.gz')) { @@ -365,6 +401,9 @@ workflow PREPARE_GENOME { } } + //------------------ + // 16) Emit channels + //------------------ emit: fasta = ch_fasta // channel: path(genome.fasta) gtf = ch_gtf // channel: path(genome.gtf) diff --git a/subworkflows/local/prepare_genome/tests/main.nf.test.snap b/subworkflows/local/prepare_genome/tests/main.nf.test.snap index f5c0a10f7..a53a9ba4f 100644 --- a/subworkflows/local/prepare_genome/tests/main.nf.test.snap +++ b/subworkflows/local/prepare_genome/tests/main.nf.test.snap @@ -48,10 +48,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:42:50.037244182" + "timestamp": "2025-01-22T16:31:30.831489059" }, "skip_pseudo_alignment - stub": { "content": [ @@ -103,10 +103,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:36:51.108422744" + "timestamp": "2025-01-22T16:26:39.589338794" }, "skip_gtf_filter": { "content": [ @@ -157,10 +157,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:21:35.428370538" + "timestamp": "2025-01-22T16:17:41.356308487" }, "gencode = false - stub": { "content": [ @@ -212,10 +212,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:34:45.22576799" + "timestamp": "2025-01-22T16:25:11.750062191" }, "gff = false - stub": { "content": [ @@ -267,10 +267,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:37:50.119432994" + "timestamp": "2025-01-22T16:27:27.479980116" }, 
"skip_pseudoalignment = true - stub": { "content": [ @@ -322,10 +322,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:44:30.027925166" + "timestamp": "2025-01-22T16:32:43.04638782" }, "featurecounts_group_type = 'gene_type' - stub": { "content": [ @@ -377,10 +377,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:42:09.256212794" + "timestamp": "2025-01-22T16:31:08.944159607" }, "gtf = false": { "content": [ @@ -434,10 +434,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:24:03.080886311" + "timestamp": "2025-01-22T16:19:16.226455835" }, "gfp = false": { "content": [ @@ -451,13 +451,13 @@ "/ngi-igenomes/testdata/nf-core/pipelines/rnaseq/3.15/reference/genome.fasta" ], [ - "genome.filtered.gtf:md5,ef6fccd153a21c329670462d602ed2d0" + "genes_with_empty_tid.filtered.gtf:md5,ef6fccd153a21c329670462d602ed2d0" ], [ "genome.fasta.fai:md5,2cd76d936cbfa386b14154506c2041b2" ], [ - "genome.filtered.bed:md5,e507dc33673e76c32abe344f4dc07952" + "genes_with_empty_tid.filtered.bed:md5,e507dc33673e76c32abe344f4dc07952" ], [ "genome.fasta.sizes:md5,29218009212157c49dbc6596621ec780" @@ -488,10 +488,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:25:11.277955385" + "timestamp": "2025-01-22T16:20:01.366185938" }, "skip_bbsplit = true": { "content": [ @@ -542,10 +542,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:32:08.044528172" + "timestamp": "2025-01-22T16:23:43.324382545" }, "salmon_index = false - stub": { "content": [ @@ -597,10 +597,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + 
"nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:40:20.088184403" + "timestamp": "2025-01-22T16:29:42.989824541" }, "skip_alignment": { "content": [ @@ -652,10 +652,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:22:31.848885428" + "timestamp": "2025-01-22T16:18:25.928617257" }, "gfp = false - stub": { "content": [ @@ -706,10 +706,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:38:24.011200927" + "timestamp": "2025-01-22T16:27:50.906825801" }, "gencode = false": { "content": [ @@ -761,10 +761,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:21:09.927667083" + "timestamp": "2025-01-22T16:17:19.152939508" }, "default options": { "content": [ @@ -816,10 +816,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:20:44.91884847" + "timestamp": "2025-01-22T16:16:39.558002005" }, "gencode = true - stub": { "content": [ @@ -872,10 +872,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:41:38.130901233" + "timestamp": "2025-01-22T16:30:42.966501563" }, "skip_alignment - stub": { "content": [ @@ -927,10 +927,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:36:19.887113789" + "timestamp": "2025-01-22T16:26:16.37556928" }, "skip_bbsplit = true - stub": { "content": [ @@ -1040,10 +1040,10 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:43:15.317916936" + "timestamp": 
"2025-01-22T16:31:58.139538299" }, "transcriptome = false": { "content": [ @@ -1096,10 +1096,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:26:03.082922797" + "timestamp": "2025-01-22T16:20:43.257059452" }, "skip_pseudoalignment = true": { "content": [ @@ -1151,10 +1151,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:33:32.945973388" + "timestamp": "2025-01-22T16:24:28.57936905" }, "skip_gtf_filter - stub": { "content": [ @@ -1205,10 +1205,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:35:08.735012633" + "timestamp": "2025-01-22T16:25:32.773764178" }, "gencode = true": { "content": [ @@ -1261,10 +1261,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:29:54.92546344" + "timestamp": "2025-01-22T16:22:36.190529048" }, "hisat2_index = false": { "content": [ @@ -1316,10 +1316,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:29:08.52615029" + "timestamp": "2025-01-22T16:22:13.125436103" }, "rsem_index = false - stub": { "content": [ @@ -1371,10 +1371,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:39:53.29274644" + "timestamp": "2025-01-22T16:29:13.942291871" }, "featurecounts_group_type = 'gene_type'": { "content": [ @@ -1426,10 +1426,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:30:38.236090892" + "timestamp": "2025-01-22T16:22:58.703990578" }, "with bed - stub": { "content": [ @@ -1480,10 
+1480,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:39:24.95243753" + "timestamp": "2025-01-22T16:28:44.518815375" }, "skip_pseudo_alignment": { "content": [ @@ -1535,10 +1535,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:23:12.222452353" + "timestamp": "2025-01-22T16:18:50.035264983" }, "skip_bbsplit": { "content": [ @@ -1589,10 +1589,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:22:04.417171396" + "timestamp": "2025-01-22T16:18:03.05536554" }, "with bed": { "content": [ @@ -1643,10 +1643,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:26:52.427274463" + "timestamp": "2025-01-22T16:21:05.592448003" }, "gtf = false - stub": { "content": [ @@ -1700,10 +1700,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:37:24.089579664" + "timestamp": "2025-01-22T16:27:04.167079535" }, "gff = false": { "content": [ @@ -1755,10 +1755,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:24:29.339763401" + "timestamp": "2025-01-22T16:19:39.929655668" }, "default options - stub": { "content": [ @@ -1810,10 +1810,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:34:12.631669075" + "timestamp": "2025-01-22T16:24:49.758178777" }, "salmon_index = false": { "content": [ @@ -1865,10 +1865,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": 
"2024-10-21T14:28:32.12627675" + "timestamp": "2025-01-22T16:21:50.632563783" }, "rsem_index = false": { "content": [ @@ -1920,10 +1920,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:27:44.114765753" + "timestamp": "2025-01-22T16:21:28.064555135" }, "skip_alignment = true - stub": { "content": [ @@ -1975,10 +1975,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:43:56.303595701" + "timestamp": "2025-01-22T16:32:20.147025523" }, "transcriptome = false - stub": { "content": [ @@ -2031,10 +2031,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:38:52.064137724" + "timestamp": "2025-01-22T16:28:14.178838335" }, "skip_alignment = true": { "content": [ @@ -2086,10 +2086,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:32:57.856332379" + "timestamp": "2025-01-22T16:24:06.255647938" }, "skip_gtf_filter = true": { "content": [ @@ -2140,10 +2140,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:31:25.686724069" + "timestamp": "2025-01-22T16:23:20.680526383" }, "hisat2_index = false - stub": { "content": [ @@ -2195,10 +2195,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:40:52.285127199" + "timestamp": "2025-01-22T16:30:12.605234376" }, "skip_bbsplit - stub": { "content": [ @@ -2308,9 +2308,9 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-10-21T14:35:46.545484011" + "timestamp": "2025-01-22T16:25:54.053589157" } } \ No 
newline at end of file diff --git a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf index 5e8820ddc..03ced5d4a 100644 --- a/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_rnaseq_pipeline/main.nf @@ -180,8 +180,14 @@ def validateInputParameters() { genomeExistsError() - if (!params.fasta) { - error("Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file.") + if ( + !params.fasta && + ( + ! params.skip_alignment || // Alignment needs fasta + ! params.transcript_fasta // Dynamically making a transcript fasta needs the fasta + ) + ) { + error("Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file. You must supply a genome FASTA file or use --skip_alignment and provide your own transcript fasta using --transcript_fasta for use in quantification.") } if (!params.gtf && !params.gff) { diff --git a/tests/nofasta.nf.test b/tests/nofasta.nf.test new file mode 100644 index 000000000..41c5d9e94 --- /dev/null +++ b/tests/nofasta.nf.test @@ -0,0 +1,39 @@ +nextflow_pipeline { + + name "Test pipeline by omitting fasta input" + script "../main.nf" + + test("Params: no fasta") { + + when { + params { + outdir = "$outputDir" + skip_alignment = true + fasta = null + additional_fasta = null + salmon_index = null + transcript_fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/d1f59361a013a8820c824d606f5853db0d6c7999/reference/transcriptome_match_gtf.fa" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert 
snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_rnaseq_software_mqc_versions.yml"), + // All stable path names, with a relative path + stable_name, + // All files with stable contents + stable_path + ).match() } + ) + } + } +} diff --git a/tests/nofasta.nf.test.snap b/tests/nofasta.nf.test.snap new file mode 100644 index 000000000..f3593cdb6 --- /dev/null +++ b/tests/nofasta.nf.test.snap @@ -0,0 +1,429 @@ +{ + "Params: no fasta": { + "content": [ + 41, + { + "CAT_FASTQ": { + "cat": 9.5 + }, + "CUSTOM_TX2GENE": { + "python": "3.10.4" + }, + "DESEQ2_QC_PSEUDO": { + "r-base": "4.0.3", + "bioconductor-deseq2": "1.28.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FQ_LINT": { + "fq": "0.12.0 (2024-07-08)" + }, + "FQ_SUBSAMPLE": { + "fq": "0.12.0 (2024-07-08)" + }, + "GTF2BED": { + "perl": "5.26.2" + }, + "GTF_FILTER": { + "python": "3.9.5" + }, + "GUNZIP_GTF": { + "gunzip": 1.1 + }, + "SALMON_INDEX": { + "salmon": "1.10.3" + }, + "SALMON_QUANT": { + "salmon": "1.10.3" + }, + "SE_GENE": { + "bioconductor-summarizedexperiment": "1.32.0" + }, + "TRIMGALORE": { + "trimgalore": "0.6.10", + "cutadapt": 4.9, + "pigz": 2.8 + }, + "TXIMETA_TXIMPORT": { + "bioconductor-tximeta": "1.20.1" + }, + "Workflow": { + "nf-core/rnaseq": "v3.19.0dev" + } + }, + [ + "fastqc", + "fastqc/raw", + "fastqc/raw/RAP1_IAA_30M_REP1_raw_1_fastqc.html", + "fastqc/raw/RAP1_IAA_30M_REP1_raw_1_fastqc.zip", + "fastqc/raw/RAP1_IAA_30M_REP1_raw_2_fastqc.html", + "fastqc/raw/RAP1_IAA_30M_REP1_raw_2_fastqc.zip", + "fastqc/raw/RAP1_UNINDUCED_REP1_raw_fastqc.html", + "fastqc/raw/RAP1_UNINDUCED_REP1_raw_fastqc.zip", + "fastqc/raw/RAP1_UNINDUCED_REP2_raw_fastqc.html", + "fastqc/raw/RAP1_UNINDUCED_REP2_raw_fastqc.zip", + "fastqc/raw/WT_REP1_raw_1_fastqc.html", +
"fastqc/raw/WT_REP1_raw_1_fastqc.zip", + "fastqc/raw/WT_REP1_raw_2_fastqc.html", + "fastqc/raw/WT_REP1_raw_2_fastqc.zip", + "fastqc/raw/WT_REP2_raw_1_fastqc.html", + "fastqc/raw/WT_REP2_raw_1_fastqc.zip", + "fastqc/raw/WT_REP2_raw_2_fastqc.html", + "fastqc/raw/WT_REP2_raw_2_fastqc.zip", + "fastqc/trim", + "fastqc/trim/RAP1_IAA_30M_REP1_trimmed_1_val_1_fastqc.html", + "fastqc/trim/RAP1_IAA_30M_REP1_trimmed_1_val_1_fastqc.zip", + "fastqc/trim/RAP1_IAA_30M_REP1_trimmed_2_val_2_fastqc.html", + "fastqc/trim/RAP1_IAA_30M_REP1_trimmed_2_val_2_fastqc.zip", + "fastqc/trim/RAP1_UNINDUCED_REP1_trimmed_trimmed_fastqc.html", + "fastqc/trim/RAP1_UNINDUCED_REP1_trimmed_trimmed_fastqc.zip", + "fastqc/trim/RAP1_UNINDUCED_REP2_trimmed_trimmed_fastqc.html", + "fastqc/trim/RAP1_UNINDUCED_REP2_trimmed_trimmed_fastqc.zip", + "fastqc/trim/WT_REP1_trimmed_1_val_1_fastqc.html", + "fastqc/trim/WT_REP1_trimmed_1_val_1_fastqc.zip", + "fastqc/trim/WT_REP1_trimmed_2_val_2_fastqc.html", + "fastqc/trim/WT_REP1_trimmed_2_val_2_fastqc.zip", + "fastqc/trim/WT_REP2_trimmed_1_val_1_fastqc.html", + "fastqc/trim/WT_REP2_trimmed_1_val_1_fastqc.zip", + "fastqc/trim/WT_REP2_trimmed_2_val_2_fastqc.html", + "fastqc/trim/WT_REP2_trimmed_2_val_2_fastqc.zip", + "fq_lint", + "fq_lint/raw", + "fq_lint/raw/RAP1_IAA_30M_REP1.fq_lint.txt", + "fq_lint/raw/RAP1_UNINDUCED_REP1.fq_lint.txt", + "fq_lint/raw/RAP1_UNINDUCED_REP2.fq_lint.txt", + "fq_lint/raw/WT_REP1.fq_lint.txt", + "fq_lint/raw/WT_REP2.fq_lint.txt", + "fq_lint/trimmed", + "fq_lint/trimmed/RAP1_IAA_30M_REP1.fq_lint.txt", + "fq_lint/trimmed/RAP1_UNINDUCED_REP1.fq_lint.txt", + "fq_lint/trimmed/RAP1_UNINDUCED_REP2.fq_lint.txt", + "fq_lint/trimmed/WT_REP1.fq_lint.txt", + "fq_lint/trimmed/WT_REP2.fq_lint.txt", + "multiqc", + "multiqc/multiqc_report.html", + "multiqc/multiqc_report_data", + "multiqc/multiqc_report_data/cutadapt_filtered_reads_plot.txt", + "multiqc/multiqc_report_data/cutadapt_trimmed_sequences_plot_3_Counts.txt", + 
"multiqc/multiqc_report_data/cutadapt_trimmed_sequences_plot_3_Obs_Exp.txt", + "multiqc/multiqc_report_data/fastqc_raw-status-check-heatmap.txt", + "multiqc/multiqc_report_data/fastqc_raw_adapter_content_plot.txt", + "multiqc/multiqc_report_data/fastqc_raw_overrepresented_sequences_plot.txt", + "multiqc/multiqc_report_data/fastqc_raw_per_base_n_content_plot.txt", + "multiqc/multiqc_report_data/fastqc_raw_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_report_data/fastqc_raw_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_report_data/fastqc_raw_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_report_data/fastqc_raw_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_report_data/fastqc_raw_sequence_counts_plot.txt", + "multiqc/multiqc_report_data/fastqc_raw_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_report_data/fastqc_raw_top_overrepresented_sequences_table.txt", + "multiqc/multiqc_report_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_report_data/fastqc_trimmed-status-check-heatmap.txt", + "multiqc/multiqc_report_data/fastqc_trimmed_overrepresented_sequences_plot.txt", + "multiqc/multiqc_report_data/fastqc_trimmed_per_base_n_content_plot.txt", + "multiqc/multiqc_report_data/fastqc_trimmed_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_report_data/fastqc_trimmed_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_report_data/fastqc_trimmed_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_report_data/fastqc_trimmed_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_report_data/fastqc_trimmed_sequence_counts_plot.txt", + "multiqc/multiqc_report_data/fastqc_trimmed_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_report_data/fastqc_trimmed_top_overrepresented_sequences_table.txt", + "multiqc/multiqc_report_data/multiqc.log", + "multiqc/multiqc_report_data/multiqc_citations.txt", + "multiqc/multiqc_report_data/multiqc_cutadapt.txt", + 
"multiqc/multiqc_report_data/multiqc_data.json", + "multiqc/multiqc_report_data/multiqc_fastqc_fastqc_raw.txt", + "multiqc/multiqc_report_data/multiqc_fastqc_fastqc_trimmed.txt", + "multiqc/multiqc_report_data/multiqc_general_stats.txt", + "multiqc/multiqc_report_data/multiqc_salmon.txt", + "multiqc/multiqc_report_data/multiqc_sample-relationships.txt", + "multiqc/multiqc_report_data/multiqc_sample-relationships_1.txt", + "multiqc/multiqc_report_data/multiqc_software_versions.txt", + "multiqc/multiqc_report_data/multiqc_sources.txt", + "multiqc/multiqc_report_data/salmon_plot.txt", + "multiqc/multiqc_report_plots", + "multiqc/multiqc_report_plots/pdf", + "multiqc/multiqc_report_plots/pdf/cutadapt_filtered_reads_plot-cnt.pdf", + "multiqc/multiqc_report_plots/pdf/cutadapt_filtered_reads_plot-pct.pdf", + "multiqc/multiqc_report_plots/pdf/cutadapt_trimmed_sequences_plot_3_Counts.pdf", + "multiqc/multiqc_report_plots/pdf/cutadapt_trimmed_sequences_plot_3_Obs_Exp.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_raw-status-check-heatmap.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_raw_adapter_content_plot.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_raw_overrepresented_sequences_plot.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_raw_per_base_n_content_plot.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_raw_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_raw_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_raw_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_raw_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_raw_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_raw_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_raw_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_raw_top_overrepresented_sequences_table.pdf", + 
"multiqc/multiqc_report_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_trimmed-status-check-heatmap.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_trimmed_overrepresented_sequences_plot.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_trimmed_per_base_n_content_plot.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_trimmed_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_trimmed_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_trimmed_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_trimmed_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_trimmed_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_trimmed_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_trimmed_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_report_plots/pdf/fastqc_trimmed_top_overrepresented_sequences_table.pdf", + "multiqc/multiqc_report_plots/pdf/general_stats_table.pdf", + "multiqc/multiqc_report_plots/pdf/salmon_plot.pdf", + "multiqc/multiqc_report_plots/pdf/sample-relationships.pdf", + "multiqc/multiqc_report_plots/png", + "multiqc/multiqc_report_plots/png/cutadapt_filtered_reads_plot-cnt.png", + "multiqc/multiqc_report_plots/png/cutadapt_filtered_reads_plot-pct.png", + "multiqc/multiqc_report_plots/png/cutadapt_trimmed_sequences_plot_3_Counts.png", + "multiqc/multiqc_report_plots/png/cutadapt_trimmed_sequences_plot_3_Obs_Exp.png", + "multiqc/multiqc_report_plots/png/fastqc_raw-status-check-heatmap.png", + "multiqc/multiqc_report_plots/png/fastqc_raw_adapter_content_plot.png", + "multiqc/multiqc_report_plots/png/fastqc_raw_overrepresented_sequences_plot.png", + "multiqc/multiqc_report_plots/png/fastqc_raw_per_base_n_content_plot.png", + "multiqc/multiqc_report_plots/png/fastqc_raw_per_base_sequence_quality_plot.png", + 
"multiqc/multiqc_report_plots/png/fastqc_raw_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_report_plots/png/fastqc_raw_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_report_plots/png/fastqc_raw_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_report_plots/png/fastqc_raw_sequence_counts_plot-cnt.png", + "multiqc/multiqc_report_plots/png/fastqc_raw_sequence_counts_plot-pct.png", + "multiqc/multiqc_report_plots/png/fastqc_raw_sequence_duplication_levels_plot.png", + "multiqc/multiqc_report_plots/png/fastqc_raw_top_overrepresented_sequences_table.png", + "multiqc/multiqc_report_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_report_plots/png/fastqc_trimmed-status-check-heatmap.png", + "multiqc/multiqc_report_plots/png/fastqc_trimmed_overrepresented_sequences_plot.png", + "multiqc/multiqc_report_plots/png/fastqc_trimmed_per_base_n_content_plot.png", + "multiqc/multiqc_report_plots/png/fastqc_trimmed_per_base_sequence_quality_plot.png", + "multiqc/multiqc_report_plots/png/fastqc_trimmed_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_report_plots/png/fastqc_trimmed_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_report_plots/png/fastqc_trimmed_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_report_plots/png/fastqc_trimmed_sequence_counts_plot-cnt.png", + "multiqc/multiqc_report_plots/png/fastqc_trimmed_sequence_counts_plot-pct.png", + "multiqc/multiqc_report_plots/png/fastqc_trimmed_sequence_duplication_levels_plot.png", + "multiqc/multiqc_report_plots/png/fastqc_trimmed_top_overrepresented_sequences_table.png", + "multiqc/multiqc_report_plots/png/general_stats_table.png", + "multiqc/multiqc_report_plots/png/salmon_plot.png", + "multiqc/multiqc_report_plots/png/sample-relationships.png", + "multiqc/multiqc_report_plots/svg", + "multiqc/multiqc_report_plots/svg/cutadapt_filtered_reads_plot-cnt.svg", + 
"multiqc/multiqc_report_plots/svg/cutadapt_filtered_reads_plot-pct.svg", + "multiqc/multiqc_report_plots/svg/cutadapt_trimmed_sequences_plot_3_Counts.svg", + "multiqc/multiqc_report_plots/svg/cutadapt_trimmed_sequences_plot_3_Obs_Exp.svg", + "multiqc/multiqc_report_plots/svg/fastqc_raw-status-check-heatmap.svg", + "multiqc/multiqc_report_plots/svg/fastqc_raw_adapter_content_plot.svg", + "multiqc/multiqc_report_plots/svg/fastqc_raw_overrepresented_sequences_plot.svg", + "multiqc/multiqc_report_plots/svg/fastqc_raw_per_base_n_content_plot.svg", + "multiqc/multiqc_report_plots/svg/fastqc_raw_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_report_plots/svg/fastqc_raw_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_report_plots/svg/fastqc_raw_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_report_plots/svg/fastqc_raw_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_report_plots/svg/fastqc_raw_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_report_plots/svg/fastqc_raw_sequence_counts_plot-pct.svg", + "multiqc/multiqc_report_plots/svg/fastqc_raw_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_report_plots/svg/fastqc_raw_top_overrepresented_sequences_table.svg", + "multiqc/multiqc_report_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_report_plots/svg/fastqc_trimmed-status-check-heatmap.svg", + "multiqc/multiqc_report_plots/svg/fastqc_trimmed_overrepresented_sequences_plot.svg", + "multiqc/multiqc_report_plots/svg/fastqc_trimmed_per_base_n_content_plot.svg", + "multiqc/multiqc_report_plots/svg/fastqc_trimmed_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_report_plots/svg/fastqc_trimmed_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_report_plots/svg/fastqc_trimmed_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_report_plots/svg/fastqc_trimmed_per_sequence_quality_scores_plot.svg", + 
"multiqc/multiqc_report_plots/svg/fastqc_trimmed_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_report_plots/svg/fastqc_trimmed_sequence_counts_plot-pct.svg", + "multiqc/multiqc_report_plots/svg/fastqc_trimmed_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_report_plots/svg/fastqc_trimmed_top_overrepresented_sequences_table.svg", + "multiqc/multiqc_report_plots/svg/general_stats_table.svg", + "multiqc/multiqc_report_plots/svg/salmon_plot.svg", + "multiqc/multiqc_report_plots/svg/sample-relationships.svg", + "pipeline_info", + "pipeline_info/nf_core_rnaseq_software_mqc_versions.yml", + "salmon", + "salmon/RAP1_IAA_30M_REP1", + "salmon/RAP1_IAA_30M_REP1/aux_info", + "salmon/RAP1_IAA_30M_REP1/aux_info/ambig_info.tsv", + "salmon/RAP1_IAA_30M_REP1/aux_info/expected_bias.gz", + "salmon/RAP1_IAA_30M_REP1/aux_info/fld.gz", + "salmon/RAP1_IAA_30M_REP1/aux_info/meta_info.json", + "salmon/RAP1_IAA_30M_REP1/aux_info/observed_bias.gz", + "salmon/RAP1_IAA_30M_REP1/aux_info/observed_bias_3p.gz", + "salmon/RAP1_IAA_30M_REP1/cmd_info.json", + "salmon/RAP1_IAA_30M_REP1/libParams", + "salmon/RAP1_IAA_30M_REP1/libParams/flenDist.txt", + "salmon/RAP1_IAA_30M_REP1/lib_format_counts.json", + "salmon/RAP1_IAA_30M_REP1/logs", + "salmon/RAP1_IAA_30M_REP1/logs/salmon_quant.log", + "salmon/RAP1_IAA_30M_REP1/quant.genes.sf", + "salmon/RAP1_IAA_30M_REP1/quant.sf", + "salmon/RAP1_UNINDUCED_REP1", + "salmon/RAP1_UNINDUCED_REP1/aux_info", + "salmon/RAP1_UNINDUCED_REP1/aux_info/ambig_info.tsv", + "salmon/RAP1_UNINDUCED_REP1/aux_info/expected_bias.gz", + "salmon/RAP1_UNINDUCED_REP1/aux_info/fld.gz", + "salmon/RAP1_UNINDUCED_REP1/aux_info/meta_info.json", + "salmon/RAP1_UNINDUCED_REP1/aux_info/observed_bias.gz", + "salmon/RAP1_UNINDUCED_REP1/aux_info/observed_bias_3p.gz", + "salmon/RAP1_UNINDUCED_REP1/cmd_info.json", + "salmon/RAP1_UNINDUCED_REP1/libParams", + "salmon/RAP1_UNINDUCED_REP1/libParams/flenDist.txt", + "salmon/RAP1_UNINDUCED_REP1/lib_format_counts.json", + 
"salmon/RAP1_UNINDUCED_REP1/logs", + "salmon/RAP1_UNINDUCED_REP1/logs/salmon_quant.log", + "salmon/RAP1_UNINDUCED_REP1/quant.genes.sf", + "salmon/RAP1_UNINDUCED_REP1/quant.sf", + "salmon/RAP1_UNINDUCED_REP2", + "salmon/RAP1_UNINDUCED_REP2/aux_info", + "salmon/RAP1_UNINDUCED_REP2/aux_info/ambig_info.tsv", + "salmon/RAP1_UNINDUCED_REP2/aux_info/expected_bias.gz", + "salmon/RAP1_UNINDUCED_REP2/aux_info/fld.gz", + "salmon/RAP1_UNINDUCED_REP2/aux_info/meta_info.json", + "salmon/RAP1_UNINDUCED_REP2/aux_info/observed_bias.gz", + "salmon/RAP1_UNINDUCED_REP2/aux_info/observed_bias_3p.gz", + "salmon/RAP1_UNINDUCED_REP2/cmd_info.json", + "salmon/RAP1_UNINDUCED_REP2/libParams", + "salmon/RAP1_UNINDUCED_REP2/libParams/flenDist.txt", + "salmon/RAP1_UNINDUCED_REP2/lib_format_counts.json", + "salmon/RAP1_UNINDUCED_REP2/logs", + "salmon/RAP1_UNINDUCED_REP2/logs/salmon_quant.log", + "salmon/RAP1_UNINDUCED_REP2/quant.genes.sf", + "salmon/RAP1_UNINDUCED_REP2/quant.sf", + "salmon/WT_REP1", + "salmon/WT_REP1/aux_info", + "salmon/WT_REP1/aux_info/ambig_info.tsv", + "salmon/WT_REP1/aux_info/expected_bias.gz", + "salmon/WT_REP1/aux_info/fld.gz", + "salmon/WT_REP1/aux_info/meta_info.json", + "salmon/WT_REP1/aux_info/observed_bias.gz", + "salmon/WT_REP1/aux_info/observed_bias_3p.gz", + "salmon/WT_REP1/cmd_info.json", + "salmon/WT_REP1/libParams", + "salmon/WT_REP1/libParams/flenDist.txt", + "salmon/WT_REP1/lib_format_counts.json", + "salmon/WT_REP1/logs", + "salmon/WT_REP1/logs/salmon_quant.log", + "salmon/WT_REP1/quant.genes.sf", + "salmon/WT_REP1/quant.sf", + "salmon/WT_REP2", + "salmon/WT_REP2/aux_info", + "salmon/WT_REP2/aux_info/ambig_info.tsv", + "salmon/WT_REP2/aux_info/expected_bias.gz", + "salmon/WT_REP2/aux_info/fld.gz", + "salmon/WT_REP2/aux_info/meta_info.json", + "salmon/WT_REP2/aux_info/observed_bias.gz", + "salmon/WT_REP2/aux_info/observed_bias_3p.gz", + "salmon/WT_REP2/cmd_info.json", + "salmon/WT_REP2/libParams", + "salmon/WT_REP2/libParams/flenDist.txt", + 
"salmon/WT_REP2/lib_format_counts.json", + "salmon/WT_REP2/logs", + "salmon/WT_REP2/logs/salmon_quant.log", + "salmon/WT_REP2/quant.genes.sf", + "salmon/WT_REP2/quant.sf", + "salmon/deseq2_qc", + "salmon/deseq2_qc/R_sessionInfo.log", + "salmon/deseq2_qc/deseq2.dds.RData", + "salmon/deseq2_qc/deseq2.pca.vals.txt", + "salmon/deseq2_qc/deseq2.plots.pdf", + "salmon/deseq2_qc/deseq2.sample.dists.txt", + "salmon/deseq2_qc/size_factors", + "salmon/deseq2_qc/size_factors/RAP1_IAA_30M_REP1.txt", + "salmon/deseq2_qc/size_factors/RAP1_UNINDUCED_REP1.txt", + "salmon/deseq2_qc/size_factors/RAP1_UNINDUCED_REP2.txt", + "salmon/deseq2_qc/size_factors/WT_REP1.txt", + "salmon/deseq2_qc/size_factors/WT_REP2.txt", + "salmon/deseq2_qc/size_factors/deseq2.size_factors.RData", + "salmon/salmon.merged.gene_counts.SummarizedExperiment.rds", + "salmon/salmon.merged.gene_counts.tsv", + "salmon/salmon.merged.gene_counts_length_scaled.SummarizedExperiment.rds", + "salmon/salmon.merged.gene_counts_length_scaled.tsv", + "salmon/salmon.merged.gene_counts_scaled.SummarizedExperiment.rds", + "salmon/salmon.merged.gene_counts_scaled.tsv", + "salmon/salmon.merged.gene_lengths.tsv", + "salmon/salmon.merged.gene_tpm.tsv", + "salmon/salmon.merged.transcript_counts.SummarizedExperiment.rds", + "salmon/salmon.merged.transcript_counts.tsv", + "salmon/salmon.merged.transcript_lengths.tsv", + "salmon/salmon.merged.transcript_tpm.tsv", + "salmon/tx2gene.tsv", + "trimgalore", + "trimgalore/RAP1_IAA_30M_REP1_trimmed_1.fastq.gz_trimming_report.txt", + "trimgalore/RAP1_IAA_30M_REP1_trimmed_2.fastq.gz_trimming_report.txt", + "trimgalore/RAP1_UNINDUCED_REP1_trimmed.fastq.gz_trimming_report.txt", + "trimgalore/RAP1_UNINDUCED_REP2_trimmed.fastq.gz_trimming_report.txt", + "trimgalore/WT_REP1_trimmed_1.fastq.gz_trimming_report.txt", + "trimgalore/WT_REP1_trimmed_2.fastq.gz_trimming_report.txt", + "trimgalore/WT_REP2_trimmed_1.fastq.gz_trimming_report.txt", + "trimgalore/WT_REP2_trimmed_2.fastq.gz_trimming_report.txt" + 
], + [ + "cutadapt_filtered_reads_plot.txt:md5,6fa381627f7c1f664f3d4b2cb79cce90", + "cutadapt_trimmed_sequences_plot_3_Counts.txt:md5,13dfa866fd91dbb072689efe9aa83b1f", + "cutadapt_trimmed_sequences_plot_3_Obs_Exp.txt:md5,07145dd8dd3db654859b18eb0389046c", + "fastqc_raw-status-check-heatmap.txt:md5,5a89b0d8d162f6b1dbdaf39457bbc03b", + "fastqc_raw_adapter_content_plot.txt:md5,da0389be84cfdd189b1d045212eb2974", + "fastqc_raw_overrepresented_sequences_plot.txt:md5,25d88ea8a72f55e8a374ae802bc7f0b1", + "fastqc_raw_per_base_n_content_plot.txt:md5,d368d7e36ca2f73dcde61f2b486d8213", + "fastqc_raw_per_base_sequence_quality_plot.txt:md5,5c3065b549129702b185ea1b817da420", + "fastqc_raw_per_sequence_gc_content_plot_Counts.txt:md5,9ddaa50167117d3c9188ccf015427704", + "fastqc_raw_per_sequence_gc_content_plot_Percentages.txt:md5,f10ee2881b61308af35f304aa3d810a3", + "fastqc_raw_per_sequence_quality_scores_plot.txt:md5,b5f9a02933e3065952237afd2ec9ce82", + "fastqc_raw_sequence_counts_plot.txt:md5,cbae4979d5db66d3b894abcf8d1c453c", + "fastqc_raw_sequence_duplication_levels_plot.txt:md5,8812cee16f6ca65e2c33635754de1772", + "fastqc_sequence_length_distribution_plot.txt:md5,6fe2c985606abad947bcca99b015ae33", + "fastqc_trimmed-status-check-heatmap.txt:md5,22a03548736b88b23be6bc0c9ef1b4a6", + "fastqc_trimmed_overrepresented_sequences_plot.txt:md5,c755e9d044ea1a82b2c8edde867b4878", + "fastqc_trimmed_per_base_n_content_plot.txt:md5,418610c1ce119cb786ad434db75d366e", + "fastqc_trimmed_per_base_sequence_quality_plot.txt:md5,bd22e06e41c096ad4f745d40fe96a1e5", + "fastqc_trimmed_per_sequence_gc_content_plot_Counts.txt:md5,004c60768ceb6197765154e3eaa37b7a", + "fastqc_trimmed_per_sequence_gc_content_plot_Percentages.txt:md5,95d29060b687f745288ad1ec47750037", + "fastqc_trimmed_per_sequence_quality_scores_plot.txt:md5,0f9834cc19f76dd5c87cf8cba7435a7c", + "fastqc_trimmed_sequence_counts_plot.txt:md5,9fd642bdd1da354f296bb8092205608f", + 
"fastqc_trimmed_sequence_duplication_levels_plot.txt:md5,0758257b497283b1ef28171e694db6db", + "multiqc_citations.txt:md5,f789abe663d4b4214f0ddeb413a7f150", + "multiqc_cutadapt.txt:md5,583b7b9ba76b26162bb9610ed746454b", + "multiqc_fastqc_fastqc_raw.txt:md5,81c3c1a2575a1891a7f2a9637a0f2cc0", + "multiqc_fastqc_fastqc_trimmed.txt:md5,54743154d0e8858980acffeb5b6f6a97", + "ambig_info.tsv:md5,5e9128e825dd0173d1eda78709cebb47", + "expected_bias.gz:md5,3407f87245d0003e0ffbfdf6d8c04f20", + "observed_bias.gz:md5,92bcd0592d22a6a58d0360fc76103e56", + "observed_bias_3p.gz:md5,92bcd0592d22a6a58d0360fc76103e56", + "cmd_info.json:md5,e89ce91f3cc03732bee42c381f3b1d1b", + "lib_format_counts.json:md5,288063651e63fda4ed95834d252cefd3", + "ambig_info.tsv:md5,12e70e29f44c7786f081e4b59e4ce7ce", + "expected_bias.gz:md5,3407f87245d0003e0ffbfdf6d8c04f20", + "observed_bias.gz:md5,92bcd0592d22a6a58d0360fc76103e56", + "observed_bias_3p.gz:md5,92bcd0592d22a6a58d0360fc76103e56", + "cmd_info.json:md5,d4db8fa9cb231648076bda1ec4e34114", + "lib_format_counts.json:md5,8a2ab54a8ec1d78be040c9bec57b5101", + "ambig_info.tsv:md5,fe67ac3fc0f0fc813216c09aec21c4e8", + "expected_bias.gz:md5,3407f87245d0003e0ffbfdf6d8c04f20", + "observed_bias.gz:md5,92bcd0592d22a6a58d0360fc76103e56", + "observed_bias_3p.gz:md5,92bcd0592d22a6a58d0360fc76103e56", + "cmd_info.json:md5,394264c8964da0cdd1c40d1c16995ceb", + "lib_format_counts.json:md5,c1ed7330956d6411d833fed78f1182bd", + "ambig_info.tsv:md5,3bc4e9d9fb9a95086a94c8c01349e67f", + "expected_bias.gz:md5,3407f87245d0003e0ffbfdf6d8c04f20", + "observed_bias.gz:md5,92bcd0592d22a6a58d0360fc76103e56", + "observed_bias_3p.gz:md5,92bcd0592d22a6a58d0360fc76103e56", + "cmd_info.json:md5,8009941fa85ff1e32128bceafc974ea8", + "lib_format_counts.json:md5,bcb3207290115f621a93a198ec5b6dfd", + "ambig_info.tsv:md5,f7304f8876d8c47aab65205236d7b721", + "expected_bias.gz:md5,3407f87245d0003e0ffbfdf6d8c04f20", + "observed_bias.gz:md5,92bcd0592d22a6a58d0360fc76103e56", + 
"observed_bias_3p.gz:md5,92bcd0592d22a6a58d0360fc76103e56", + "cmd_info.json:md5,3e6d06d032abd3f5cd7e79df1b5bcde0", + "lib_format_counts.json:md5,fce2c4840048c294c016f45f0df15384", + "R_sessionInfo.log:md5,fb0da0d7ad6994ed66a8e68348b19676", + "tx2gene.tsv:md5,1be389a28cc26d94b19ea918959ac72e" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-01-21T18:27:30.970398934" + } +} \ No newline at end of file diff --git a/workflows/rnaseq/main.nf b/workflows/rnaseq/main.nf index 27c7e5639..6e9ed6abe 100755 --- a/workflows/rnaseq/main.nf +++ b/workflows/rnaseq/main.nf @@ -141,7 +141,7 @@ workflow RNASEQ { ch_sortmerna_index, ch_bbsplit_index, ch_ribo_db, - params.skip_bbsplit, + params.skip_bbsplit || ! params.fasta, params.skip_fastqc || params.skip_qc, params.skip_trimming, params.skip_umi_extract,