diff --git a/CHANGELOG.md b/CHANGELOG.md index 4265b2a6..c739194c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#286](https://github.com/nf-core/differentialabundance/pull/286)] - Integration of limma voom for rnaseq data ([@KamilMaliszArdigen](https://github.com/KamilMaliszArdigen), review by [@pinin4fjords](https://github.com/pinin4fjords)) - [[#380](https://github.com/nf-core/differentialabundance/pull/380)] - Replace local filter_diff_table module with nf-core one, and create nf-tests for tabular_to_gsea_chip. ([@nschcolnicov](https://github.com/nschcolnicov), review by [@pinin4fjords](https://github.com/pinin4fjords)) - [[#382](https://github.com/nf-core/differentialabundance/pull/382)] - Add YAML formatted contrasts file handling. ([@nschcolnicov](https://github.com/nschcolnicov), review by [@TODO](https://github.com/TODO)) +- [[#411](https://github.com/nf-core/differentialabundance/pull/411)] - Replace local tabulartogseachip with nf-core version. 
([@nschcolnicov](https://github.com/nschcolnicov), review by [@TODO](https://github.com/TODO)) ### Fixed diff --git a/conf/modules.config b/conf/modules.config index f84aef02..b18c8b1d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -478,7 +478,7 @@ process { ] } - withName: 'TABULAR_TO_GSEA_CHIP' { + withName: 'CUSTOM_TABULARTOGSEACHIP' { publishDir = [ enabled: false ] diff --git a/modules.json b/modules.json index 23d47cc3..f46a3b1f 100644 --- a/modules.json +++ b/modules.json @@ -25,6 +25,11 @@ "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "custom/tabulartogseachip": { + "branch": "master", + "git_sha": "7900e49fb84969a2479ecfbeef7bcbe296513c90", + "installed_by": ["modules"] + }, "custom/tabulartogseacls": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/local/tabulartogseachip/main.nf b/modules/local/tabulartogseachip/main.nf deleted file mode 100644 index 314a2e70..00000000 --- a/modules/local/tabulartogseachip/main.nf +++ /dev/null @@ -1,54 +0,0 @@ -process TABULAR_TO_GSEA_CHIP { - - tag "$id" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "community.wave.seqera.io/library/gawk:5.1.0--fa97c4ccf4cfbc4b" - - input: - path tsv - tuple val(id), val(symbol) - - output: - path "*.chip" , emit: chip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def VERSION = '9.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
- """ - function find_column_number { - file=\$1 - column=\$2 - - head -n 1 \$file | tr '\\t' '\\n' | grep -n "^\${column}\$" | awk -F':' '{print \$1}' - } - - id_col=\$(find_column_number $tsv $id) - symbol_col=\$(find_column_number $tsv $symbol) - outfile=\$(echo $tsv | sed 's/\\(.*\\)\\..*/\\1/').chip - - echo -e "Probe Set ID\\tGene Symbol\\tGene Title" > \${outfile}.tmp - tail -n +2 $tsv | awk -F'\\t' -v id=\$id_col -v symbol=\$symbol_col '{print \$id"\\t"\$symbol"\\tNA"}' >> \${outfile}.tmp - mv \${outfile}.tmp \${outfile} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bash: \$(echo \$(bash --version | grep -Eo 'version [[:alnum:].]+' | sed 's/version //')) - END_VERSIONS - """ - - stub: - """ - touch stub.chip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bash: \$(echo \$(bash --version | grep -Eo 'version [[:alnum:].]+' | sed 's/version //')) - END_VERSIONS - """ - -} diff --git a/modules/local/tabulartogseachip/meta.yml b/modules/local/tabulartogseachip/meta.yml deleted file mode 100644 index aa9d7d93..00000000 --- a/modules/local/tabulartogseachip/meta.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: tabular_to_gsea_chip -description: Convert tabular data into GSEA-compatible CHIP files. -keywords: - - GSEA - - CHIP - - Bioinformatics -tools: - - gawk: - description: | - GNU Awk (gawk) is a powerful programming language designed for text processing and typically used as a data extraction and reporting tool. - It is used here to manipulate tabular data and create the CHIP file format. - homepage: https://www.gnu.org/software/gawk/ - documentation: https://www.gnu.org/software/gawk/manual/ - licence: ["GPL-3.0-or-later"] -input: - - tsv: - type: file - description: | - Tab-separated values (TSV) file containing data to be converted into a CHIP file. - - - id: - type: string - description: | - Feature ID attribute in the abundance table as well as in the GTF file (e.g. the gene_id field). 
- - symbol: - type: string - description: | - Feature name attribute in the abundance table as well as in the GTF file (e.g. the gene symbol field). -output: - - chip: - type: file - description: Generated GSEA-compatible CHIP file. - pattern: "*.chip" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@pinin4fjords" # Replace with actual author handles -maintainers: - - "@nschcolnicov" diff --git a/modules/local/tabulartogseachip/tests/main.nf.test b/modules/local/tabulartogseachip/tests/main.nf.test deleted file mode 100644 index e6c5321b..00000000 --- a/modules/local/tabulartogseachip/tests/main.nf.test +++ /dev/null @@ -1,49 +0,0 @@ -nextflow_process { - - name "Test Process TABULAR_TO_GSEA_CHIP" - script "../main.nf" - process "TABULAR_TO_GSEA_CHIP" - tag "modules" - tag "modules_nfcore" - tag "tabular_to_gsea_chip" - - test("test_tabular_to_gsea_chip") { - - when { - process { - """ - input[0] = Channel.fromPath("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/differentialabundance/modules_testdata/Mus_musculus.anno.feature_metadata.tsv") - input[1] = ["gene_id", "gene_name"] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() }, - ) - } - } - - test("test_tabular_to_gsea_chip - stub") { - - options "-stub" - - when { - process { - """ - input[0] = Channel.fromPath("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/differentialabundance/modules_testdata/Mus_musculus.anno.feature_metadata.tsv") - input[1] = ["gene_id", "gene_name"] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() }, - ) - } - } -} diff --git a/modules/local/tabulartogseachip/tests/main.nf.test.snap b/modules/local/tabulartogseachip/tests/main.nf.test.snap deleted file mode 100644 index 29d017ed..00000000 --- 
a/modules/local/tabulartogseachip/tests/main.nf.test.snap +++ /dev/null @@ -1,48 +0,0 @@ -{ - "test_tabular_to_gsea_chip - stub": { - "content": [ - { - "0": [ - "stub.chip:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "1": [ - "versions.yml:md5,a422b1a01e86be433a25d1776d497f5a" - ], - "chip": [ - "stub.chip:md5,d41d8cd98f00b204e9800998ecf8427e" - ], - "versions": [ - "versions.yml:md5,a422b1a01e86be433a25d1776d497f5a" - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-06T21:03:06.781994447" - }, - "test_tabular_to_gsea_chip": { - "content": [ - { - "0": [ - "Mus_musculus.anno.feature_metadata.chip:md5,5abf60bb982bb2ff5b8b3e37f14a21e4" - ], - "1": [ - "versions.yml:md5,a422b1a01e86be433a25d1776d497f5a" - ], - "chip": [ - "Mus_musculus.anno.feature_metadata.chip:md5,5abf60bb982bb2ff5b8b3e37f14a21e4" - ], - "versions": [ - "versions.yml:md5,a422b1a01e86be433a25d1776d497f5a" - ] - } - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.10.2" - }, - "timestamp": "2024-12-06T21:02:58.608468636" - } -} \ No newline at end of file diff --git a/modules/local/tabulartogseachip/environment.yml b/modules/nf-core/custom/tabulartogseachip/environment.yml similarity index 63% rename from modules/local/tabulartogseachip/environment.yml rename to modules/nf-core/custom/tabulartogseachip/environment.yml index a6e8e310..cc49deff 100644 --- a/modules/local/tabulartogseachip/environment.yml +++ b/modules/nf-core/custom/tabulartogseachip/environment.yml @@ -1,6 +1,5 @@ channels: - conda-forge - bioconda - dependencies: - - conda-forge::gawk=5.1.0 + - "conda-forge::gawk=5.1.0" diff --git a/modules/nf-core/custom/tabulartogseachip/main.nf b/modules/nf-core/custom/tabulartogseachip/main.nf new file mode 100644 index 00000000..c0622cda --- /dev/null +++ b/modules/nf-core/custom/tabulartogseachip/main.nf @@ -0,0 +1,56 @@ +process CUSTOM_TABULARTOGSEACHIP { + + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : + 'biocontainers/gawk:5.1.0' }" + + input: + tuple val(meta), path(tabular) + tuple val(id) , val(symbol) + + output: + tuple val(meta), path("*.chip"), emit: chip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + function find_column_number { + file=\$1 + column=\$2 + + head -n 1 \$file | tr '\\t' '\\n' | grep -n "^\${column}\$" | awk -F':' '{print \$1}' + } + + id_col=\$(find_column_number $tabular $id) + symbol_col=\$(find_column_number $tabular $symbol) + outfile=${prefix}.chip + + echo -e "Probe Set ID\\tGene Symbol\\tGene Title" > \${outfile}.tmp + tail -n +2 $tabular | awk -F'\\t' -v id=\$id_col -v symbol=\$symbol_col '{print \$id"\\t"\$symbol"\\tNA"}' >> \${outfile}.tmp + mv \${outfile}.tmp \${outfile} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + outfile=${prefix}.chip + touch \$outfile + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/custom/tabulartogseachip/meta.yml b/modules/nf-core/custom/tabulartogseachip/meta.yml new file mode 100644 index 00000000..5759874c --- /dev/null +++ b/modules/nf-core/custom/tabulartogseachip/meta.yml @@ -0,0 +1,53 @@ +name: "custom_tabulartogseachip" +description: Make a GSEA annotation file (.chip) from tabular inputs +keywords: + - gsea + - chip + - convert + - tabular +tools: + - custom: + description: "Make a GSEA annotation file (.chip) from tabular inputs" + tool_dev_url: 
"https://github.com/nf-core/modules/blob/master/modules/nf-core/custom/tabulartogseachip/main.nf" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing data information. + This can be used at the workflow level to pass optional parameters to the module. + [id: 'test', ...] + - tabular: + type: file + description: | + Tabular file (NOTE: currently only TSV files are supported) containing a column with the + feature ids, and another column with the feature symbols. + pattern: "*.{tsv}" + - - id: + type: string + description: The name of the column containing feature ids + - symbol: + type: string + description: The name of the column containing feature symbols +output: + - chip: + - meta: + type: map + description: Groovy Map containing metadata e.g. [ id:'test', ... ] + - "*.chip": + type: file + description: | + A chip annotation format file (.chip) as defined by the Broad + documentation at + https://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Data_formats + pattern: "*.chip" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@pinin4fjords" + - "@suzannejin" +maintainers: + - "@pinin4fjords" diff --git a/modules/nf-core/custom/tabulartogseachip/tests/main.nf.test b/modules/nf-core/custom/tabulartogseachip/tests/main.nf.test new file mode 100644 index 00000000..6c2025a1 --- /dev/null +++ b/modules/nf-core/custom/tabulartogseachip/tests/main.nf.test @@ -0,0 +1,68 @@ +nextflow_process { + + name "Test Process CUSTOM_TABULARTOGSEACHIP" + script "../main.nf" + process "CUSTOM_TABULARTOGSEACHIP" + + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "custom/tabulartogseachip" + + test("test tsv to chip") { + + when { + process { + """ + input[0] = Channel + .fromPath(params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) + 
.map { it -> [ + [id:it.baseName], it + ]} + input[1] = Channel.of(['gene_id', 'gene_name']) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.chip, + process.out.versions + ).match() } + ) + } + + } + + test("test tsv to chip - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel + .fromPath(params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true) + .map { it -> [ + [id:it.baseName], it + ]} + input[1] = Channel.of(['gene_id', 'gene_name']) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.chip, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/custom/tabulartogseachip/tests/main.nf.test.snap b/modules/nf-core/custom/tabulartogseachip/tests/main.nf.test.snap new file mode 100644 index 00000000..bb66d232 --- /dev/null +++ b/modules/nf-core/custom/tabulartogseachip/tests/main.nf.test.snap @@ -0,0 +1,42 @@ +{ + "test tsv to chip": { + "content": [ + [ + [ + { + "id": "SRP254919.salmon.merged.gene_counts.top1000cov" + }, + "SRP254919.salmon.merged.gene_counts.top1000cov.chip:md5,2ab8a685c675ce2fb97142526766044a" + ] + ], + [ + "versions.yml:md5,61dab2d2b9aa1333c4c3bfd7bd893ce5" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-16T18:05:22.341224384" + }, + "test tsv to chip - stub": { + "content": [ + [ + [ + { + "id": "SRP254919.salmon.merged.gene_counts.top1000cov" + }, + "SRP254919.salmon.merged.gene_counts.top1000cov.chip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + "versions.yml:md5,61dab2d2b9aa1333c4c3bfd7bd893ce5" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-17T13:14:53.792845507" + } +} diff --git a/tests/test.nf.test.snap b/tests/test.nf.test.snap index 37f61c5e..bbd6b2de 100644 --- 
a/tests/test.nf.test.snap +++ b/tests/test.nf.test.snap @@ -3,6 +3,9 @@ "content": [ 21, { + "CUSTOM_TABULARTOGSEACHIP": { + "gawk": "5.1.0" + }, "DESEQ2_DIFFERENTIAL": { "r-base": "4.1.3", "bioconductor-deseq2": "1.34.0" @@ -23,9 +26,6 @@ "PLOT_EXPLORATORY": { "r-shinyngs": "2.0.0" }, - "TABULAR_TO_GSEA_CHIP": { - "bash": "5.2.21 3" - }, "VALIDATOR": { "r-base": "4.3.3", "r-shinyngs": "2.0.0" @@ -186,12 +186,15 @@ "nf-test": "0.9.0", "nextflow": "24.10.3" }, - "timestamp": "2024-12-17T11:56:20.743162854" + "timestamp": "2024-12-20T16:13:56.445211657" }, "Test profile with yaml contrasts": { "content": [ 21, { + "CUSTOM_TABULARTOGSEACHIP": { + "gawk": "5.1.0" + }, "DESEQ2_DIFFERENTIAL": { "r-base": "4.1.3", "bioconductor-deseq2": "1.34.0" @@ -212,9 +215,6 @@ "PLOT_EXPLORATORY": { "r-shinyngs": "2.0.0" }, - "TABULAR_TO_GSEA_CHIP": { - "bash": "5.2.21 3" - }, "VALIDATOR": { "r-base": "4.3.3", "r-shinyngs": "2.0.0" @@ -375,6 +375,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.3" }, - "timestamp": "2024-12-20T13:41:10.4647836" + "timestamp": "2024-12-20T16:16:49.167979669" } } \ No newline at end of file diff --git a/tests/test_affy.nf.test.snap b/tests/test_affy.nf.test.snap index 2252b03a..4c0d12c1 100644 --- a/tests/test_affy.nf.test.snap +++ b/tests/test_affy.nf.test.snap @@ -7,6 +7,9 @@ "r-base": "4.3.1", "bioconductor-affy": "1.78.0" }, + "CUSTOM_TABULARTOGSEACHIP": { + "gawk": "5.1.0" + }, "GSEA_GSEA": { "gsea": "4.3.2" }, @@ -21,9 +24,6 @@ "PLOT_EXPLORATORY": { "r-shinyngs": "2.0.0" }, - "TABULAR_TO_GSEA_CHIP": { - "bash": "5.2.21 3" - }, "VALIDATOR": { "r-base": "4.3.3", "r-shinyngs": "2.0.0" @@ -236,6 +236,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.3" }, - "timestamp": "2024-12-17T12:03:13.731672183" + "timestamp": "2024-12-20T15:43:35.560675645" } } \ No newline at end of file diff --git a/tests/test_rnaseq_limma.nf.test.snap b/tests/test_rnaseq_limma.nf.test.snap index acd27efc..3e2b9af7 100644 --- a/tests/test_rnaseq_limma.nf.test.snap +++ 
b/tests/test_rnaseq_limma.nf.test.snap @@ -3,6 +3,9 @@ "content": [ 20, { + "CUSTOM_TABULARTOGSEACHIP": { + "gawk": "5.1.0" + }, "GSEA_GSEA": { "gsea": "4.3.2" }, @@ -23,9 +26,6 @@ "PLOT_EXPLORATORY": { "r-shinyngs": "2.0.0" }, - "TABULAR_TO_GSEA_CHIP": { - "bash": "5.2.21 3" - }, "VALIDATOR": { "r-base": "4.3.3", "r-shinyngs": "2.0.0" @@ -177,6 +177,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.3" }, - "timestamp": "2024-12-17T12:10:44.503110769" + "timestamp": "2024-12-20T16:29:49.698674022" } } \ No newline at end of file diff --git a/workflows/differentialabundance.nf b/workflows/differentialabundance.nf index 5a4faeb0..5f376041 100644 --- a/workflows/differentialabundance.nf +++ b/workflows/differentialabundance.nf @@ -98,7 +98,7 @@ citations_file = file(params.citations_file, checkIfExists: true) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { TABULAR_TO_GSEA_CHIP } from '../modules/local/tabulartogseachip' +include { CUSTOM_TABULARTOGSEACHIP } from '../modules/nf-core/custom/tabulartogseachip/main' include { CUSTOM_FILTERDIFFERENTIALTABLE } from '../modules/nf-core/custom/filterdifferentialtable/main' /* @@ -464,8 +464,8 @@ workflow DIFFERENTIALABUNDANCE { CUSTOM_TABULARTOGSEACLS(ch_contrasts_and_samples) - TABULAR_TO_GSEA_CHIP( - VALIDATOR.out.feature_meta.map{ it[1] }, + CUSTOM_TABULARTOGSEACHIP( + VALIDATOR.out.feature_meta, [params.features_id_col, params.features_name_col] ) @@ -482,7 +482,7 @@ workflow DIFFERENTIALABUNDANCE { GSEA_GSEA( ch_gsea_inputs, ch_gsea_inputs.map{ tuple(it[0].reference, it[0].target) }, // * - TABULAR_TO_GSEA_CHIP.out.chip.first() + CUSTOM_TABULARTOGSEACHIP.out.chip.first().map{ it[1] } ) // * Note: GSEA module currently uses a value channel for the mandatory @@ -495,7 +495,7 @@ workflow DIFFERENTIALABUNDANCE { // Record GSEA versions ch_versions = ch_versions - .mix(TABULAR_TO_GSEA_CHIP.out.versions) + .mix(CUSTOM_TABULARTOGSEACHIP.out.versions) .mix(GSEA_GSEA.out.versions) }