From 33aa26b832932671b3d9638f59fbcedc34a11b6b Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Fri, 17 Jan 2025 16:49:29 +0000 Subject: [PATCH 1/4] Allow transcriptome-only salmon indexing --- modules/nf-core/salmon/index/main.nf | 27 ++++++++++++------- .../nf-core/salmon/index/tests/main.nf.test | 24 +++++++++++++++++ .../salmon/index/tests/main.nf.test.snap | 24 ++++++++++++----- 3 files changed, 59 insertions(+), 16 deletions(-) diff --git a/modules/nf-core/salmon/index/main.nf b/modules/nf-core/salmon/index/main.nf index 3d653c0d08e..2e9c6224cd1 100644 --- a/modules/nf-core/salmon/index/main.nf +++ b/modules/nf-core/salmon/index/main.nf @@ -20,22 +20,29 @@ process SALMON_INDEX { script: def args = task.ext.args ?: '' - def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 > decoys.txt" - def gentrome = "gentrome.fa" - if (genome_fasta.endsWith('.gz')) { - get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 > decoys.txt" - gentrome = "gentrome.fa.gz" + def decoys = '' + def fasta = transcript_fasta + if (genome_fasta){ + if (genome_fasta.endsWith('.gz')) { + genome_fasta = "<(gunzip -c $genome_fasta)" + } + decoys='-d decoys.txt' + fasta='gentrome.fa' + } + if (transcript_fasta.endsWith('.gz')) { + transcript_fasta = "<(gunzip -c $transcript_fasta)" } """ - $get_decoy_ids - sed -i.bak -e 's/>//g' decoys.txt - cat $transcript_fasta $genome_fasta > $gentrome + if [ -n '$genome_fasta' ]; then + grep '^>' $genome_fasta | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 | sed 's/>//g' > decoys.txt + cat $transcript_fasta $genome_fasta > $fasta + fi salmon \\ index \\ --threads $task.cpus \\ - -t $gentrome \\ - -d decoys.txt \\ + -t $fasta \\ + $decoys \\ $args \\ -i salmon diff --git a/modules/nf-core/salmon/index/tests/main.nf.test b/modules/nf-core/salmon/index/tests/main.nf.test index 16b3c1a7914..9096c87f707 100644 --- a/modules/nf-core/salmon/index/tests/main.nf.test +++ b/modules/nf-core/salmon/index/tests/main.nf.test @@ -30,6 +30,30 @@ nextflow_process { ) } + } + + test("sarscov2 transcriptome only") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([]) + input[1] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.index.get(0)).exists() }, + { assert snapshot(process.out.versions).match("versions_transcriptome_only") } + ) + } + } test("sarscov2 stub") { diff --git a/modules/nf-core/salmon/index/tests/main.nf.test.snap b/modules/nf-core/salmon/index/tests/main.nf.test.snap index e5899b51151..b85959bd289 100644 --- a/modules/nf-core/salmon/index/tests/main.nf.test.snap +++ b/modules/nf-core/salmon/index/tests/main.nf.test.snap @@ -6,10 +6,10 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.2" }, - "timestamp": "2024-10-18T10:00:47.087293189" + "timestamp": "2025-01-17T16:46:53.472914" }, "versions stub": { "content": [ @@ -18,9 +18,21 @@ ] ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nf-test": "0.9.2", + "nextflow": "24.10.2" }, - "timestamp": "2024-10-18T10:01:03.89824494" + "timestamp": "2025-01-17T16:47:11.559333" + }, + "versions_transcriptome_only": { + "content": [ + [ + "versions.yml:md5,85337fa0a286ea35073ee5260974e307" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.2" + }, + "timestamp": "2025-01-17T16:47:02.453235" } } \ No newline at end of file From 4266407701ff5012a2a7c394f9e97204b2afcc5e Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 20 Jan 2025 12:50:16 +0000 Subject: [PATCH 2/4] Fix up tests to add files --- .../nf-core/salmon/index/tests/main.nf.test | 19 ++++++--- .../salmon/index/tests/main.nf.test.snap | 39 ++++++++++++------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/modules/nf-core/salmon/index/tests/main.nf.test b/modules/nf-core/salmon/index/tests/main.nf.test index 9096c87f707..3a4eadfb6fc 100644 --- a/modules/nf-core/salmon/index/tests/main.nf.test +++ b/modules/nf-core/salmon/index/tests/main.nf.test @@ -26,13 +26,16 @@ nextflow_process { assertAll( { assert process.success }, { assert path(process.out.index.get(0)).exists() }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot( + file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(), + path(process.out.versions.get(0)).yaml + ).match()} ) } } - test("sarscov2 transcriptome only") { + test("sarscov2 transcriptome only") { when { params { @@ -50,13 +53,16 @@ nextflow_process { assertAll( { assert process.success }, { assert path(process.out.index.get(0)).exists() }, - { assert snapshot(process.out.versions).match("versions_transcriptome_only") } + { assert snapshot( + file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(), + path(process.out.versions.get(0)).yaml + ).match()} ) } } - test("sarscov2 stub") { + test("sarscov2 stub") { options "-stub" when { params { @@ -74,7 +80,10 @@ nextflow_process { assertAll( { assert process.success }, { assert path(process.out.index.get(0)).exists() }, - { assert snapshot(process.out.versions).match("versions stub") } + { assert snapshot( + file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(), + path(process.out.versions.get(0)).yaml + ).match()} ) } diff --git a/modules/nf-core/salmon/index/tests/main.nf.test.snap b/modules/nf-core/salmon/index/tests/main.nf.test.snap index b85959bd289..a99c5d07f62 100644 --- a/modules/nf-core/salmon/index/tests/main.nf.test.snap +++ b/modules/nf-core/salmon/index/tests/main.nf.test.snap @@ -1,38 +1,47 @@ { - "versions": { + "sarscov2 stub": { "content": [ - [ - "versions.yml:md5,85337fa0a286ea35073ee5260974e307" - ] + "[complete_ref_lens.bin, ctable.bin, ctg_offsets.bin, duplicate_clusters.tsv, info.json, mphf.bin, pos.bin, pre_indexing.log, rank.bin, refAccumLengths.bin, ref_indexing.log, reflengths.bin, refseq.bin, seq.bin, versionInfo.json]", + { + "SALMON_INDEX": { + "salmon": "1.10.3" + } + } ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.2" }, - "timestamp": "2025-01-17T16:46:53.472914" + "timestamp": "2025-01-20T12:08:18.365399" }, - "versions stub": { + "sarscov2": { "content": [ - [ - "versions.yml:md5,85337fa0a286ea35073ee5260974e307" - ] + "[complete_ref_lens.bin, ctable.bin, ctg_offsets.bin, duplicate_clusters.tsv, info.json, mphf.bin, pos.bin, pre_indexing.log, rank.bin, refAccumLengths.bin, ref_indexing.log, reflengths.bin, refseq.bin, seq.bin, versionInfo.json]", + { + "SALMON_INDEX": { + "salmon": "1.10.3" + } + } ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.2" }, - "timestamp": "2025-01-17T16:47:11.559333" + "timestamp": "2025-01-20T12:08:00.388411" }, - "versions_transcriptome_only": { + "sarscov2 transcriptome only": { "content": [ - [ - "versions.yml:md5,85337fa0a286ea35073ee5260974e307" - ] + "[complete_ref_lens.bin, ctable.bin, ctg_offsets.bin, duplicate_clusters.tsv, info.json, mphf.bin, pos.bin, pre_indexing.log, rank.bin, refAccumLengths.bin, ref_indexing.log, reflengths.bin, refseq.bin, seq.bin, versionInfo.json]", + { + "SALMON_INDEX": { + "salmon": "1.10.3" + } + } ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.2" }, - "timestamp": "2025-01-17T16:47:02.453235" + "timestamp": "2025-01-20T12:08:09.405121" } } \ No newline at end of file From 27753c516b98fda4cde218d5b4f6395731b3ee1f Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 20 Jan 2025 12:55:03 +0000 Subject: [PATCH 3/4] Revert plain text versions --- modules/nf-core/salmon/index/tests/main.nf.test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/salmon/index/tests/main.nf.test b/modules/nf-core/salmon/index/tests/main.nf.test index 3a4eadfb6fc..30b7359b92f 100644 --- a/modules/nf-core/salmon/index/tests/main.nf.test +++ b/modules/nf-core/salmon/index/tests/main.nf.test @@ -28,7 +28,7 @@ nextflow_process { { assert path(process.out.index.get(0)).exists() }, { assert snapshot( file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(), - path(process.out.versions.get(0)).yaml + process.out.versions ).match()} ) } @@ -55,7 +55,7 @@ nextflow_process { { assert path(process.out.index.get(0)).exists() }, { assert snapshot( file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(), - path(process.out.versions.get(0)).yaml + process.out.versions ).match()} ) } @@ -82,7 +82,7 @@ nextflow_process { { assert path(process.out.index.get(0)).exists() }, { assert snapshot( file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(), - path(process.out.versions.get(0)).yaml + process.out.versions ).match()} ) } From 06441bc7cfe9e9cf46868ef26d446ce4cf7d26d1 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 20 Jan 2025 12:58:01 +0000 Subject: [PATCH 4/4] update snap --- .../salmon/index/tests/main.nf.test.snap | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/modules/nf-core/salmon/index/tests/main.nf.test.snap b/modules/nf-core/salmon/index/tests/main.nf.test.snap index a99c5d07f62..f8ed44d7df0 100644 --- a/modules/nf-core/salmon/index/tests/main.nf.test.snap +++ b/modules/nf-core/salmon/index/tests/main.nf.test.snap @@ -2,46 +2,40 @@ "sarscov2 stub": { "content": [ "[complete_ref_lens.bin, ctable.bin, ctg_offsets.bin, duplicate_clusters.tsv, info.json, mphf.bin, pos.bin, pre_indexing.log, rank.bin, refAccumLengths.bin, ref_indexing.log, reflengths.bin, refseq.bin, seq.bin, versionInfo.json]", - { - "SALMON_INDEX": { - "salmon": "1.10.3" - } - } + [ + "versions.yml:md5,85337fa0a286ea35073ee5260974e307" + ] ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.2" }, - "timestamp": "2025-01-20T12:08:18.365399" + "timestamp": "2025-01-20T12:57:51.498323" }, "sarscov2": { "content": [ "[complete_ref_lens.bin, ctable.bin, ctg_offsets.bin, duplicate_clusters.tsv, info.json, mphf.bin, pos.bin, pre_indexing.log, rank.bin, refAccumLengths.bin, ref_indexing.log, reflengths.bin, refseq.bin, seq.bin, versionInfo.json]", - { - "SALMON_INDEX": { - "salmon": "1.10.3" - } - } + [ + "versions.yml:md5,85337fa0a286ea35073ee5260974e307" + ] ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.2" }, - "timestamp": "2025-01-20T12:08:00.388411" + "timestamp": "2025-01-20T12:57:33.474302" }, "sarscov2 transcriptome only": { "content": [ "[complete_ref_lens.bin, ctable.bin, ctg_offsets.bin, duplicate_clusters.tsv, info.json, mphf.bin, pos.bin, pre_indexing.log, rank.bin, refAccumLengths.bin, ref_indexing.log, reflengths.bin, refseq.bin, seq.bin, versionInfo.json]", - { - "SALMON_INDEX": { - "salmon": "1.10.3" - } - } + [ + "versions.yml:md5,85337fa0a286ea35073ee5260974e307" + ] ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.2" }, - "timestamp": "2025-01-20T12:08:09.405121" + "timestamp": "2025-01-20T12:57:42.420247" } } \ No newline at end of file