Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Allow transcriptome-only salmon indexing #7327

Merged
merged 9 commits into from
Jan 20, 2025
27 changes: 17 additions & 10 deletions modules/nf-core/salmon/index/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,29 @@ process SALMON_INDEX {

script:
def args = task.ext.args ?: ''
def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 > decoys.txt"
def gentrome = "gentrome.fa"
if (genome_fasta.endsWith('.gz')) {
get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 > decoys.txt"
gentrome = "gentrome.fa.gz"
def decoys = ''
def fasta = transcript_fasta
if (genome_fasta){
if (genome_fasta.endsWith('.gz')) {
genome_fasta = "<(gunzip -c $genome_fasta)"
}
decoys='-d decoys.txt'
fasta='gentrome.fa'
}
if (transcript_fasta.endsWith('.gz')) {
transcript_fasta = "<(gunzip -c $transcript_fasta)"
}
"""
$get_decoy_ids
sed -i.bak -e 's/>//g' decoys.txt
cat $transcript_fasta $genome_fasta > $gentrome
if [ -n '$genome_fasta' ]; then
grep '^>' $genome_fasta | cut -d ' ' -f 1 | cut -d \$'\\t' -f 1 | sed 's/>//g' > decoys.txt
cat $transcript_fasta $genome_fasta > $fasta
fi

salmon \\
index \\
--threads $task.cpus \\
-t $gentrome \\
-d decoys.txt \\
-t $fasta \\
$decoys \\
$args \\
-i salmon

Expand Down
39 changes: 36 additions & 3 deletions modules/nf-core/salmon/index/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,43 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert path(process.out.index.get(0)).exists() },
{ assert snapshot(process.out.versions).match("versions") }
{ assert snapshot(
file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(),
process.out.versions
).match()}
)
}

}

test("sarscov2 stub") {
// Runs the process with an empty first input and only a transcriptome FASTA
// as the second input, then snapshots the names of the files in the index output.
test("sarscov2 transcriptome only") {

when {
params {
outdir = "$outputDir"
}
// input[0] is an empty list (presumably the genome FASTA slot left unset —
// confirm against the process input declaration); input[1] is the sarscov2
// transcriptome FASTA from the shared test-data location.
process {
"""
input[0] = Channel.of([])
input[1] = Channel.of([file(params.modules_testdata_base_path + "genomics/sarscov2/genome/transcriptome.fasta", checkIfExists: true)])
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert path(process.out.index.get(0)).exists() },
// Snapshot the sorted file names found in the index output (names only,
// not file contents) together with the versions output; match() is called
// without an explicit snapshot tag.
{ assert snapshot(
file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(),
process.out.versions
).match()}
)
}

}

test("sarscov2 stub") {
options "-stub"
when {
params {
Expand All @@ -50,7 +80,10 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert path(process.out.index.get(0)).exists() },
{ assert snapshot(process.out.versions).match("versions stub") }
{ assert snapshot(
file(process.out.index[0]).listFiles().collect { it.getName() }.sort().toString(),
process.out.versions
).match()}
)
}

Expand Down
31 changes: 23 additions & 8 deletions modules/nf-core/salmon/index/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,26 +1,41 @@
{
"versions": {
"sarscov2 stub": {
"content": [
"[complete_ref_lens.bin, ctable.bin, ctg_offsets.bin, duplicate_clusters.tsv, info.json, mphf.bin, pos.bin, pre_indexing.log, rank.bin, refAccumLengths.bin, ref_indexing.log, reflengths.bin, refseq.bin, seq.bin, versionInfo.json]",
[
"versions.yml:md5,85337fa0a286ea35073ee5260974e307"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
"nf-test": "0.9.2",
"nextflow": "24.10.2"
},
"timestamp": "2024-10-18T10:00:47.087293189"
"timestamp": "2025-01-20T12:57:51.498323"
},
"versions stub": {
"sarscov2": {
"content": [
"[complete_ref_lens.bin, ctable.bin, ctg_offsets.bin, duplicate_clusters.tsv, info.json, mphf.bin, pos.bin, pre_indexing.log, rank.bin, refAccumLengths.bin, ref_indexing.log, reflengths.bin, refseq.bin, seq.bin, versionInfo.json]",
[
"versions.yml:md5,85337fa0a286ea35073ee5260974e307"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
"nf-test": "0.9.2",
"nextflow": "24.10.2"
},
"timestamp": "2024-10-18T10:01:03.89824494"
"timestamp": "2025-01-20T12:57:33.474302"
},
"sarscov2 transcriptome only": {
"content": [
"[complete_ref_lens.bin, ctable.bin, ctg_offsets.bin, duplicate_clusters.tsv, info.json, mphf.bin, pos.bin, pre_indexing.log, rank.bin, refAccumLengths.bin, ref_indexing.log, reflengths.bin, refseq.bin, seq.bin, versionInfo.json]",
[
"versions.yml:md5,85337fa0a286ea35073ee5260974e307"
pinin4fjords marked this conversation as resolved.
Show resolved Hide resolved
]
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.2"
},
"timestamp": "2025-01-20T12:57:42.420247"
}
}
Loading