From 8d40b364d090080a12b83ff7a7747a5ee794ec78 Mon Sep 17 00:00:00 2001 From: nservant Date: Thu, 15 Jun 2023 19:12:07 +0200 Subject: [PATCH] [MODIF] add pairtools #162 --- conf/modules.config | 33 ++++++ conf/test.config | 2 + environment.yml | 6 +- main.nf | 1 + modules.json | 119 ++++++++++++++++++-- modules/local/cooltools/insulation.nf | 2 +- modules/local/pairtools/pairtools_merge.nf | 40 +++++++ modules/local/pairtools/pairtools_split.nf | 41 +++++++ modules/local/pairtools/pairtools_stats.nf | 39 +++++++ modules/nf-core/bwa/index/main.nf | 51 +++++++++ modules/nf-core/bwa/index/meta.yml | 42 +++++++ modules/nf-core/bwa/mem/main.nf | 43 +++++++ modules/nf-core/bwa/mem/meta.yml | 55 +++++++++ modules/nf-core/cooler/makebins/main.nf | 2 +- modules/nf-core/pairix/main.nf | 32 ++++++ modules/nf-core/pairix/meta.yml | 42 +++++++ modules/nf-core/pairtools/dedup/main.nf | 39 +++++++ modules/nf-core/pairtools/dedup/main.nf~ | 39 +++++++ modules/nf-core/pairtools/dedup/meta.yml | 44 ++++++++ modules/nf-core/pairtools/parse/main.nf | 43 +++++++ modules/nf-core/pairtools/parse/main.nf~ | 41 +++++++ modules/nf-core/pairtools/parse/meta.yml | 48 ++++++++ modules/nf-core/pairtools/restrict/main.nf | 39 +++++++ modules/nf-core/pairtools/restrict/meta.yml | 46 ++++++++ modules/nf-core/pairtools/select/main.nf | 38 +++++++ modules/nf-core/pairtools/select/meta.yml | 44 ++++++++ modules/nf-core/pairtools/sort/main.nf | 39 +++++++ modules/nf-core/pairtools/sort/main.nf~ | 40 +++++++ modules/nf-core/pairtools/sort/meta.yml | 40 +++++++ modules/nf-core/samtools/index/main.nf | 48 ++++++++ modules/nf-core/samtools/index/meta.yml | 53 +++++++++ subworkflows/local/pairtools.nf | 88 +++++++++++++++ subworkflows/local/prepare_genome.nf | 44 ++++++-- workflows/hic.nf | 44 +++++--- 34 files changed, 1325 insertions(+), 42 deletions(-) create mode 100644 modules/local/pairtools/pairtools_merge.nf create mode 100644 modules/local/pairtools/pairtools_split.nf create mode 100644 
modules/local/pairtools/pairtools_stats.nf create mode 100644 modules/nf-core/bwa/index/main.nf create mode 100644 modules/nf-core/bwa/index/meta.yml create mode 100644 modules/nf-core/bwa/mem/main.nf create mode 100644 modules/nf-core/bwa/mem/meta.yml create mode 100644 modules/nf-core/pairix/main.nf create mode 100644 modules/nf-core/pairix/meta.yml create mode 100644 modules/nf-core/pairtools/dedup/main.nf create mode 100644 modules/nf-core/pairtools/dedup/main.nf~ create mode 100644 modules/nf-core/pairtools/dedup/meta.yml create mode 100644 modules/nf-core/pairtools/parse/main.nf create mode 100644 modules/nf-core/pairtools/parse/main.nf~ create mode 100644 modules/nf-core/pairtools/parse/meta.yml create mode 100644 modules/nf-core/pairtools/restrict/main.nf create mode 100644 modules/nf-core/pairtools/restrict/meta.yml create mode 100644 modules/nf-core/pairtools/select/main.nf create mode 100644 modules/nf-core/pairtools/select/meta.yml create mode 100644 modules/nf-core/pairtools/sort/main.nf create mode 100644 modules/nf-core/pairtools/sort/main.nf~ create mode 100644 modules/nf-core/pairtools/sort/meta.yml create mode 100644 modules/nf-core/samtools/index/main.nf create mode 100644 modules/nf-core/samtools/index/meta.yml create mode 100644 subworkflows/local/pairtools.nf diff --git a/conf/modules.config b/conf/modules.config index 096a860..61104f1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -174,6 +174,39 @@ process { ] } + //******************************************* + // PAIRTOOLS + + withName: 'BWA_MEM' { + ext.args = '-5SP -T0' + } + + withName: 'PAIRTOOLS_PARSE' { + ext.args = { "--min-mapq 40 --walks-policy 5unique --max-inter-align-gap 30 --output-stats ${meta.id}_parse.stats --add-columns mapq --drop-sam --drop-seq" } + + } + + withName: 'PAIRTOOLS_DEDUP' { + ext.args = { "--mark-dups"} // --output-stats ${meta.id}_dedup.stats" } + } + + withName: 'PAIRTOOLS_SPLIT' { + ext.args = { params.save_interaction_bam ? 
"--output-sam ${meta.id}.bam" : '' } // closure: evaluated per task so meta.id is in scope (prefix is not defined in config scope) + } + + withName: 'PAIRTOOLS_SELECT' { + ext.args = { [ + "(mapq1>${params.min_mapq} and mapq2>${params.min_mapq})", + params.min_cis_dist > 0 ? " and (chrom1!=chrom2 or abs(pos1-pos2) >= ${params.min_cis_dist})" : '', // keep trans pairs and cis pairs at or beyond min_cis_dist + params.keep_multi ? " and ((pair_type=='UU') or (pair_type=='UR') or (pair_type=='RU') or (pair_type=='MM') or (pair_type=='MU'))" : + " and ((pair_type=='UU') or (pair_type=='UR') or (pair_type=='RU'))", + //params.min_insert_size > 0 ? " -s ${params.min_insert_size}" : '', + //params.max_insert_size > 0 ? " -l ${params.max_insert_size}" : '', + //params.min_restriction_fragment_size > 0 ? " -t ${params.min_restriction_fragment_size}" : '', + //params.max_restriction_fragment_size > 0 ? " -m ${params.max_restriction_fragment_size}" : '', + ].join(' ').trim() } + } + //***************************************** // QUALITY METRICS diff --git a/conf/test.config b/conf/test.config index 1501b02..9fb02d7 100644 --- a/conf/test.config +++ b/conf/test.config @@ -24,6 +24,8 @@ params { // Annotations fasta = 'https://github.com/nf-core/test-datasets/raw/hic/reference/W303_SGD_2015_JRIU00000000.fsa' + //fasta = '/data/annotations/pipelines/Human/hg38/genome/hg38.fa' + //bwa_index = '/data/annotations/pipelines/Human/hg38/indexes/bwamem2/' digestion = 'hindiii' min_mapq = 10 min_restriction_fragment_size = 100 diff --git a/environment.yml b/environment.yml index b8abcdf..ba1b598 100644 --- a/environment.yml +++ b/environment.yml @@ -1,5 +1,5 @@ # You can use this file to create a conda environment for this pipeline: -# conda env create -f environment.yml +# conda env create -f environment.yml name: nf-core-hic-2.0.0 channels: - conda-forge @@ -16,7 +16,9 @@ dependencies: - bioconda::pysam=0.19.0=py39h5030a8b_0 - conda-forge::pymdown-extensions=7.1=pyh9f0ad1d_0 - 
bioconda::cooler=0.8.11=pyh5e36f6f_1 - - bioconda::cooltools=0.5.1=py39h5371cbf_1 + - bioconda::pairtools=1.0.2 + - bioconda::cooltools=0.5.1 + - bioconda::bwa-mem2=2.2.1 - 
bioconda::bowtie2=2.4.5=py39hd2f7db1_2 - bioconda::samtools=1.15.1=h1170115_0 - bioconda::multiqc=1.12=pyhdfd78af_0 diff --git a/main.nf b/main.nf index 82aaf0f..d87bb41 100644 --- a/main.nf +++ b/main.nf @@ -19,6 +19,7 @@ nextflow.enable.dsl = 2 params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') params.bwt2_index = WorkflowMain.getGenomeAttribute(params, 'bowtie2') +params.bwa_index = WorkflowMain.getGenomeAttribute(params, 'bwamem2') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/modules.json b/modules.json index 6d7a030..0de1673 100644 --- a/modules.json +++ b/modules.json @@ -8,52 +8,149 @@ "bowtie2/align": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bowtie2/build": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "bwa/index": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": [ + "modules" + ] + }, + "bwa/mem": { + "branch": "master", + "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "installed_by": [ + "modules" + ] + }, + "bwamem2/index": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": [ + "modules" + ] + }, + "bwamem2/mem": { + "branch": "master", + "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", + "installed_by": [ + "modules" + ] }, "cooler/balance": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/cload": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/dump": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] 
+ "installed_by": [ + "modules" + ] }, "cooler/makebins": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/zoomify": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/getchromsizes": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "pairix": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": [ + "modules" + ] + }, + "pairtools/dedup": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": [ + "modules" + ] + }, + "pairtools/parse": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": [ + "modules" + ] + }, + "pairtools/restrict": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": [ + "modules" + ] + }, + "pairtools/select": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": [ + "modules" + ] + }, + "pairtools/sort": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": [ + "modules" + ] + }, + "samtools/index": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": [ + "modules" + ] } } }, @@ -62,4 +159,4 @@ } } } -} +} \ No newline at end of file diff --git a/modules/local/cooltools/insulation.nf 
b/modules/local/cooltools/insulation.nf index af53529..77a9472 100644 --- a/modules/local/cooltools/insulation.nf +++ b/modules/local/cooltools/insulation.nf @@ -26,7 +26,7 @@ process COOLTOOLS_INSULATION { cat <<-END_VERSIONS > versions.yml "${task.process}": - cooltools: \$(cooltools --version 2>&1 | sed 's/cooltools, version //') + cooltools: \$(cooltools --version 2>&1 | grep version | sed 's/cooltools, version //') END_VERSIONS """ } diff --git a/modules/local/pairtools/pairtools_merge.nf b/modules/local/pairtools/pairtools_merge.nf new file mode 100644 index 0000000..eefe134 --- /dev/null +++ b/modules/local/pairtools/pairtools_merge.nf @@ -0,0 +1,40 @@ +/* + * Pairtools - merge + * Merge multiple sorted pairs files + */ + +process PAIRTOOLS_MERGE { + tag "${meta.id}" + label 'process_medium' + + + // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved + // Not an issue with the biocontainers because they were built prior to numpy 1.24 + conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : + 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" + + input: + tuple val(meta), path(allpairs) + + output: + tuple val(meta), path("*pairs.gz"), emit:pairs + path("versions.yml"), emit:versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_merged" + """ + pairtools merge \ + ${args} \ + --nproc ${task.cpus} \ + -o ${prefix}.pairs.gz \ + ${allpairs} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools, version //') + END_VERSIONS + """ +} diff --git a/modules/local/pairtools/pairtools_split.nf b/modules/local/pairtools/pairtools_split.nf new file mode 100644 index 0000000..672330c --- /dev/null +++ b/modules/local/pairtools/pairtools_split.nf @@ -0,0 +1,41 @@ +/* + * Pairtools - Split + * Split a .pairsam file into .pairs and .sam + */ + +process PAIRTOOLS_SPLIT { + tag "${meta.id}" + label 'process_medium' + + // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved + // Not an issue with the biocontainers because they were built prior to numpy 1.24 + conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : + 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" + + input: + tuple val(meta), path(pairs) + + output: + tuple val(meta), path("*.split.pairs.gz"), emit:pairs + tuple val(meta), path("*.bam"), optional:true, emit:bam + tuple val(meta), path("*.txt"), optional: true, emit:stats + path("versions.yml"), emit:versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + pairtools split \ + --nproc-in ${task.cpus} --nproc-out ${task.cpus} \ + --output-pairs ${prefix}.split.pairs.gz \ + ${args} \ + ${pairs} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools, version //') + END_VERSIONS + """ +} diff --git a/modules/local/pairtools/pairtools_stats.nf b/modules/local/pairtools/pairtools_stats.nf new file mode 100644 index 0000000..4bd65a3 --- /dev/null +++ b/modules/local/pairtools/pairtools_stats.nf @@ -0,0 +1,39 @@ +/* + * Pairtools - Stats + * Statistics on pairs file + */ + +process PAIRTOOLS_STATS { + tag "${meta.id}" + label 'process_low' + + // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved + // Not an issue with the biocontainers because they were built prior to numpy 1.24 + conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : + 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" + + input: + tuple val(meta), path(pairs) + + output: + tuple val(meta), path("*txt"), emit:stats + path("versions.yml"), emit:versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}_stats" + """ + pairtools stats \ + ${args} \ + --nproc-in ${task.cpus} --nproc-out ${task.cpus} \ + -o ${prefix}.txt \ + ${pairs} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools, version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf new file mode 100644 index 0000000..8d2e56d --- /dev/null +++ b/modules/nf-core/bwa/index/main.nf @@ -0,0 +1,51 @@ +process BWA_INDEX { + tag "$fasta" + label 'process_single' + + conda "bioconda::bwa=0.7.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : + 'biocontainers/bwa:0.7.17--hed695b0_7' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path(bwa) , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir bwa + bwa \\ + index \\ + $args \\ + -p bwa/${fasta.baseName} \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ + + stub: + """ + mkdir bwa + + touch bwa/genome.amb + touch bwa/genome.ann + touch bwa/genome.bwt + touch bwa/genome.pac + touch bwa/genome.sa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml new file mode 100644 index 0000000..2c6cfcd --- /dev/null +++ b/modules/nf-core/bwa/index/meta.yml @@ -0,0 +1,42 @@ +name: bwa_index +description: Create BWA index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "*.{amb,ann,bwt,pac,sa}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/bwa/mem/main.nf b/modules/nf-core/bwa/mem/main.nf new file mode 100644 index 0000000..d2f85da --- /dev/null +++ b/modules/nf-core/bwa/mem/main.nf @@ -0,0 +1,43 @@ +process BWA_MEM { + tag "$meta.id" + label 'process_high' + + conda "bioconda::bwa=0.7.17 bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' : + 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index) + val sort_bam + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 
'sort' : 'view' + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + bwa mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwa/mem/meta.yml b/modules/nf-core/bwa/mem/meta.yml new file mode 100644 index 0000000..62357bf --- /dev/null +++ b/modules/nf-core/bwa/mem/meta.yml @@ -0,0 +1,55 @@ +name: bwa_mem +description: Performs fastq alignment to a fasta reference using BWA +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sam +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@jeremy1805" diff --git a/modules/nf-core/cooler/makebins/main.nf b/modules/nf-core/cooler/makebins/main.nf index 25d6a40..8c555d1 100644 --- a/modules/nf-core/cooler/makebins/main.nf +++ b/modules/nf-core/cooler/makebins/main.nf @@ -1,5 +1,5 @@ process COOLER_MAKEBINS { - tag "${meta.id}}" + tag "${meta.id}" label 'process_low' conda "bioconda::cooler=0.8.11" diff --git a/modules/nf-core/pairix/main.nf b/modules/nf-core/pairix/main.nf new file mode 100644 index 0000000..66bf652 --- /dev/null +++ b/modules/nf-core/pairix/main.nf @@ -0,0 +1,32 @@ +process PAIRIX { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::pairix=0.3.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pairix:0.3.7--py36h30a8e3e_3' : + 'biocontainers/pairix:0.3.7--py36h30a8e3e_3' }" + + input: + tuple val(meta), path(pair) + + output: + tuple val(meta), path(pair), path("*.px2"), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + pairix \\ + $args \\ + $pair + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pairix: \$(echo \$(pairix --help 2>&1) | sed 's/^.*Version: //; s/Usage.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/pairix/meta.yml b/modules/nf-core/pairix/meta.yml new file mode 100644 index 0000000..e1318ef --- /dev/null +++ b/modules/nf-core/pairix/meta.yml @@ -0,0 +1,42 @@ +name: pairix +description: | + a tool for indexing and querying on a block-compressed text file + containing pairs of genomic coordinates +keywords: + - index +tools: + - pairix: + description: 2D indexing on bgzipped text files of paired genomic coordinates + homepage: "https://github.com/4dn-dcic/pairix" + documentation: "https://github.com/4dn-dcic/pairix" + tool_dev_url: "https://github.com/4dn-dcic/pairix" + + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - pair: + type: file + description: pair file + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - index: + type: file + description: pair index file + pattern: "*.px2" + +authors: + - "@jianhong" diff --git a/modules/nf-core/pairtools/dedup/main.nf b/modules/nf-core/pairtools/dedup/main.nf new file mode 100644 index 0000000..ef7f01f --- /dev/null +++ b/modules/nf-core/pairtools/dedup/main.nf @@ -0,0 +1,39 @@ +process PAIRTOOLS_DEDUP { + tag "$meta.id" + label 'process_high' + + // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved + // Not an issue with the biocontainers because they were built prior to numpy 1.24 + conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : + 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.pairs.gz") , emit: pairs + tuple val(meta), path("*.pairs.stat"), emit: stat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + pairtools dedup \\ + $args \\ + --n-proc ${task.cpus} \\ + -o ${prefix}.pairs.gz \\ + --output-stats ${prefix}.pairs.stat \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/pairtools/dedup/main.nf~ b/modules/nf-core/pairtools/dedup/main.nf~ new file mode 100644 index 0000000..44a4ef7 --- /dev/null +++ b/modules/nf-core/pairtools/dedup/main.nf~ @@ -0,0 +1,39 @@ +process PAIRTOOLS_DEDUP { + tag "$meta.id" + label 'process_high' + + // Pinning numpy to 1.23 until 
https://github.com/open2c/pairtools/issues/170 is resolved + // Not an issue with the biocontainers because they were built prior to numpy 1.24 + conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : + 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.pairs.gz") , emit: pairs + tuple val(meta), path("*.pairs.stat"), emit: stat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + pairtools dedup \\ + $args \\ + --n-proc ${task.cpus \\ + -o ${prefix}.pairs.gz \\ + --output-stats ${prefix}.pairs.stat \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/pairtools/dedup/meta.yml b/modules/nf-core/pairtools/dedup/meta.yml new file mode 100644 index 0000000..14c1980 --- /dev/null +++ b/modules/nf-core/pairtools/dedup/meta.yml @@ -0,0 +1,44 @@ +name: pairtools_dedup +description: Find and remove PCR/optical duplicates +keywords: + - dedup +tools: + - pairtools: + description: CLI tools to process mapped Hi-C data + homepage: http://pairtools.readthedocs.io/ + documentation: http://pairtools.readthedocs.io/ + tool_dev_url: https://github.com/mirnylab/pairtools + + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: pair file + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - pairs: + type: file + description: Duplicates removed pairs + pattern: "*.{pairs.gz}" + - stat: + type: file + description: stats of the pairs + pattern: "*.{pairs.stat}" + +authors: + - "@jianhong" diff --git a/modules/nf-core/pairtools/parse/main.nf b/modules/nf-core/pairtools/parse/main.nf new file mode 100644 index 0000000..a21f37a --- /dev/null +++ b/modules/nf-core/pairtools/parse/main.nf @@ -0,0 +1,43 @@ +process PAIRTOOLS_PARSE { + tag "$meta.id" + label 'process_low' + + // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved + // Not an issue with the biocontainers because they were built prior to numpy 1.24 + conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : + 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" + + input: + tuple val(meta), path(bam) + tuple val(meta2), path(chromsizes) + + output: + tuple val(meta), path("*.pairsam.gz") , emit: pairsam + tuple val(meta), path("*.pairsam.stat"), emit: stat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def assembly = meta2.id ? 
"--assembly ${meta2.id}" : "" + """ + pairtools parse \\ + -c $chromsizes \\ + --nproc-in ${task.cpus} --nproc-out ${task.cpus} \\ + $args \\ + $assembly \\ + --output-stats ${prefix}.pairsam.stat \\ + -o ${prefix}.pairsam.gz \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/pairtools/parse/main.nf~ b/modules/nf-core/pairtools/parse/main.nf~ new file mode 100644 index 0000000..354e4b3 --- /dev/null +++ b/modules/nf-core/pairtools/parse/main.nf~ @@ -0,0 +1,41 @@ +process PAIRTOOLS_PARSE { + tag "$meta.id" + label 'process_low' + + // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved + // Not an issue with the biocontainers because they were built prior to numpy 1.24 + conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : + 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" + + input: + tuple val(meta), path(bam) + path chromsizes + + output: + tuple val(meta), path("*.pairsam.gz") , emit: pairsam + tuple val(meta), path("*.pairsam.stat"), emit: stat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + pairtools \\ + parse \\ + -c $chromsizes \\ + $args \\ + --output-stats ${prefix}.pairsam.stat \\ + -o ${prefix}.pairsam.gz \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/pairtools/parse/meta.yml b/modules/nf-core/pairtools/parse/meta.yml new file mode 100644 index 0000000..5c0ce4a --- /dev/null +++ b/modules/nf-core/pairtools/parse/meta.yml @@ -0,0 +1,48 @@ +name: pairtools_parse +description: Find ligation junctions in .sam, make .pairs +keywords: + - parse +tools: + - pairtools: + description: CLI tools to process mapped Hi-C data + homepage: http://pairtools.readthedocs.io/ + documentation: http://pairtools.readthedocs.io/ + tool_dev_url: https://github.com/mirnylab/pairtools + + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - chromsizes: + type: file + description: chromosome size file + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - pairsam: + type: file + description: parsed pair file + pattern: "*.{pairsam.gz}" + - stat: + type: file + description: stats of the pairs + pattern: "*.{pairsam.stat}" + +authors: + - "@jianhong" diff --git a/modules/nf-core/pairtools/restrict/main.nf b/modules/nf-core/pairtools/restrict/main.nf new file mode 100644 index 0000000..3adc2f7 --- /dev/null +++ b/modules/nf-core/pairtools/restrict/main.nf @@ -0,0 +1,39 @@ +process PAIRTOOLS_RESTRICT { + tag "$meta.id" + label 'process_high' + + // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved + // Not an issue with the biocontainers because they were built prior to numpy 1.24 + conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' : + 'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }" + + input: + tuple val(meta), path(pairs) + path frag + + output: + tuple val(meta), path("*.pairs.gz"), emit: restrict + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + pairtools \\ + restrict \\ + -f $frag \\ + $args \\ + -o ${prefix}.pairs.gz \\ + $pairs + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/pairtools/restrict/meta.yml b/modules/nf-core/pairtools/restrict/meta.yml new file mode 100644 index 0000000..fc70b0c --- /dev/null +++ b/modules/nf-core/pairtools/restrict/meta.yml @@ -0,0 +1,46 @@ +name: pairtools_restrict +description: Assign restriction fragments to pairs +keywords: + - sort 
+tools:
+  - pairtools:
+      description: CLI tools to process mapped Hi-C data
+      homepage: http://pairtools.readthedocs.io/
+      documentation: http://pairtools.readthedocs.io/
+      tool_dev_url: https://github.com/mirnylab/pairtools
+
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - pairs:
+      type: file
+      description: pairs file
+  - frag:
+      type: file
+      description: |
+        a tab-separated BED file with the positions of restriction fragments
+        (chrom, start, end).
+        Can be generated using cooler digest.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - restrict:
+      type: file
+      description: Filtered pairs file
+      pattern: "*.{pairs.gz}"
+
+authors:
+  - "@jianhong"
diff --git a/modules/nf-core/pairtools/select/main.nf b/modules/nf-core/pairtools/select/main.nf
new file mode 100644
index 0000000..fb4a1a3
--- /dev/null
+++ b/modules/nf-core/pairtools/select/main.nf
@@ -0,0 +1,38 @@
+process PAIRTOOLS_SELECT {
+    tag "$meta.id"
+    label 'process_medium'
+
+    // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved
+    // Not an issue with the biocontainers because they were built prior to numpy 1.24
+    conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' :
+        'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }"
+
+    input:
+    tuple val(meta), path(input)
+
+    output:
+    tuple val(meta), path("*.selected.pairs.gz")  , emit: selected   // file written by -o
+    tuple val(meta), path("*.unselected.pairs.gz"), emit: unselected // file written by --output-rest
+    path "versions.yml"                           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // interpolated inside double quotes below, so the whole string reaches pairtools select as ONE argument
+    def prefix = task.ext.prefix ?: "${meta.id}" // basename shared by both output files
+    """
+    pairtools select \\
+        "$args" \\
+        -o ${prefix}.selected.pairs.gz \\
+        --output-rest ${prefix}.unselected.pairs.gz \\
+        ${input}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/pairtools/select/meta.yml b/modules/nf-core/pairtools/select/meta.yml
new file mode 100644
index 0000000..d2008c0
--- /dev/null
+++ b/modules/nf-core/pairtools/select/meta.yml
@@ -0,0 +1,44 @@
+name: pairtools_select
+description: Select pairs according to given condition by options.args
+keywords:
+  - select
+tools:
+  - pairtools:
+      description: CLI tools to process mapped Hi-C data
+      homepage: http://pairtools.readthedocs.io/
+      documentation: http://pairtools.readthedocs.io/
+      tool_dev_url: https://github.com/mirnylab/pairtools
+
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - input:
+      type: file
+      description: pairs file
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - selected:
+      type: file
+      description: Selected pairs file
+      pattern: "*.{selected.pairs.gz}"
+  - unselected:
+      type: file
+      description: Rest pairs file.
+      pattern: "*.{unselected.pairs.gz}"
+
+authors:
+  - "@jianhong"
diff --git a/modules/nf-core/pairtools/sort/main.nf b/modules/nf-core/pairtools/sort/main.nf
new file mode 100644
index 0000000..23fedac
--- /dev/null
+++ b/modules/nf-core/pairtools/sort/main.nf
@@ -0,0 +1,39 @@
+process PAIRTOOLS_SORT {
+    tag "$meta.id"
+    label 'process_high'
+
+    // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved
+    // Not an issue with the biocontainers because they were built prior to numpy 1.24
+    conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' :
+        'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }"
+
+    input:
+    tuple val(meta), path(input)
+
+    output:
+    tuple val(meta), path("*.pairs.gz"), emit: sorted // written via -o ${prefix}.pairs.gz
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def mem = task.memory ? "${task.memory.toMega()}M" : '2G' // whole MB: stripping '.' turned "1.5 GB" into "15G"; '2G' fallback (assumed pairtools default - confirm) when no memory is set
+    """
+    pairtools sort \\
+        $args \\
+        --nproc $task.cpus \\
+        --memory "$mem" \\
+        -o ${prefix}.pairs.gz \\
+        $input
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/pairtools/sort/main.nf~ b/modules/nf-core/pairtools/sort/main.nf~
new file mode 100644
index 0000000..68c48bc
--- /dev/null
+++ b/modules/nf-core/pairtools/sort/main.nf~
@@ -0,0 +1,40 @@
+process PAIRTOOLS_SORT {
+    tag "$meta.id"
+    label 'process_high'
+
+    // Pinning numpy to 1.23 until https://github.com/open2c/pairtools/issues/170 is resolved
+    // Not an issue with the biocontainers because they were built prior to numpy 1.24
+    conda "bioconda::pairtools=1.0.2 conda-forge::numpy=1.23"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pairtools:1.0.2--py39h2a9f597_0' :
+        'biocontainers/pairtools:1.0.2--py39h2a9f597_0' }"
+
+    input:
+    tuple val(meta), path(input)
+
+    output:
+    tuple val(meta), path("*.pairs.gz"), emit: sorted
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def mem = task.memory.toString().replaceAll(/(\s|\.|B)+/, '')
+    """
+    pairtools \\
+        sort \\
+        $args \\
+        --nproc $task.cpus \\
+        --memory "$mem" \\
+        -o ${prefix}.pairs.gz \\
+        $input
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pairtools: \$(pairtools --version 2>&1 | sed 's/pairtools.*version //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/pairtools/sort/meta.yml b/modules/nf-core/pairtools/sort/meta.yml
new file mode 100644
index 0000000..0e068de
--- /dev/null
+++ b/modules/nf-core/pairtools/sort/meta.yml
@@ -0,0 +1,40 @@
+name: pairtools_sort
+description: Sort a .pairs/.pairsam file
+keywords:
+  - sort
+tools:
+  - pairtools:
+      description: CLI tools to process mapped Hi-C data
+      homepage: http://pairtools.readthedocs.io/
+      documentation: http://pairtools.readthedocs.io/
+      tool_dev_url: https://github.com/mirnylab/pairtools
+
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - input:
+      type: file
+      description: A pairs file
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
[ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - sorted:
+      type: file
+      description: Sorted pairs file
+      pattern: "*.{pairs.gz}"
+
+authors:
+  - "@jianhong"
diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf
new file mode 100644
index 0000000..0b20aa4
--- /dev/null
+++ b/modules/nf-core/samtools/index/main.nf
@@ -0,0 +1,48 @@
+process SAMTOOLS_INDEX {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "bioconda::samtools=1.17"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
+        'biocontainers/samtools:1.17--h00cdaf9_0' }"
+
+    input:
+    tuple val(meta), path(input)
+
+    output:
+    tuple val(meta), path("*.bai") , optional:true, emit: bai  // every index output is optional: a missing index type is not an error
+    tuple val(meta), path("*.csi") , optional:true, emit: csi
+    tuple val(meta), path("*.crai"), optional:true, emit: crai
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    samtools \\
+        index \\
+        -@ ${task.cpus-1} \\
+        $args \\
+        $input
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch ${input}.bai
+    touch ${input}.crai
+    touch ${input}.csi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml
new file mode 100644
index 0000000..8bd2fa6
--- /dev/null
+++ b/modules/nf-core/samtools/index/meta.yml
@@ -0,0 +1,53 @@
+name: samtools_index
+description: Index SAM/BAM/CRAM file
+keywords:
+  - index
+  - bam
+  - sam
+  - cram
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - input:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bai:
+      type: file
+      description: BAM/CRAM/SAM index file
+      pattern: "*.{bai}"
+  - crai:
+      type: file
+      description: BAM/CRAM/SAM index file
+      pattern: "*.{crai}"
+  - csi:
+      type: file
+      description: CSI index file
+      pattern: "*.{csi}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@ewels"
+  - "@maxulysse"
diff --git a/subworkflows/local/pairtools.nf b/subworkflows/local/pairtools.nf
new file mode 100644
index 0000000..7470e41
--- /dev/null
+++ b/subworkflows/local/pairtools.nf
@@ -0,0 +1,88 @@
+/*
+ * PAIRTOOLS
+ * MAIN WORKFLOW
+ * From the raw sequencing reads to the list of valid interactions
+ */
+
+//include { BWAMEM2_MEM } from '../../modules/nf-core/bwamem2/mem/main'
+include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main'
+include { PAIRTOOLS_DEDUP } from '../../modules/nf-core/pairtools/dedup/main'
+include { PAIRTOOLS_PARSE } from '../../modules/nf-core/pairtools/parse/main'
+include { PAIRTOOLS_RESTRICT } from '../../modules/nf-core/pairtools/restrict/main'
+include { PAIRTOOLS_SELECT } from '../../modules/nf-core/pairtools/select/main'
+include { PAIRTOOLS_SORT } from
'../../modules/nf-core/pairtools/sort/main'
+include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
+include { PAIRIX } from '../../modules/nf-core/pairix/main'
+
+include { PAIRTOOLS_MERGE } from '../../modules/local/pairtools/pairtools_merge'
+include { PAIRTOOLS_SPLIT } from '../../modules/local/pairtools/pairtools_split'
+include { PAIRTOOLS_STATS } from '../../modules/local/pairtools/pairtools_stats'
+
+
+// Return [meta, file] where meta is a copy of row[0] without its 'chunk' key (row itself is not modified)
+def removeChunks(row){
+    def meta = row[0].clone() // 'def' keeps the copy local; an undeclared name is script-global and shared between calls
+    meta.remove('chunk')
+    return [meta, row[1]]
+}
+
+workflow PAIRTOOLS {
+
+    take:
+    reads // [meta, read1, read2]
+    index // [meta2, path]
+    chrsize // path
+
+    main:
+    ch_versions = Channel.empty()
+
+    BWA_MEM(
+        reads,
+        index,
+        Channel.value("view")
+    )
+
+    PAIRTOOLS_PARSE(
+        BWA_MEM.out.bam,
+        chrsize
+    )
+    ch_versions = ch_versions.mix(PAIRTOOLS_PARSE.out.versions)
+    PAIRTOOLS_SORT(
+        PAIRTOOLS_PARSE.out.pairsam
+    )
+    ch_versions = ch_versions.mix(PAIRTOOLS_SORT.out.versions)
+    ch_valid_pairs = PAIRTOOLS_SORT.out.sorted.map{ it -> removeChunks(it)}.groupTuple() // regroup per-chunk files under the chunk-free meta
+    PAIRTOOLS_MERGE(
+        ch_valid_pairs
+    )
+
+    PAIRTOOLS_DEDUP(
+        PAIRTOOLS_MERGE.out.pairs
+    )
+
+    PAIRTOOLS_SPLIT(
+        PAIRTOOLS_DEDUP.out.pairs
+    )
+
+    SAMTOOLS_INDEX(
+        PAIRTOOLS_SPLIT.out.bam
+    )
+    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions)
+    PAIRTOOLS_SELECT(
+        PAIRTOOLS_SPLIT.out.pairs
+    )
+    ch_versions = ch_versions.mix(PAIRTOOLS_SELECT.out.versions)
+    PAIRTOOLS_STATS(
+        PAIRTOOLS_SPLIT.out.pairs
+    )
+
+    PAIRIX(
+        PAIRTOOLS_SELECT.out.selected
+    )
+    // NOTE(review): also mix BWA_MEM, PAIRTOOLS_DEDUP, PAIRIX and the local MERGE/SPLIT/STATS versions once confirmed they emit 'versions'
+    emit:
+    versions = ch_versions
+    pairs = PAIRIX.out.index
+    bam = PAIRTOOLS_SPLIT.out.bam.join(SAMTOOLS_INDEX.out.bai)
+    stats = PAIRTOOLS_STATS.out.stats.map{it->it[1]}
+}
diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index a4a2399..6a55e15 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -3,6 +3,7 @@ */ include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main' +include { BWA_INDEX } from '../../modules/nf-core/bwa/index/main' include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main' include { GET_RESTRICTION_FRAGMENTS } from
'../../modules/local/hicpro/get_restriction_fragments' @@ -16,18 +17,37 @@ workflow PREPARE_GENOME { ch_versions = Channel.empty() //*************************************** - // Bowtie Index - if(!params.bwt2_index){ - BOWTIE2_BUILD ( - fasta - ) - ch_index = BOWTIE2_BUILD.out.index - ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) - }else{ - Channel.fromPath( params.bwt2_index , checkIfExists: true) - .map { it -> [[:], it]} - .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" } - .set { ch_index } + // Bowtie index + if (params.processing == "hicpro"){ + if(!params.bwt2_index){ + BOWTIE2_BUILD ( + fasta + ) + ch_index = BOWTIE2_BUILD.out.index + ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) + }else{ + Channel.fromPath( params.bwt2_index , checkIfExists: true) + .map { it -> [[:], it]} + .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" } + .set { ch_index } + } + } + + //*************************************** + // Bwa-mem index + if (params.processing == "pairtools"){ + if(!params.bwa_index){ + BWA_INDEX ( + fasta + ) + ch_index = BWA_INDEX.out.index + ch_versions = ch_versions.mix(BWA_INDEX.out.versions) + }else{ + Channel.fromPath( params.bwa_index , checkIfExists: true) + .map { it -> [[:], it]} + .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwa_index}" } + .set { ch_index } + } } //*************************************** diff --git a/workflows/hic.nf b/workflows/hic.nf index 2ffa5b4..e996a0f 100644 --- a/workflows/hic.nf +++ b/workflows/hic.nf @@ -108,6 +108,7 @@ include { MULTIQC } from '../modules/local/multiqc' include { INPUT_CHECK } from '../subworkflows/local/input_check' include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' include { HICPRO } from '../subworkflows/local/hicpro' +include { PAIRTOOLS } from '../subworkflows/local/pairtools' include { COOLER } from '../subworkflows/local/cooler' include { COMPARTMENTS } from 
'../subworkflows/local/compartments' include { TADS } from '../subworkflows/local/tads' @@ -130,9 +131,10 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +def genomeName = params.genome ?: params.fasta.substring(params.fasta.lastIndexOf(File.separator)+1) Channel.fromPath( params.fasta ) .ifEmpty { exit 1, "Genome index: Fasta file not found: ${params.fasta}" } - .map{it->[[:],it]} + .map{it->[[id:genomeName],it]} .set { ch_fasta } /* @@ -175,22 +177,34 @@ workflow HIC { // // SUB-WORFLOW: HiC-Pro // - INPUT_CHECK.out.reads.view() - HICPRO ( - INPUT_CHECK.out.reads, - PREPARE_GENOME.out.index, - PREPARE_GENOME.out.res_frag, - PREPARE_GENOME.out.chromosome_size, - ch_ligation_site, - ch_map_res - ) - ch_versions = ch_versions.mix(HICPRO.out.versions) + if (params.processing == 'hicpro'){ + HICPRO ( + INPUT_CHECK.out.reads, + PREPARE_GENOME.out.index, + PREPARE_GENOME.out.res_frag, + PREPARE_GENOME.out.chromosome_size, + ch_ligation_site, + ch_map_res + ) + ch_versions = ch_versions.mix(HICPRO.out.versions) + ch_pairs = HICPRO.out.pairs + ch_process_mqc = HICPRO.out.mqc + }else if (params.processing == 'pairtools'){ + PAIRTOOLS( + INPUT_CHECK.out.reads, + PREPARE_GENOME.out.index, + PREPARE_GENOME.out.chromosome_size + ) + ch_versions = ch_versions.mix(PAIRTOOLS.out.versions) + ch_pairs = PAIRTOOLS.out.pairs + ch_process_mqc = PAIRTOOLS.out.stats + } // // SUB-WORKFLOW: COOLER // COOLER ( - HICPRO.out.pairs, + ch_pairs, PREPARE_GENOME.out.chromosome_size, ch_map_res ) @@ -239,7 +253,7 @@ workflow HIC { .filter{ it[0].resolution == it[2] } .map { it -> [it[0], it[1]]} .set{ ch_cool_tads } - + TADS( ch_cool_tads ) @@ -264,14 +278,14 @@ workflow HIC { ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) 
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.map{it->it[1]}) - ch_multiqc_files = ch_multiqc_files.mix(HICPRO.out.mqc) + ch_multiqc_files = ch_multiqc_files.mix(ch_process_mqc) MULTIQC ( ch_multiqc_config, ch_multiqc_custom_config.collect().ifEmpty([]), ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'), FASTQC.out.zip.map{it->it[1]}, - HICPRO.out.mqc.collect() + ch_process_mqc.collect() ) multiqc_report = MULTIQC.out.report.toList() }