nf-core · JoseEspinosa · Nov 15, 2024 · Nov 4, 2024 · Nov 4, 2024 · Nov 4, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [[#179](https://github.com/nf-core/proteinfold/issues/179)]- Produce an interactive html report for the predicted structures.
 - [[#180](https://github.com/nf-core/proteinfold/issues/180)]- Implement Foldseek.
 - [[#188](https://github.com/nf-core/proteinfold/issues/188)]- Fix colabfold image to run in gpus.
+- [[PR #205](https://github.com/nf-core/proteinfold/pull/205)] - Change input schema from `sequence,fasta` to `id,fasta`.
+- [[PR #210](https://github.com/nf-core/proteinfold/pull/210)] - Move post-processing logic to a subworkflow, change wave images pointing to oras to point to https and refactor modules to match nf-core folder structure.
 
 ## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2024-07-30
 
@@ -72,7 +74,6 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements
 - [[PR #163](https://github.com/nf-core/proteinfold/pull/163)] - Fix full test CI.
 - [[#150](https://github.com/nf-core/proteinfold/issues/150)] - Add thanks to the AWS Open Data Sponsorship program in `README.md`.
 - [[PR #166](https://github.com/nf-core/proteinfold/pull/166)] - Create 2 different parameters for Colabfold and ESMfold number of recycles.
-- [[PR ##205](https://github.com/nf-core/proteinfold/pull/205)] - Change input schema from `sequence,fasta` to `id,fasta`.
 
 ### Parameters
 

diff --git a/assets/proteinfold_template.html → assets/report_template.html b/assets/proteinfold_template.html → assets/report_template.html
diff --git a/main.nf b/main.nf
@@ -52,21 +52,26 @@ params.colabfold_alphafold2_params_path = getColabfoldAlphafold2ParamsPath()
 //
 // WORKFLOW: Run main analysis pipeline
 //
+
+ch_dummy_file = Channel.fromPath("$projectDir/assets/NO_FILE")
+
 workflow NFCORE_PROTEINFOLD {
 
     take:
     samplesheet // channel: samplesheet read in from --input
 
     main:
     ch_samplesheet              = samplesheet
+    ch_alphafold_top_ranked_pdb = Channel.empty()
+    ch_colabfold_top_ranked_pdb = Channel.empty()
+    ch_esmfold_top_ranked_pdb   = Channel.empty()
     ch_multiqc                  = Channel.empty()
     ch_versions                 = Channel.empty()
     ch_report_input             = Channel.empty()
     ch_foldseek_db              = Channel.empty()
-    ch_colabfold_out            = Channel.empty()
-    ch_esmfold_out              = Channel.empty()
-    ch_alphafold2_out           = Channel.empty()
     requested_modes             = params.mode.toLowerCase().split(",")
+    requested_modes_size        = requested_modes.size()
+
     //
     // WORKFLOW: Run alphafold2
     //
@@ -122,16 +127,10 @@ workflow NFCORE_PROTEINFOLD {
             PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres,
             PREPARE_ALPHAFOLD2_DBS.out.uniprot
         )
-        ch_multiqc  = ch_multiqc.mix(ALPHAFOLD2.out.multiqc_report.collect())
-        ch_versions = ch_versions.mix(ALPHAFOLD2.out.versions)
-        ch_report_input = ch_report_input.mix(
-            ALPHAFOLD2.out.pdb.join(ALPHAFOLD2.out.msa).map{it[0]["model"] = "alphafold2"; it}
-        )
-        ALPHAFOLD2
-            .out
-            .main_pdb
-            .map{[it[0]["id"], it[0], it[1]]}
-            .set{ch_alphafold2_out}
+        ch_alphafold_top_ranked_pdb = ALPHAFOLD2.out.top_ranked_pdb
+        ch_multiqc                  = ch_multiqc.mix(ALPHAFOLD2.out.multiqc_report.collect())
+        ch_versions                 = ch_versions.mix(ALPHAFOLD2.out.versions)
+        ch_report_input             = ch_report_input.mix(ALPHAFOLD2.out.pdb_msa)
     }
 
     //
@@ -166,23 +165,11 @@ workflow NFCORE_PROTEINFOLD {
             PREPARE_COLABFOLD_DBS.out.uniref30,
             params.num_recycles_colabfold
         )
-        ch_multiqc  = ch_multiqc.mix(COLABFOLD.out.multiqc_report)
-        ch_versions = ch_versions.mix(COLABFOLD.out.versions)
-        ch_report_input = ch_report_input.mix(
-            COLABFOLD
-                .out
-                .pdb
-                .join(COLABFOLD.out.msa)
-                .map { it[0]["model"] = "colabfold"; it }
-        )
-        COLABFOLD
-                .out
-                .main_pdb
-                .map{[it[0]["id"], it[0], it[1]]}
-                .join(COLABFOLD.out.msa
-                        .map{[it[0]["id"], it[1]]},
-                    remainder:true
-                ).set{ch_colabfold_out}
+
+        ch_colabfold_top_ranked_pdb = COLABFOLD.out.top_ranked_pdb
+        ch_multiqc                  = ch_multiqc.mix(COLABFOLD.out.multiqc_report)
+        ch_versions                 = ch_versions.mix(COLABFOLD.out.versions)
+        ch_report_input             = ch_report_input.mix(COLABFOLD.out.pdb_msa)
     }
 
     //
@@ -208,49 +195,59 @@ workflow NFCORE_PROTEINFOLD {
             ch_samplesheet,
             ch_versions,
             PREPARE_ESMFOLD_DBS.out.params,
-            params.num_recycles_esmfold
+            params.num_recycles_esmfold,
+            ch_dummy_file
         )
-        ch_multiqc  = ch_multiqc.mix(ESMFOLD.out.multiqc_report.collect())
-        ch_versions = ch_versions.mix(ESMFOLD.out.versions)
-        ch_report_input = ch_report_input.mix(
-            ESMFOLD.out.pdb.combine(Channel.fromPath("$projectDir/assets/NO_FILE")).map{it[0]["model"] = "esmfold"; it}
-        )
-        ch_report_input.filter{it[0]["model"] == "esmfold"}
-            .map{[it[0]["id"], it[0], it[1], it[2]]}
-            .set{ch_esmfold_out}
+
+        ch_esmfold_top_ranked_pdb = ESMFOLD.out.top_ranked_pdb
+        ch_multiqc                = ch_multiqc.mix(ESMFOLD.out.multiqc_report.collect())
+        ch_versions               = ch_versions.mix(ESMFOLD.out.versions)
+        ch_report_input           = ch_report_input.mix(ESMFOLD.out.pdb_msa)
     }
+
     //
-    // POST PROCESSING: generate visulaisation reports
+    // POST PROCESSING: generate visualisation reports
     //
+    // TODO: we need to validate the rest of the foldseek parameters if foldseek is set to run
+    // TODO: maybe create a run_foldseek or skip_foldseek parameter instead, as there are no other modes that can be used now
+
+    // TODO: move it to pdb.config? Assign as in prepare dbs
     if (params.foldseek_search == "easysearch"){
-        ch_foldseek_db = channel.value([["id": params.foldseek_db],
-                                        file(params.foldseek_db_path,
-                                            checkIfExists: true)])
+        ch_foldseek_db = channel.value([
+            [
+                id: params.foldseek_db,
+            ],
+            file(params.foldseek_db_path, checkIfExists: true)
+        ])
     }
 
-    ch_multiqc_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty()
-    ch_multiqc_logo = params.multiqc_logo   ? Channel.fromPath( params.multiqc_logo )   : Channel.empty()
+    ch_multiqc_config        = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true).first()
+    ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ).first()  : Channel.empty()
+    ch_multiqc_logo          = params.multiqc_logo   ? Channel.fromPath( params.multiqc_logo ).first()    : Channel.empty()
     ch_multiqc_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
-//ch_multiqc.view()
+    ch_report_template     = Channel.value(file("$projectDir/assets/report_template.html", checkIfExists: true))
+    ch_comparison_template = Channel.value(file("$projectDir/assets/comparison_template.html", checkIfExists: true))
+
     POST_PROCESSING(
         params.skip_visualisation,
-        requested_modes.size(),
+        params.mode,
+        requested_modes_size,
         ch_report_input,
-        Channel.fromPath("$projectDir/assets/proteinfold_template.html", checkIfExists: true).first(),
-        Channel.fromPath("$projectDir/assets/comparison_template.html", checkIfExists: true).first(),
+        ch_report_template,
+        ch_comparison_template,
         params.foldseek_search,
         ch_foldseek_db,
         params.skip_multiqc,
         params.outdir,
         ch_versions,
         ch_multiqc,
-        Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true).first(),
-        ch_multiqc_config.first(),
-        ch_multiqc_logo.first(),
-        ch_multiqc_methods_description.first(),
-        ch_alphafold2_out,
-        ch_esmfold_out,
-        ch_colabfold_out
+        ch_multiqc_config,
+        ch_multiqc_custom_config,
+        ch_multiqc_logo,
+        ch_multiqc_methods_description,
+        ch_alphafold_top_ranked_pdb,
+        ch_colabfold_top_ranked_pdb,
+        ch_esmfold_top_ranked_pdb
     )
 
     emit:

diff --git a/modules/local/colabfold_batch.nf → modules/local/colabfold_batch/main.nf b/modules/local/colabfold_batch.nf → modules/local/colabfold_batch/main.nf
@@ -18,11 +18,11 @@ process COLABFOLD_BATCH {
     val   numRec
 
     output:
-    tuple val(meta), path ("${meta.id}_colabfold.pdb"), emit: main_pdb
-    tuple val(meta), path ("*_relaxed_rank_*.pdb"), emit: pdb
-    tuple val(meta), path ("*_coverage.png")      , emit: msa
-    tuple val(meta), path ("*_mqc.png")           , emit: multiqc
-    path "versions.yml"                           , emit: versions
+    tuple val(meta), path ("${meta.id}_colabfold.pdb"), emit: top_ranked_pdb
+    tuple val(meta), path ("*_relaxed_rank_*.pdb")    , emit: pdb
+    tuple val(meta), path ("*_coverage.png")          , emit: msa
+    tuple val(meta), path ("*_mqc.png")               , emit: multiqc
+    path "versions.yml"                               , emit: versions
 
     when:
     task.ext.when == null || task.ext.when

diff --git a/modules/local/combine_uniprot/environment.yml b/modules/local/combine_uniprot/environment.yml
@@ -0,0 +1,8 @@
+name: combine_uniprot
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::grep=3.11
+  - conda-forge::sed=4.8
+  - conda-forge::tar=1.34
diff --git a/modules/local/combine_uniprot.nf → modules/local/combine_uniprot/main.nf b/modules/local/combine_uniprot.nf → modules/local/combine_uniprot/main.nf
@@ -1,7 +1,7 @@
 process COMBINE_UNIPROT {
     label 'process_single'
 
-    conda "conda-forge::sed=4.7"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
         'nf-core/ubuntu:20.04' }"

diff --git a/modules/local/compare_structures/environment.yml b/modules/local/compare_structures/environment.yml
@@ -0,0 +1,9 @@
+name: compare_structures
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::biopython=1.84
+  - conda-forge::matplotlib=3.9.2
+  - conda-forge::pip=24.2
+  - conda-forge::plotly=5.24.1
diff --git a/modules/local/compare_structures.nf → modules/local/compare_structures/main.nf b/modules/local/compare_structures.nf → modules/local/compare_structures/main.nf
@@ -2,10 +2,10 @@ process COMPARE_STRUCTURES {
     tag   "$meta.id"
     label 'process_single'
 
-    conda "bioconda::multiqc:1.21"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'oras://community.wave.seqera.io/library/pip_biopython_matplotlib_plotly:e865101a15ad0014' :
-        'community.wave.seqera.io/library/pip_biopython_matplotlib_plotly:4d51afeb4bb75495' }"
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/24/241f0746484727a3633f544c3747bfb77932e1c8c252e769640bd163232d9112/data' :
+        'community.wave.seqera.io/library/biopython_matplotlib_pip_plotly:35975fa0fc54b2d3' }"
 
     input:
     tuple val(meta), path(pdb)

diff --git a/modules/local/download_pdbmmcif/environment.yml b/modules/local/download_pdbmmcif/environment.yml
@@ -0,0 +1,7 @@
+name: download_pdbmmcif
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::aria2=1.36.0
+  - conda-forge::rsync=3.3.0
diff --git a/modules/local/download_pdbmmcif.nf → modules/local/download_pdbmmcif/main.nf b/modules/local/download_pdbmmcif.nf → modules/local/download_pdbmmcif/main.nf
@@ -6,10 +6,10 @@ process DOWNLOAD_PDBMMCIF {
     label 'process_low'
     label 'error_retry'
 
-    conda "bioconda::aria2=1.36.0 conda-forge::rsync=3.2.7"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/mulled-v2-4a7c46784ad871c48746744c6b8dbc5d0a97b9ca:33e61a87922824f8afcecf88a7717a2d4cb514e9-0' :
-        'biocontainers/mulled-v2-4a7c46784ad871c48746744c6b8dbc5d0a97b9ca:33e61a87922824f8afcecf88a7717a2d4cb514e9-0' }"
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3c/3c2e1079a0721851248bd2aa45f3d4cd32bfdb7395d609132567d772150965cc/data' :
+        'community.wave.seqera.io/library/aria2_rsync:1627a7e9b559cfa0' }"
 
     input:
     val source_url_pdb_mmcif

diff --git a/modules/local/generate_report/environment.yml b/modules/local/generate_report/environment.yml
@@ -0,0 +1,9 @@
+name: generate_report
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::biopython=1.84
+  - conda-forge::matplotlib=3.9.2
+  - conda-forge::pip=24.2
+  - conda-forge::plotly=5.24.1
diff --git a/modules/local/generate_report.nf → modules/local/generate_report/main.nf b/modules/local/generate_report.nf → modules/local/generate_report/main.nf
@@ -2,10 +2,10 @@ process GENERATE_REPORT {
     tag   "$meta.id-$meta.model"
     label 'process_single'
 
-    conda "bioconda::multiqc:1.21"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'oras://community.wave.seqera.io/library/pip_biopython_matplotlib_plotly:e865101a15ad0014' :
-        'community.wave.seqera.io/library/pip_biopython_matplotlib_plotly:4d51afeb4bb75495' }"
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/24/241f0746484727a3633f544c3747bfb77932e1c8c252e769640bd163232d9112/data' :
+        'community.wave.seqera.io/library/biopython_matplotlib_pip_plotly:35975fa0fc54b2d3' }"
 
     input:
     tuple val(meta), path(pdb)
@@ -14,10 +14,10 @@ process GENERATE_REPORT {
     path(template)
 
     output:
-    tuple val(meta), path ("*report.html"), emit: report
+    tuple val(meta), path ("*report.html")     , emit: report
     tuple val(meta), path ("*seq_coverage.png"), optional: true, emit: sequence_coverage
-    tuple val(meta), path ("*_LDDT.html"), emit: plddt
-    path "versions.yml"        , emit: versions
+    tuple val(meta), path ("*_LDDT.html")      , emit: plddt
+    path "versions.yml"                        , emit: versions
 
     when:
     task.ext.when == null || task.ext.when

diff --git a/modules/local/mmseqs_colabfoldsearch.nf → modules/local/mmseqs_colabfoldsearch/main.nf b/modules/local/mmseqs_colabfoldsearch.nf → modules/local/mmseqs_colabfoldsearch/main.nf
diff --git a/modules/local/multifasta_to_csv/environment.yml b/modules/local/multifasta_to_csv/environment.yml
@@ -0,0 +1,8 @@
+name: multifasta_to_csv
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::grep=3.11
+  - conda-forge::sed=4.8
+  - conda-forge::tar=1.34
diff --git a/modules/local/multifasta_to_csv.nf → modules/local/multifasta_to_csv/main.nf b/modules/local/multifasta_to_csv.nf → modules/local/multifasta_to_csv/main.nf
@@ -2,7 +2,7 @@ process MULTIFASTA_TO_CSV {
     tag "$meta.id"
     label 'process_single'
 
-    conda "conda-forge::sed=4.7"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
         'nf-core/ubuntu:20.04' }"

diff --git a/modules/local/multifasta_to_singlefasta/environment.yml b/modules/local/multifasta_to_singlefasta/environment.yml
@@ -0,0 +1,8 @@
+name: multifasta_to_singlefasta
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::grep=3.11
+  - conda-forge::sed=4.8
+  - conda-forge::tar=1.34
diff --git a/modules/local/multifasta_to_singlefasta.nf → ...s/local/multifasta_to_singlefasta/main.nf b/modules/local/multifasta_to_singlefasta.nf → ...s/local/multifasta_to_singlefasta/main.nf
diff --git a/modules/local/run_alphafold2.nf → modules/local/run_alphafold2/main.nf b/modules/local/run_alphafold2.nf → modules/local/run_alphafold2/main.nf
@@ -29,7 +29,7 @@ process RUN_ALPHAFOLD2 {
 
     output:
     path ("${fasta.baseName}*")
-    tuple val(meta), path ("${meta.id}_alphafold2.pdb"), emit: main_pdb
+    tuple val(meta), path ("${meta.id}_alphafold2.pdb")   , emit: top_ranked_pdb
     tuple val(meta), path ("${fasta.baseName}/ranked*pdb"), emit: pdb
     tuple val(meta), path ("${fasta.baseName}/*_msa.tsv") , emit: msa
     tuple val(meta), path ("*_mqc.tsv")                   , emit: multiqc

diff --git a/modules/local/run_alphafold2_msa.nf → modules/local/run_alphafold2_msa/main.nf b/modules/local/run_alphafold2_msa.nf → modules/local/run_alphafold2_msa/main.nf
@@ -30,7 +30,7 @@ process RUN_ALPHAFOLD2_MSA {
     output:
     path ("${fasta.baseName}*")
     tuple val(meta), path ("${fasta.baseName}.features.pkl"), emit: features
-    path "versions.yml"                    , emit: versions
+    path "versions.yml"                                     , emit: versions
 
     when:
     task.ext.when == null || task.ext.when

diff --git a/modules/local/run_alphafold2_pred.nf → modules/local/run_alphafold2_pred/main.nf b/modules/local/run_alphafold2_pred.nf → modules/local/run_alphafold2_pred/main.nf
@@ -30,11 +30,11 @@ process RUN_ALPHAFOLD2_PRED {
 
     output:
     path ("${fasta.baseName}*")
-    tuple val(meta), path ("${meta.id}_alphafold2.pdb"), emit: main_pdb
+    tuple val(meta), path ("${meta.id}_alphafold2.pdb")   , emit: top_ranked_pdb
     tuple val(meta), path ("${fasta.baseName}/ranked*pdb"), emit: pdb
-    tuple val(meta), path ("*_msa.tsv"), emit: msa
-    tuple val(meta), path ("*_mqc.tsv"), emit: multiqc
-    path "versions.yml", emit: versions
+    tuple val(meta), path ("*_msa.tsv")                   , emit: msa
+    tuple val(meta), path ("*_mqc.tsv")                   , emit: multiqc
+    path "versions.yml"                                   , emit: versions
 
     when:
     task.ext.when == null || task.ext.when

diff --git a/modules/local/run_esmfold.nf → modules/local/run_esmfold/main.nf b/modules/local/run_esmfold.nf → modules/local/run_esmfold/main.nf
@@ -16,7 +16,7 @@ process RUN_ESMFOLD {
     output:
     tuple val(meta), path ("${meta.id}_esmfold.pdb")  , emit: pdb
     tuple val(meta), path ("${meta.id}_plddt_mqc.tsv"), emit: multiqc
-    path "versions.yml", emit: versions
+    path "versions.yml"                               , emit: versions
 
     when:
     task.ext.when == null || task.ext.when