diff --git a/CHANGELOG.md b/CHANGELOG.md index 25051396..bdb35127 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#179](https://github.com/nf-core/proteinfold/issues/179)]- Produce an interactive html report for the predicted structures. - [[#180](https://github.com/nf-core/proteinfold/issues/180)]- Implement Fooldseek. - [[#188](https://github.com/nf-core/proteinfold/issues/188)]- Fix colabfold image to run in gpus. +- [[PR #205](https://github.com/nf-core/proteinfold/pull/205)] - Change input schema from `sequence,fasta` to `id,fasta`. +- [[PR #210](https://github.com/nf-core/proteinfold/pull/210)] - Move post-processing logic to a subworkflow, change wave images pointing to oras to point to https and refactor modules to match the nf-core folder structure. ## [[1.1.1](https://github.com/nf-core/proteinfold/releases/tag/1.1.1)] - 2025-07-30 @@ -72,7 +74,6 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements - [[PR ##163](https://github.com/nf-core/proteinfold/pull/163)] - Fix full test CI. - [[#150]](https://github.com/nf-core/proteinfold/issues/150)] - Add thanks to the AWS Open Data Sponsorship program in `README.md`. - [[PR ##166](https://github.com/nf-core/proteinfold/pull/166)] - Create 2 different parameters for Colabfold and ESMfold number of recycles. -- [[PR ##205](https://github.com/nf-core/proteinfold/pull/205)] - Change input schema from `sequence,fasta` to `id,fasta`.
### Parameters diff --git a/assets/proteinfold_template.html b/assets/report_template.html similarity index 100% rename from assets/proteinfold_template.html rename to assets/report_template.html diff --git a/main.nf b/main.nf index edb73940..d1ec1a6b 100644 --- a/main.nf +++ b/main.nf @@ -52,6 +52,9 @@ params.colabfold_alphafold2_params_path = getColabfoldAlphafold2ParamsPath() // // WORKFLOW: Run main analysis pipeline // + +ch_dummy_file = Channel.fromPath("$projectDir/assets/NO_FILE") + workflow NFCORE_PROTEINFOLD { take: @@ -59,14 +62,16 @@ workflow NFCORE_PROTEINFOLD { main: ch_samplesheet = samplesheet + ch_alphafold_top_ranked_pdb = Channel.empty() + ch_colabfold_top_ranked_pdb = Channel.empty() + ch_esmfold_top_ranked_pdb = Channel.empty() ch_multiqc = Channel.empty() ch_versions = Channel.empty() ch_report_input = Channel.empty() ch_foldseek_db = Channel.empty() - ch_colabfold_out = Channel.empty() - ch_esmfold_out = Channel.empty() - ch_alphafold2_out = Channel.empty() requested_modes = params.mode.toLowerCase().split(",") + requested_modes_size = requested_modes.size() + // // WORKFLOW: Run alphafold2 // @@ -122,16 +127,10 @@ workflow NFCORE_PROTEINFOLD { PREPARE_ALPHAFOLD2_DBS.out.pdb_seqres, PREPARE_ALPHAFOLD2_DBS.out.uniprot ) - ch_multiqc = ch_multiqc.mix(ALPHAFOLD2.out.multiqc_report.collect()) - ch_versions = ch_versions.mix(ALPHAFOLD2.out.versions) - ch_report_input = ch_report_input.mix( - ALPHAFOLD2.out.pdb.join(ALPHAFOLD2.out.msa).map{it[0]["model"] = "alphafold2"; it} - ) - ALPHAFOLD2 - .out - .main_pdb - .map{[it[0]["id"], it[0], it[1]]} - .set{ch_alphafold2_out} + ch_alphafold_top_ranked_pdb = ALPHAFOLD2.out.top_ranked_pdb + ch_multiqc = ch_multiqc.mix(ALPHAFOLD2.out.multiqc_report.collect()) + ch_versions = ch_versions.mix(ALPHAFOLD2.out.versions) + ch_report_input = ch_report_input.mix(ALPHAFOLD2.out.pdb_msa) } // @@ -166,23 +165,11 @@ workflow NFCORE_PROTEINFOLD { PREPARE_COLABFOLD_DBS.out.uniref30, params.num_recycles_colabfold ) - 
ch_multiqc = ch_multiqc.mix(COLABFOLD.out.multiqc_report) - ch_versions = ch_versions.mix(COLABFOLD.out.versions) - ch_report_input = ch_report_input.mix( - COLABFOLD - .out - .pdb - .join(COLABFOLD.out.msa) - .map { it[0]["model"] = "colabfold"; it } - ) - COLABFOLD - .out - .main_pdb - .map{[it[0]["id"], it[0], it[1]]} - .join(COLABFOLD.out.msa - .map{[it[0]["id"], it[1]]}, - remainder:true - ).set{ch_colabfold_out} + + ch_colabfold_top_ranked_pdb = COLABFOLD.out.top_ranked_pdb + ch_multiqc = ch_multiqc.mix(COLABFOLD.out.multiqc_report) + ch_versions = ch_versions.mix(COLABFOLD.out.versions) + ch_report_input = ch_report_input.mix(COLABFOLD.out.pdb_msa) } // @@ -208,49 +195,59 @@ workflow NFCORE_PROTEINFOLD { ch_samplesheet, ch_versions, PREPARE_ESMFOLD_DBS.out.params, - params.num_recycles_esmfold + params.num_recycles_esmfold, + ch_dummy_file ) - ch_multiqc = ch_multiqc.mix(ESMFOLD.out.multiqc_report.collect()) - ch_versions = ch_versions.mix(ESMFOLD.out.versions) - ch_report_input = ch_report_input.mix( - ESMFOLD.out.pdb.combine(Channel.fromPath("$projectDir/assets/NO_FILE")).map{it[0]["model"] = "esmfold"; it} - ) - ch_report_input.filter{it[0]["model"] == "esmfold"} - .map{[it[0]["id"], it[0], it[1], it[2]]} - .set{ch_esmfold_out} + + ch_esmfold_top_ranked_pdb = ESMFOLD.out.top_ranked_pdb + ch_multiqc = ch_multiqc.mix(ESMFOLD.out.multiqc_report.collect()) + ch_versions = ch_versions.mix(ESMFOLD.out.versions) + ch_report_input = ch_report_input.mix(ESMFOLD.out.pdb_msa) } + // - // POST PROCESSING: generate visulaisation reports + // POST PROCESSING: generate visualisation reports // + // TODO: we need to validate the rest of foldseek parameters if foldseek is set to run + // TODO: maybe create a parameter that is run_foldseek or skip_foldseek instead, as there are no more modes that can be used now + + // TODO move it to pdb.config?
assign as in prepare dbs if (params.foldseek_search == "easysearch"){ - ch_foldseek_db = channel.value([["id": params.foldseek_db], - file(params.foldseek_db_path, - checkIfExists: true)]) + ch_foldseek_db = channel.value([ + [ + id: params.foldseek_db, + ], + file(params.foldseek_db_path, checkIfExists: true) + ]) } - ch_multiqc_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true).first() + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ).first() : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ).first() : Channel.empty() ch_multiqc_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) -//ch_multiqc.view() + ch_report_template = Channel.value(file("$projectDir/assets/report_template.html", checkIfExists: true)) + ch_comparison_template = Channel.value(file("$projectDir/assets/comparison_template.html", checkIfExists: true)) + POST_PROCESSING( params.skip_visualisation, - requested_modes.size(), + params.mode, + requested_modes_size, ch_report_input, - Channel.fromPath("$projectDir/assets/proteinfold_template.html", checkIfExists: true).first(), - Channel.fromPath("$projectDir/assets/comparison_template.html", checkIfExists: true).first(), + ch_report_template, + ch_comparison_template, params.foldseek_search, ch_foldseek_db, params.skip_multiqc, params.outdir, ch_versions, ch_multiqc, - Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true).first(), - ch_multiqc_config.first(), - ch_multiqc_logo.first(), - ch_multiqc_methods_description.first(), - ch_alphafold2_out, 
- ch_esmfold_out, - ch_colabfold_out + ch_multiqc_config, + ch_multiqc_custom_config, + ch_multiqc_logo, + ch_multiqc_methods_description, + ch_alphafold_top_ranked_pdb, + ch_colabfold_top_ranked_pdb, + ch_esmfold_top_ranked_pdb ) emit: diff --git a/modules/local/colabfold_batch.nf b/modules/local/colabfold_batch/main.nf similarity index 85% rename from modules/local/colabfold_batch.nf rename to modules/local/colabfold_batch/main.nf index 8710f9eb..4a54dde1 100644 --- a/modules/local/colabfold_batch.nf +++ b/modules/local/colabfold_batch/main.nf @@ -18,11 +18,11 @@ process COLABFOLD_BATCH { val numRec output: - tuple val(meta), path ("${meta.id}_colabfold.pdb"), emit: main_pdb - tuple val(meta), path ("*_relaxed_rank_*.pdb"), emit: pdb - tuple val(meta), path ("*_coverage.png") , emit: msa - tuple val(meta), path ("*_mqc.png") , emit: multiqc - path "versions.yml" , emit: versions + tuple val(meta), path ("${meta.id}_colabfold.pdb"), emit: top_ranked_pdb + tuple val(meta), path ("*_relaxed_rank_*.pdb") , emit: pdb + tuple val(meta), path ("*_coverage.png") , emit: msa + tuple val(meta), path ("*_mqc.png") , emit: multiqc + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/combine_uniprot/environment.yml b/modules/local/combine_uniprot/environment.yml new file mode 100644 index 00000000..a5702139 --- /dev/null +++ b/modules/local/combine_uniprot/environment.yml @@ -0,0 +1,8 @@ +name: combine_uniprot +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/local/combine_uniprot.nf b/modules/local/combine_uniprot/main.nf similarity index 96% rename from modules/local/combine_uniprot.nf rename to modules/local/combine_uniprot/main.nf index 7f4637b3..e295476d 100644 --- a/modules/local/combine_uniprot.nf +++ b/modules/local/combine_uniprot/main.nf @@ -1,7 +1,7 @@ process COMBINE_UNIPROT { label 'process_single' - 
conda "conda-forge::sed=4.7" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" diff --git a/modules/local/compare_structures/environment.yml b/modules/local/compare_structures/environment.yml new file mode 100644 index 00000000..9f657a6f --- /dev/null +++ b/modules/local/compare_structures/environment.yml @@ -0,0 +1,9 @@ +name: compare_structures +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::biopython=1.84 + - conda-forge::matplotlib=3.9.2 + - conda-forge::pip=24.2 + - conda-forge::plotly=5.24.1 diff --git a/modules/local/compare_structures.nf b/modules/local/compare_structures/main.nf similarity index 80% rename from modules/local/compare_structures.nf rename to modules/local/compare_structures/main.nf index 756d2525..f2b1b590 100644 --- a/modules/local/compare_structures.nf +++ b/modules/local/compare_structures/main.nf @@ -2,10 +2,10 @@ process COMPARE_STRUCTURES { tag "$meta.id" label 'process_single' - conda "bioconda::multiqc:1.21" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'oras://community.wave.seqera.io/library/pip_biopython_matplotlib_plotly:e865101a15ad0014' : - 'community.wave.seqera.io/library/pip_biopython_matplotlib_plotly:4d51afeb4bb75495' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/24/241f0746484727a3633f544c3747bfb77932e1c8c252e769640bd163232d9112/data' : + 'community.wave.seqera.io/library/biopython_matplotlib_pip_plotly:35975fa0fc54b2d3' }" input: tuple val(meta), path(pdb) diff --git a/modules/local/download_pdbmmcif/environment.yml b/modules/local/download_pdbmmcif/environment.yml new file mode 100644 index 00000000..4a53be04 --- /dev/null +++ b/modules/local/download_pdbmmcif/environment.yml @@ -0,0 +1,7 @@ +name: download_pdbmmcif +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::aria2=1.36.0 + - conda-forge::rsync=3.3.0 diff --git a/modules/local/download_pdbmmcif.nf b/modules/local/download_pdbmmcif/main.nf similarity index 84% rename from modules/local/download_pdbmmcif.nf rename to modules/local/download_pdbmmcif/main.nf index 98ef831e..a8a12963 100644 --- a/modules/local/download_pdbmmcif.nf +++ b/modules/local/download_pdbmmcif/main.nf @@ -6,10 +6,10 @@ process DOWNLOAD_PDBMMCIF { label 'process_low' label 'error_retry' - conda "bioconda::aria2=1.36.0 conda-forge::rsync=3.2.7" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-4a7c46784ad871c48746744c6b8dbc5d0a97b9ca:33e61a87922824f8afcecf88a7717a2d4cb514e9-0' : - 'biocontainers/mulled-v2-4a7c46784ad871c48746744c6b8dbc5d0a97b9ca:33e61a87922824f8afcecf88a7717a2d4cb514e9-0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3c/3c2e1079a0721851248bd2aa45f3d4cd32bfdb7395d609132567d772150965cc/data' : + 'community.wave.seqera.io/library/aria2_rsync:1627a7e9b559cfa0' }" input: val source_url_pdb_mmcif diff --git a/modules/local/generate_report/environment.yml b/modules/local/generate_report/environment.yml new file mode 100644 index 00000000..07a5b9f1 --- /dev/null +++ b/modules/local/generate_report/environment.yml @@ -0,0 +1,9 @@ +name: generate_report +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::biopython=1.84 + - conda-forge::matplotlib=3.9.2 + - conda-forge::pip=24.2 + - conda-forge::plotly=5.24.1 diff --git a/modules/local/generate_report.nf b/modules/local/generate_report/main.nf similarity index 72% rename from modules/local/generate_report.nf rename to modules/local/generate_report/main.nf index 3bfdc04e..8ab8fc5d 100644 --- a/modules/local/generate_report.nf +++ b/modules/local/generate_report/main.nf @@ -2,10 +2,10 @@ process GENERATE_REPORT { tag "$meta.id-$meta.model" label 'process_single' - conda "bioconda::multiqc:1.21" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'oras://community.wave.seqera.io/library/pip_biopython_matplotlib_plotly:e865101a15ad0014' : - 'community.wave.seqera.io/library/pip_biopython_matplotlib_plotly:4d51afeb4bb75495' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/24/241f0746484727a3633f544c3747bfb77932e1c8c252e769640bd163232d9112/data' : + 'community.wave.seqera.io/library/biopython_matplotlib_pip_plotly:35975fa0fc54b2d3' }" input: tuple val(meta), path(pdb) @@ -14,10 +14,10 @@ process GENERATE_REPORT { path(template) output: - tuple val(meta), path ("*report.html"), emit: report + tuple val(meta), path ("*report.html") , emit: report tuple val(meta), path ("*seq_coverage.png"), optional: true, emit: sequence_coverage - tuple val(meta), path ("*_LDDT.html"), emit: plddt - path "versions.yml" , emit: versions + tuple val(meta), path ("*_LDDT.html") , emit: plddt + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/mmseqs_colabfoldsearch.nf b/modules/local/mmseqs_colabfoldsearch/main.nf similarity index 100% rename from modules/local/mmseqs_colabfoldsearch.nf rename to modules/local/mmseqs_colabfoldsearch/main.nf diff --git a/modules/local/multifasta_to_csv/environment.yml b/modules/local/multifasta_to_csv/environment.yml new file mode 100644 index 00000000..d92aa27d --- /dev/null +++ b/modules/local/multifasta_to_csv/environment.yml @@ -0,0 +1,8 @@ +name: multifasta_to_csv +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/local/multifasta_to_csv.nf b/modules/local/multifasta_to_csv/main.nf similarity index 96% rename from modules/local/multifasta_to_csv.nf rename to modules/local/multifasta_to_csv/main.nf index d5d68fbf..5a737f78 100644 --- a/modules/local/multifasta_to_csv.nf +++ b/modules/local/multifasta_to_csv/main.nf @@ -2,7 +2,7 @@ process MULTIFASTA_TO_CSV { tag "$meta.id" label 'process_single' - conda 
"conda-forge::sed=4.7" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" diff --git a/modules/local/multifasta_to_singlefasta/environment.yml b/modules/local/multifasta_to_singlefasta/environment.yml new file mode 100644 index 00000000..273e53dd --- /dev/null +++ b/modules/local/multifasta_to_singlefasta/environment.yml @@ -0,0 +1,8 @@ +name: multifasta_to_singlefasta +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/local/multifasta_to_singlefasta.nf b/modules/local/multifasta_to_singlefasta/main.nf similarity index 100% rename from modules/local/multifasta_to_singlefasta.nf rename to modules/local/multifasta_to_singlefasta/main.nf diff --git a/modules/local/run_alphafold2.nf b/modules/local/run_alphafold2/main.nf similarity index 97% rename from modules/local/run_alphafold2.nf rename to modules/local/run_alphafold2/main.nf index f41636a9..6ebd3c1d 100644 --- a/modules/local/run_alphafold2.nf +++ b/modules/local/run_alphafold2/main.nf @@ -29,7 +29,7 @@ process RUN_ALPHAFOLD2 { output: path ("${fasta.baseName}*") - tuple val(meta), path ("${meta.id}_alphafold2.pdb"), emit: main_pdb + tuple val(meta), path ("${meta.id}_alphafold2.pdb") , emit: top_ranked_pdb tuple val(meta), path ("${fasta.baseName}/ranked*pdb"), emit: pdb tuple val(meta), path ("${fasta.baseName}/*_msa.tsv") , emit: msa tuple val(meta), path ("*_mqc.tsv") , emit: multiqc diff --git a/modules/local/run_alphafold2_msa.nf b/modules/local/run_alphafold2_msa/main.nf similarity index 97% rename from modules/local/run_alphafold2_msa.nf rename to modules/local/run_alphafold2_msa/main.nf index a4f00676..7428eb7f 100644 --- a/modules/local/run_alphafold2_msa.nf +++ b/modules/local/run_alphafold2_msa/main.nf @@ -30,7 +30,7 @@ 
process RUN_ALPHAFOLD2_MSA { output: path ("${fasta.baseName}*") tuple val(meta), path ("${fasta.baseName}.features.pkl"), emit: features - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/run_alphafold2_pred.nf b/modules/local/run_alphafold2_pred/main.nf similarity index 90% rename from modules/local/run_alphafold2_pred.nf rename to modules/local/run_alphafold2_pred/main.nf index d5e1b9b5..13fb15a7 100644 --- a/modules/local/run_alphafold2_pred.nf +++ b/modules/local/run_alphafold2_pred/main.nf @@ -30,11 +30,11 @@ process RUN_ALPHAFOLD2_PRED { output: path ("${fasta.baseName}*") - tuple val(meta), path ("${meta.id}_alphafold2.pdb"), emit: main_pdb + tuple val(meta), path ("${meta.id}_alphafold2.pdb") , emit: top_ranked_pdb tuple val(meta), path ("${fasta.baseName}/ranked*pdb"), emit: pdb - tuple val(meta), path ("*_msa.tsv"), emit: msa - tuple val(meta), path ("*_mqc.tsv"), emit: multiqc - path "versions.yml", emit: versions + tuple val(meta), path ("*_msa.tsv") , emit: msa + tuple val(meta), path ("*_mqc.tsv") , emit: multiqc + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/run_esmfold.nf b/modules/local/run_esmfold/main.nf similarity index 96% rename from modules/local/run_esmfold.nf rename to modules/local/run_esmfold/main.nf index 83397be1..9f0478f1 100644 --- a/modules/local/run_esmfold.nf +++ b/modules/local/run_esmfold/main.nf @@ -16,7 +16,7 @@ process RUN_ESMFOLD { output: tuple val(meta), path ("${meta.id}_esmfold.pdb") , emit: pdb tuple val(meta), path ("${meta.id}_plddt_mqc.tsv"), emit: multiqc - path "versions.yml", emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/subworkflows/local/post_processing.nf b/subworkflows/local/post_processing.nf index 631644d7..45d2f0b6 100644 --- a/subworkflows/local/post_processing.nf +++ 
b/subworkflows/local/post_processing.nf @@ -1,5 +1,5 @@ // -// Post processing analysis for the predected structures +// Post processing analysis for the predicted structures // // @@ -20,9 +20,10 @@ workflow POST_PROCESSING { take: skip_visualisation + requested_modes requested_modes_size ch_report_input - ch_proteinfold_template + ch_report_template ch_comparison_template foldseek_search ch_foldseek_db @@ -33,46 +34,54 @@ workflow POST_PROCESSING { ch_multiqc_config ch_multiqc_custom_config ch_multiqc_logo - ch_multiqc_custom_methods_description - ch_alphafold2_out - ch_esmfold_out - ch_colabfold_out + ch_multiqc_methods_description + ch_alphafold2_top_ranked_pdb + ch_colabfold_top_ranked_pdb + ch_esmfold_top_ranked_pdb main: - ch_comparision_report_files = Channel.empty() + ch_comparison_report_files = Channel.empty() if (!skip_visualisation){ GENERATE_REPORT( - ch_report_input.map{[it[0], it[1]]}, - ch_report_input.map{[it[0], it[2]]}, - ch_report_input.map{it[0].model}, - ch_proteinfold_template + ch_report_input.map { [it[0], it[1]] }, + ch_report_input.map { [it[0], it[2]] }, + ch_report_input.map { it[0].model }, + ch_report_template ) ch_versions = ch_versions.mix(GENERATE_REPORT.out.versions) if (requested_modes_size > 1){ - ch_comparision_report_files = ch_comparision_report_files.mix(ch_alphafold2_out + ch_comparison_report_files = ch_comparison_report_files.mix(ch_alphafold2_top_ranked_pdb .join(GENERATE_REPORT.out.sequence_coverage - .filter{it[0]["model"] == "alphafold2"} - .map{[it[0]["id"], it[1]]}, remainder:true + .filter { it[0]["model"] == "alphafold2" } + .map { [it[0]["id"], it[1]] }, remainder:true ) ) - ch_comparision_report_files = ch_comparision_report_files.mix( - ch_colabfold_out + ch_comparison_report_files = ch_comparison_report_files.mix( + ch_colabfold_top_ranked_pdb ) - ch_comparision_report_files = ch_comparision_report_files.mix( - ch_esmfold_out + ch_comparison_report_files = ch_comparison_report_files.mix( + 
ch_esmfold_top_ranked_pdb ) - //ch_comparision_report_files.view() - ch_comparision_report_files + + ch_comparison_report_files .groupTuple(by: [0], size: requested_modes_size) - .set{ch_comparision_report_input} + .set { ch_comparison_report_input } COMPARE_STRUCTURES( - ch_comparision_report_input.map{it[1][0]["models"] = params.mode.toLowerCase(); [it[1][0], it[2]]}, - ch_comparision_report_input.map{it[1][0]["models"] = params.mode.toLowerCase(); [it[1][0], it[3]]}, + ch_comparison_report_input + .map { + it[1][0]["models"] = requested_modes.toLowerCase(); + [ it[1][0], it[2] ] + }, + ch_comparison_report_input + .map{ + it[1][0]["models"] = requested_modes.toLowerCase(); + [ it[1][0], it[3] ] + }, ch_comparison_template ) ch_versions = ch_versions.mix(COMPARE_STRUCTURES.out.versions) @@ -82,12 +91,12 @@ workflow POST_PROCESSING { if (foldseek_search == "easysearch"){ FOLDSEEK_EASYSEARCH( ch_report_input - .map{ - if (it[0].model == "esmfold") - [it[0], it[1]] - else - [it[0], it[1][0]] - }, + .map{ + if (it[0].model == "esmfold") + [ it[0], it[1] ] + else + [ it[0], it[1][0] ] + }, ch_foldseek_db ) } @@ -98,33 +107,44 @@ workflow POST_PROCESSING { softwareVersionsToYAML(ch_versions) .collectFile(storeDir: "${outdir}/pipeline_info", name: 'nf_core_proteinfold_software_versions.yml', sort: true, newLine: true) .set { ch_collated_versions } + // // MODULE: MultiQC // + ch_multiqc_report = Channel.empty() + if (!skip_multiqc) { summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_methods_description)) ch_multiqc_files = Channel.empty() ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + MULTIQC ( - ch_multiqc_rep.combine(ch_multiqc_files.collect().map{[it]}).map{[it[0], it[1] + it[2]]}, + ch_multiqc_rep + .combine( + ch_multiqc_files + .collect() + .map { [it] } + ) + .map { [ it[0], it[1] + it[2] ] }, ch_multiqc_config, - ch_multiqc_custom_config.collect().ifEmpty([]), - ch_multiqc_logo.collect().ifEmpty([]), + ch_multiqc_custom_config + .collect() + .ifEmpty([]), + ch_multiqc_logo + .collect() + .ifEmpty([]), [], [] ) ch_multiqc_report = MULTIQC.out.report.toList() - }else{ - ch_multiqc_report = Channel.empty() } emit: - versions = ch_versions + versions = ch_versions multiqc_report = ch_multiqc_report } diff --git a/workflows/alphafold2.nf b/workflows/alphafold2.nf index 63b6e081..1f5344f5 100644 --- a/workflows/alphafold2.nf +++ b/workflows/alphafold2.nf @@ -43,11 +43,11 @@ workflow ALPHAFOLD2 { ch_uniprot // channel: path(uniprot) main: - ch_multiqc_files = Channel.empty() - ch_pdb = Channel.empty() - ch_main_pdb = Channel.empty() - ch_msa = Channel.empty() - ch_multiqc_rep = Channel.empty() + ch_multiqc_files = Channel.empty() + ch_pdb = Channel.empty() + ch_top_ranked_pdb = Channel.empty() + ch_msa = Channel.empty() + ch_multiqc_report = Channel.empty() if (alphafold2_model_preset != 'multimer') { ch_samplesheet @@ -78,11 +78,19 @@ workflow ALPHAFOLD2 { ch_pdb_seqres, ch_uniprot ) - 
ch_pdb = ch_pdb.mix(RUN_ALPHAFOLD2.out.pdb) - ch_main_pdb = ch_main_pdb.mix(RUN_ALPHAFOLD2.out.main_pdb) - ch_msa = ch_msa.mix(RUN_ALPHAFOLD2.out.msa) - ch_multiqc_rep = RUN_ALPHAFOLD2.out.multiqc.map{it[1]}.toSortedList().map{[["model":"alphafold2"], it]} - ch_versions = ch_versions.mix(RUN_ALPHAFOLD2.out.versions) + + RUN_ALPHAFOLD2 + .out + .multiqc + .map { it[1] } + .toSortedList() + .map { [ [ "model": "alphafold2" ], it.flatten() ] } + .set { ch_multiqc_report } + + ch_pdb = ch_pdb.mix(RUN_ALPHAFOLD2.out.pdb) + ch_top_ranked_pdb = ch_top_ranked_pdb.mix(RUN_ALPHAFOLD2.out.top_ranked_pdb) + ch_msa = ch_msa.mix(RUN_ALPHAFOLD2.out.msa) + ch_versions = ch_versions.mix(RUN_ALPHAFOLD2.out.versions) } else if (alphafold2_mode == 'split_msa_prediction') { // @@ -121,18 +129,37 @@ workflow ALPHAFOLD2 { ch_uniprot, RUN_ALPHAFOLD2_MSA.out.features ) - ch_pdb = ch_pdb.mix(RUN_ALPHAFOLD2_PRED.out.pdb) - ch_main_pdb = ch_main_pdb.mix(RUN_ALPHAFOLD2_PRED.out.main_pdb) - ch_msa = ch_msa.mix(RUN_ALPHAFOLD2_PRED.out.msa) - ch_multiqc_rep = RUN_ALPHAFOLD2_PRED.out.multiqc.map{it[1]}.toSortedList().map{[["model":"alphafold2"], it]} - ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_PRED.out.versions) + + RUN_ALPHAFOLD2_PRED + .out + .multiqc + .map { it[1] } + .toSortedList() + .map { [ [ "model": "alphafold2" ], it.flatten() ] } + .set { ch_multiqc_report } + + ch_top_ranked_pdb = ch_top_ranked_pdb.mix(RUN_ALPHAFOLD2_PRED.out.top_ranked_pdb) + ch_pdb = ch_pdb.mix(RUN_ALPHAFOLD2_PRED.out.pdb) + ch_msa = ch_msa.mix(RUN_ALPHAFOLD2_PRED.out.msa) + ch_versions = ch_versions.mix(RUN_ALPHAFOLD2_PRED.out.versions) } + ch_top_ranked_pdb + .map { [ it[0]["id"], it[0], it[1] ] } + .set { ch_top_ranked_pdb } + + ch_pdb + .join(ch_msa) + .map { + it[0]["model"] = "alphafold2" + it + } + .set { ch_pdb_msa } + emit: - main_pdb = ch_main_pdb // channel: /path/to/*.pdb - pdb = ch_pdb // channel: /path/to/*.pdb - msa = ch_msa // channel: /path/to/*msa.tsv - multiqc_report = ch_multiqc_rep // 
channel: /path/to/multiqc_report.html + top_ranked_pdb = ch_top_ranked_pdb // channel: [ id, /path/to/*.pdb ] + pdb_msa = ch_pdb_msa // channel: [ meta, /path/to/*.pdb, /path/to/*_coverage.png ] + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/colabfold.nf b/workflows/colabfold.nf index c7875cb1..adfa24b6 100644 --- a/workflows/colabfold.nf +++ b/workflows/colabfold.nf @@ -35,7 +35,7 @@ workflow COLABFOLD { num_recycles // int: Number of recycles for esmfold main: - ch_multiqc_files = Channel.empty() + ch_multiqc_report = Channel.empty() if (params.colabfold_server == 'webserver') { // @@ -104,14 +104,53 @@ workflow COLABFOLD { ch_uniref30, num_recycles ) - ch_versions = ch_versions.mix(COLABFOLD_BATCH.out.versions) + ch_versions = ch_versions.mix(COLABFOLD_BATCH.out.versions) } + COLABFOLD_BATCH + .out + .top_ranked_pdb + .map { [ it[0]["id"], it[0], it[1] ] } + .join( + COLABFOLD_BATCH + .out + .msa + .map { [ it[0]["id"], it[1] ] }, + remainder:true + ) + .set { ch_top_ranked_pdb } + + COLABFOLD_BATCH + .out + .pdb + .join(COLABFOLD_BATCH.out.msa) + .map { + it[0]["model"] = "colabfold" + it + } + .set { ch_pdb_msa } + + COLABFOLD_BATCH + .out + .multiqc + .map { it[1] } + .toSortedList() + .map { [ [ "model":"colabfold"], it.flatten() ] } + .set { ch_multiqc_report } + + COLABFOLD_BATCH + .out + .multiqc + + COLABFOLD_BATCH + .out + .multiqc + .collect() + emit: - pdb = COLABFOLD_BATCH.out.pdb // channel: /path/to/*.pdb - main_pdb = COLABFOLD_BATCH.out.main_pdb // channel: /path/to/*.pdb - msa = COLABFOLD_BATCH.out.msa // channel: /path/to/*_coverage.png - multiqc_report = COLABFOLD_BATCH.out.multiqc.map{it[1]}.flatten().toList().map{[["model":"colabfold"], it]} // channel: /path/to/multiqc_report.html + top_ranked_pdb = ch_top_ranked_pdb // channel: [ id, /path/to/*.pdb ] + pdb_msa = ch_pdb_msa // channel: [ meta, /path/to/*.pdb, 
/path/to/*_coverage.png ] + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/workflows/esmfold.nf b/workflows/esmfold.nf index 57aaa04e..c93c869a 100644 --- a/workflows/esmfold.nf +++ b/workflows/esmfold.nf @@ -29,6 +29,7 @@ workflow ESMFOLD { ch_versions // channel: [ path(versions.yml) ] ch_esmfold_params // directory: /path/to/esmfold/params/ ch_num_recycles // int: Number of recycles for esmfold + ch_dummy_file // channel: [ path(NO_FILE) ] main: ch_multiqc_files = Channel.empty() @@ -56,9 +57,32 @@ workflow ESMFOLD { ch_versions = ch_versions.mix(RUN_ESMFOLD.out.versions) } + RUN_ESMFOLD + .out + .multiqc + .map { it[1] } + .toSortedList() + .map { [ [ "model": "esmfold"], it.flatten() ] } + .set { ch_multiqc_report } + + RUN_ESMFOLD + .out + .pdb + .combine(ch_dummy_file) + .map { + it[0]["model"] = "esmfold" + it + } + .set { ch_pdb_msa } + + ch_pdb_msa + .map { [ it[0]["id"], it[0], it[1], it[2] ] } + .set { ch_top_ranked_pdb } + emit: - pdb = RUN_ESMFOLD.out.pdb // channel: /path/to/*pdb - multiqc_report = RUN_ESMFOLD.out.multiqc.map{it[1]}.toSortedList().map{[["model":"esmfold"], it]} // channel: /path/to/multiqc_report.html + pdb_msa = ch_pdb_msa // channel: [ meta, /path/to/*.pdb, dummy_file ] + top_ranked_pdb = ch_top_ranked_pdb // channel: [ id, meta, /path/to/*.pdb, dummy_file ] + multiqc_report = ch_multiqc_report // channel: /path/to/multiqc_report.html versions = ch_versions // channel: [ path(versions.yml) ] }