From 6092226ad260fa9fd0055aa231481d79306b8ca3 Mon Sep 17 00:00:00 2001
From: dweemx
Date: Fri, 10 Apr 2020 12:19:07 +0200
Subject: [PATCH] Code refactoring

Rules followed:
main.nf should only contain workflows that are standalone i.e.: shouldn't take any inputs
Any workflows that take input(s) should be put in ./workflows
---
 main.nf                    |  70 ++---------------------
 workflows/single_sample.nf | 111 +++++++++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+), 64 deletions(-)
 create mode 100644 workflows/single_sample.nf

diff --git a/main.nf b/main.nf
index 7aebf4c5..e7c212dc 100644
--- a/main.nf
+++ b/main.nf
@@ -7,74 +7,16 @@ import static groovy.json.JsonOutput.*
 
 include '../utils/workflows/utils.nf' params(params)
 INIT()
-include '../utils/processes/utils.nf' params(params)
+include '../src/utils/processes/utils.nf' params(params)
+include SINGLE_SAMPLE from './workflows/single_sample.nf' params(params)
 
-include QC_FILTER from './workflows/qc_filter.nf' params(params)
-include NORMALIZE_TRANSFORM from './workflows/normalize_transform.nf' params(params)
-include HVG_SELECTION from './workflows/hvg_selection.nf' params(params)
-include SC__SCANPY__REGRESS_OUT from './processes/regress_out.nf' params(params)
-include NEIGHBORHOOD_GRAPH from './workflows/neighborhood_graph.nf' params(params)
-include DIM_REDUCTION_PCA from './workflows/dim_reduction_pca.nf' params(params)
-include './workflows/dim_reduction.nf' params(params)
-include './processes/cluster.nf' params(params)
-include CLUSTER_IDENTIFICATION from './workflows/cluster_identification.nf' params(params)
-
-// reporting:
-include UTILS__GENERATE_WORKFLOW_CONFIG_REPORT from '../utils/processes/reports.nf' params(params)
-include SC__SCANPY__MERGE_REPORTS from './processes/reports.nf' params(params)
-include SC__SCANPY__REPORT_TO_HTML from './processes/reports.nf' params(params)
-include COMBINE_REPORTS from './workflows/combine_reports.nf' params(params)
 
 workflow single_sample {
 
-    take:
-        data
-    main:
-        // Process the data
-        out = params.sc.scanpy.containsKey("filter") ? QC_FILTER( data ).filtered : data
-        out = params.sc.scanpy.containsKey("data_transformation") &&
-            params.sc.scanpy.containsKey("normalization") ? NORMALIZE_TRANSFORM( out ) : out
-        out = HVG_SELECTION( out )
-        out = params.sc.scanpy.containsKey("regress_out") ? SC__SCANPY__REGRESS_OUT( out.scaled ) : out.scaled
-        DIM_REDUCTION_PCA( out )
-        NEIGHBORHOOD_GRAPH( DIM_REDUCTION_PCA.out )
-        DIM_REDUCTION_TSNE_UMAP( NEIGHBORHOOD_GRAPH.out )
-        CLUSTER_IDENTIFICATION(
-            NORMALIZE_TRANSFORM.out,
-            DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap,
-            "No Batch Effect Correction"
-        )
-
-        // Reporting
-        samples = data.map { it -> it[0] }.view()
-        UTILS__GENERATE_WORKFLOW_CONFIG_REPORT(
-            file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb)
-        )
-
-        ipynbs = COMBINE_REPORTS(
-            samples,
-            UTILS__GENERATE_WORKFLOW_CONFIG_REPORT.out,
-            QC_FILTER.out.report,
-            HVG_SELECTION.out.report,
-            DIM_REDUCTION_TSNE_UMAP.out.report,
-            CLUSTER_IDENTIFICATION.out.report
-        )
-
-        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
-        merged_report = SC__SCANPY__MERGE_REPORTS(
-            ipynbs,
-            "merged_report",
-            clusteringParams.isParameterExplorationModeOn()
-        )
-        SC__SCANPY__REPORT_TO_HTML(SC__SCANPY__MERGE_REPORTS.out)
-
-    emit:
-        filtered_data = params.sc.scanpy.containsKey("filter") ? QC_FILTER.out.filtered : Channel.empty()
-        hvg_data = HVG_SELECTION.out.hvg
-        dr_pca_data = DIM_REDUCTION_PCA.out
-        final_processed_data = CLUSTER_IDENTIFICATION.out.marker_genes
-        reports = ipynbs
-        merged_report
+    // run the pipeline
+    getDataChannel | \
+        SC__FILE_CONVERTER | \
+        SINGLE_SAMPLE
 
 }
 
diff --git a/workflows/single_sample.nf b/workflows/single_sample.nf
new file mode 100644
index 00000000..e03fe051
--- /dev/null
+++ b/workflows/single_sample.nf
@@ -0,0 +1,111 @@
+nextflow.preview.dsl=2
+
+import static groovy.json.JsonOutput.*
+
+//////////////////////////////////////////////////////
+//  Import sub-workflows
+
+include '../../utils/processes/utils.nf' params(params)
+include SC__H5AD_TO_FILTERED_LOOM from '../../utils/processes/h5adToLoom.nf' params(params)
+include FILE_CONVERTER from '../../utils/workflows/fileConverter.nf' params(params)
+
+include QC_FILTER from './qc_filter.nf' params(params)
+include NORMALIZE_TRANSFORM from './normalize_transform.nf' params(params)
+include HVG_SELECTION from './hvg_selection.nf' params(params)
+include SC__SCANPY__REGRESS_OUT from '../processes/regress_out.nf' params(params)
+include NEIGHBORHOOD_GRAPH from './neighborhood_graph.nf' params(params)
+include DIM_REDUCTION_PCA from './dim_reduction_pca.nf' params(params)
+include './dim_reduction.nf' params(params)
+include '../processes/cluster.nf' params(params)
+include CLUSTER_IDENTIFICATION from './cluster_identification.nf' params(params)
+
+// reporting:
+include UTILS__GENERATE_WORKFLOW_CONFIG_REPORT from '../../utils/processes/reports.nf' params(params)
+include SC__SCANPY__MERGE_REPORTS from '../processes/reports.nf' params(params)
+include SC__SCANPY__REPORT_TO_HTML from '../processes/reports.nf' params(params)
+include COMBINE_REPORTS from './combine_reports.nf' params(params)
+
+workflow SINGLE_SAMPLE {
+
+    take:
+        // Expects (sampleId, h5ad)
+        data
+
+    main:
+        // Process the data
+        out = params.sc.scanpy.containsKey("filter") ? QC_FILTER( data ).filtered : data
+        out = params.sc.scanpy.containsKey("data_transformation") &&
+            params.sc.scanpy.containsKey("normalization") ? NORMALIZE_TRANSFORM( out ) : out
+        out = HVG_SELECTION( out )
+        out = params.sc.scanpy.containsKey("regress_out") ? SC__SCANPY__REGRESS_OUT( out.scaled ) : out.scaled
+        DIM_REDUCTION_PCA( out )
+        NEIGHBORHOOD_GRAPH( DIM_REDUCTION_PCA.out )
+        DIM_REDUCTION_TSNE_UMAP( NEIGHBORHOOD_GRAPH.out )
+        CLUSTER_IDENTIFICATION(
+            NORMALIZE_TRANSFORM.out, // NOTE(review): NORMALIZE_TRANSFORM is only invoked conditionally above - confirm
+            DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap,
+            "No Batch Effect Correction"
+        )
+
+        // Reporting
+        samples = data.map { it -> it[0] }.view()
+        UTILS__GENERATE_WORKFLOW_CONFIG_REPORT(
+            file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb)
+        )
+
+        ipynbs = COMBINE_REPORTS(
+            samples,
+            UTILS__GENERATE_WORKFLOW_CONFIG_REPORT.out,
+            QC_FILTER.out.report, // NOTE(review): QC_FILTER is only invoked when "filter" is configured - confirm
+            HVG_SELECTION.out.report,
+            DIM_REDUCTION_TSNE_UMAP.out.report,
+            CLUSTER_IDENTIFICATION.out.report
+        )
+
+        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
+        merged_report = SC__SCANPY__MERGE_REPORTS(
+            ipynbs,
+            "merged_report",
+            clusteringParams.isParameterExplorationModeOn()
+        )
+        SC__SCANPY__REPORT_TO_HTML(SC__SCANPY__MERGE_REPORTS.out)
+
+        // Conversion
+        // Convert h5ad to X (here we choose: loom format)
+        if(params.sc.scanpy.containsKey("filter")) {
+            filtered_loom = SC__H5AD_TO_FILTERED_LOOM( QC_FILTER.out.filtered )
+            // In parameter exploration mode, this automatically merge all the results into the resulting loom
+            final_processed_scope_loom = FILE_CONVERTER(
+                CLUSTER_IDENTIFICATION.out.marker_genes.groupTuple(),
+                'loom',
+                QC_FILTER.out.filtered
+            )
+        } else { // no filter step: use the `data` input (SC__FILE_CONVERTER is not invoked in this workflow)
+            filtered_loom = SC__H5AD_TO_FILTERED_LOOM( data )
+            final_processed_scope_loom = FILE_CONVERTER(
+                CLUSTER_IDENTIFICATION.out.marker_genes.groupTuple(),
+                'loom',
+                data
+            )
+        }
+
+        // Publishing
+        final_published_data = SC__PUBLISH_H5AD(
+            CLUSTER_IDENTIFICATION.out.marker_genes.map {
+                it -> tuple(it[0], it[1], null)
+            },
+            params.global.project_name+".single_sample.output"
+        )
+
+    emit:
+        filtered_data = params.sc.scanpy.containsKey("filter") ? QC_FILTER.out.filtered : Channel.empty()
+        filtered_loom
+        hvg_data = HVG_SELECTION.out.hvg
+        dr_pca_data = DIM_REDUCTION_PCA.out
+        final_processed_data = CLUSTER_IDENTIFICATION.out.marker_genes
+        final_published_data
+        final_processed_scope_loom
+        reports = ipynbs
+        merged_report
+
+}