Skip to content
This repository has been archived by the owner on Apr 19, 2023. It is now read-only.

Commit

Permalink
Code refactoring
Browse files Browse the repository at this point in the history
Rules followed:
main.nf should only contain standalone workflows, i.e., workflows that don't take any inputs.
Any workflow that takes input(s) should be put in ./workflows
  • Loading branch information
dweemx committed Apr 10, 2020
1 parent cb2e489 commit 6092226
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 64 deletions.
70 changes: 6 additions & 64 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,74 +7,16 @@ import static groovy.json.JsonOutput.*

include '../utils/workflows/utils.nf' params(params)
INIT()
include '../utils/processes/utils.nf' params(params)
include '../src/utils/processes/utils.nf' params(params)
include SINGLE_SAMPLE from './workflows/single_sample.nf' params(params)

include QC_FILTER from './workflows/qc_filter.nf' params(params)
include NORMALIZE_TRANSFORM from './workflows/normalize_transform.nf' params(params)
include HVG_SELECTION from './workflows/hvg_selection.nf' params(params)
include SC__SCANPY__REGRESS_OUT from './processes/regress_out.nf' params(params)
include NEIGHBORHOOD_GRAPH from './workflows/neighborhood_graph.nf' params(params)
include DIM_REDUCTION_PCA from './workflows/dim_reduction_pca.nf' params(params)
include './workflows/dim_reduction.nf' params(params)
include './processes/cluster.nf' params(params)
include CLUSTER_IDENTIFICATION from './workflows/cluster_identification.nf' params(params)

// reporting:
include UTILS__GENERATE_WORKFLOW_CONFIG_REPORT from '../utils/processes/reports.nf' params(params)
include SC__SCANPY__MERGE_REPORTS from './processes/reports.nf' params(params)
include SC__SCANPY__REPORT_TO_HTML from './processes/reports.nf' params(params)
include COMBINE_REPORTS from './workflows/combine_reports.nf' params(params)

// NOTE(review): this span is rendered from a diff view of main.nf — the body
// of the old in-place `single_sample` workflow (REMOVED in this commit) is
// interleaved with the newly ADDED pipeline invocation near the bottom.
// In the committed file the workflow presumably reduces to the
// getDataChannel | SC__FILE_CONVERTER | SINGLE_SAMPLE pipe; the per-step
// Scanpy logic moved to ./workflows/single_sample.nf — TODO confirm against
// the repository head.
workflow single_sample {

    take:
        // (sampleId, h5ad) pairs — presumably; verify against the caller.
        data

    main:
        // Process the data
        // (removed lines) each step toggled by a key in params.sc.scanpy:
        out = params.sc.scanpy.containsKey("filter") ? QC_FILTER( data ).filtered : data
        out = params.sc.scanpy.containsKey("data_transformation") &&
            params.sc.scanpy.containsKey("normalization") ? NORMALIZE_TRANSFORM( out ) : out
        out = HVG_SELECTION( out )
        out = params.sc.scanpy.containsKey("regress_out") ? SC__SCANPY__REGRESS_OUT( out.scaled ) : out.scaled
        DIM_REDUCTION_PCA( out )
        NEIGHBORHOOD_GRAPH( DIM_REDUCTION_PCA.out )
        DIM_REDUCTION_TSNE_UMAP( NEIGHBORHOOD_GRAPH.out )
        CLUSTER_IDENTIFICATION(
            NORMALIZE_TRANSFORM.out,
            DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap,
            "No Batch Effect Correction"
        )

        // Reporting
        samples = data.map { it -> it[0] }.view()
        UTILS__GENERATE_WORKFLOW_CONFIG_REPORT(
            file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb)
        )

        ipynbs = COMBINE_REPORTS(
            samples,
            UTILS__GENERATE_WORKFLOW_CONFIG_REPORT.out,
            QC_FILTER.out.report,
            HVG_SELECTION.out.report,
            DIM_REDUCTION_TSNE_UMAP.out.report,
            CLUSTER_IDENTIFICATION.out.report
        )

        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
        merged_report = SC__SCANPY__MERGE_REPORTS(
            ipynbs,
            "merged_report",
            clusteringParams.isParameterExplorationModeOn()
        )
        SC__SCANPY__REPORT_TO_HTML(SC__SCANPY__MERGE_REPORTS.out)

    emit:
        filtered_data = params.sc.scanpy.containsKey("filter") ? QC_FILTER.out.filtered : Channel.empty()
        hvg_data = HVG_SELECTION.out.hvg
        dr_pca_data = DIM_REDUCTION_PCA.out
        final_processed_data = CLUSTER_IDENTIFICATION.out.marker_genes
        reports = ipynbs
        merged_report
        // run the pipeline
        // (added lines) standalone entry point: read the data channel, convert
        // the input files, then hand off to the SINGLE_SAMPLE sub-workflow.
        getDataChannel | \
            SC__FILE_CONVERTER | \
            SINGLE_SAMPLE

}
111 changes: 111 additions & 0 deletions workflows/single_sample.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
nextflow.preview.dsl=2

import static groovy.json.JsonOutput.*

//////////////////////////////////////////////////////
// Import sub-workflows

include '../../utils/processes/utils.nf' params(params)
include SC__H5AD_TO_FILTERED_LOOM from '../../utils/processes/h5adToLoom.nf' params(params)
include FILE_CONVERTER from '../../utils/workflows/fileConverter.nf' params(params)

include QC_FILTER from './qc_filter.nf' params(params)
include NORMALIZE_TRANSFORM from './normalize_transform.nf' params(params)
include HVG_SELECTION from './hvg_selection.nf' params(params)
include SC__SCANPY__REGRESS_OUT from '../processes/regress_out.nf' params(params)
include NEIGHBORHOOD_GRAPH from './neighborhood_graph.nf' params(params)
include DIM_REDUCTION_PCA from './dim_reduction_pca.nf' params(params)
include './dim_reduction.nf' params(params)
include '../processes/cluster.nf' params(params)
include CLUSTER_IDENTIFICATION from './cluster_identification.nf' params(params)

// reporting:
include UTILS__GENERATE_WORKFLOW_CONFIG_REPORT from '../../utils/processes/reports.nf' params(params)
include SC__SCANPY__MERGE_REPORTS from '../processes/reports.nf' params(params)
include SC__SCANPY__REPORT_TO_HTML from '../processes/reports.nf' params(params)
include COMBINE_REPORTS from './combine_reports.nf' params(params)

// Single-sample scRNA-seq processing sub-workflow (Nextflow DSL2).
// Runs the Scanpy pipeline per sample: optional QC filtering, optional
// normalization/transformation, HVG selection, optional regress-out, PCA,
// neighborhood graph, t-SNE/UMAP, cluster identification with marker genes,
// then report generation, loom conversion and h5ad publishing.
workflow SINGLE_SAMPLE {

    take:
        // Expects (sampleId, h5ad)
        data

    main:
        // Process the data.
        // Each optional step is toggled by the presence of its key in params.sc.scanpy.
        out = params.sc.scanpy.containsKey("filter") ? QC_FILTER( data ).filtered : data
        // NOTE(review): normalization only runs when BOTH keys are present, yet
        // NORMALIZE_TRANSFORM.out is read unconditionally by
        // CLUSTER_IDENTIFICATION below — confirm these keys are effectively
        // mandatory, otherwise that access fails when the step is skipped.
        out = params.sc.scanpy.containsKey("data_transformation") &&
            params.sc.scanpy.containsKey("normalization") ? NORMALIZE_TRANSFORM( out ) : out
        out = HVG_SELECTION( out )
        // HVG_SELECTION emits a `scaled` channel; regress-out (if enabled) runs on it.
        out = params.sc.scanpy.containsKey("regress_out") ? SC__SCANPY__REGRESS_OUT( out.scaled ) : out.scaled
        DIM_REDUCTION_PCA( out )
        NEIGHBORHOOD_GRAPH( DIM_REDUCTION_PCA.out )
        // DIM_REDUCTION_TSNE_UMAP comes from the anonymous include of
        // './dim_reduction.nf' above.
        DIM_REDUCTION_TSNE_UMAP( NEIGHBORHOOD_GRAPH.out )
        // Cluster on the normalized data; annotate against the t-SNE/UMAP embedding.
        CLUSTER_IDENTIFICATION(
            NORMALIZE_TRANSFORM.out,
            DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap,
            "No Batch Effect Correction"
        )

        // Reporting
        // NOTE(review): .view() prints every sample id to stdout — looks like a
        // leftover debug call; confirm it is intentional.
        samples = data.map { it -> it[0] }.view()
        UTILS__GENERATE_WORKFLOW_CONFIG_REPORT(
            file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb)
        )

        // Combine the per-step notebook reports into one channel per sample.
        // NOTE(review): QC_FILTER.out.report is read here even when the
        // "filter" key is absent and QC_FILTER was never invoked — TODO confirm
        // this does not fail in the unfiltered configuration.
        ipynbs = COMBINE_REPORTS(
            samples,
            UTILS__GENERATE_WORKFLOW_CONFIG_REPORT.out,
            QC_FILTER.out.report,
            HVG_SELECTION.out.report,
            DIM_REDUCTION_TSNE_UMAP.out.report,
            CLUSTER_IDENTIFICATION.out.report
        )

        // Merge all notebooks into a single report, then render it to HTML.
        def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) )
        merged_report = SC__SCANPY__MERGE_REPORTS(
            ipynbs,
            "merged_report",
            clusteringParams.isParameterExplorationModeOn()
        )
        SC__SCANPY__REPORT_TO_HTML(SC__SCANPY__MERGE_REPORTS.out)

        // Conversion
        // Convert h5ad to X (here we choose: loom format)
        if(params.sc.scanpy.containsKey("filter")) {
            filtered_loom = SC__H5AD_TO_FILTERED_LOOM( QC_FILTER.out.filtered )
            // In parameter exploration mode, this automatically merges all the results into the resulting loom
            final_processed_scope_loom = FILE_CONVERTER(
                CLUSTER_IDENTIFICATION.out.marker_genes.groupTuple(),
                'loom',
                QC_FILTER.out.filtered
            )
        } else {
            // NOTE(review): SC__FILE_CONVERTER is never invoked inside this
            // workflow (it runs in the calling main.nf), so reading its .out
            // here may not resolve when this workflow is included elsewhere —
            // TODO confirm; passing the raw `data` channel may be intended.
            filtered_loom = SC__H5AD_TO_FILTERED_LOOM( SC__FILE_CONVERTER.out )
            final_processed_scope_loom = FILE_CONVERTER(
                CLUSTER_IDENTIFICATION.out.marker_genes.groupTuple(),
                'loom',
                SC__FILE_CONVERTER.out
            )
        }

        // Publishing
        // Third tuple element is null: no extra tag attached to published files.
        // NOTE(review): SC__PUBLISH_H5AD is not among the includes shown above —
        // presumably exported by the anonymous '../../utils/processes/utils.nf'
        // include; verify.
        final_published_data = SC__PUBLISH_H5AD(
            CLUSTER_IDENTIFICATION.out.marker_genes.map {
                it -> tuple(it[0], it[1], null)
            },
            params.global.project_name+".single_sample.output"
        )

    emit:
        // Per-sample filtered h5ad, or an empty channel when filtering is off.
        filtered_data = params.sc.scanpy.containsKey("filter") ? QC_FILTER.out.filtered : Channel.empty()
        filtered_loom
        hvg_data = HVG_SELECTION.out.hvg
        dr_pca_data = DIM_REDUCTION_PCA.out
        // Final h5ad with cluster annotations and marker genes.
        final_processed_data = CLUSTER_IDENTIFICATION.out.marker_genes
        final_published_data
        // SCope-ready loom with all results merged in.
        final_processed_scope_loom
        reports = ipynbs
        merged_report

}

0 comments on commit 6092226

Please sign in to comment.