From 43b3ac9ff78677ed11f4b11b3e00faf508727279 Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 14 Feb 2020 13:38:09 +0100 Subject: [PATCH 01/17] Remove unnecessary entries when generating config file Remove star_concatenator and file_concatenator from base utils config Update main nextflow config accordingly --- nextflow.config | 5 +++++ src/utils/conf/base.config | 9 --------- src/utils/conf/h5ad_concatenate.config | 9 +++++++++ src/utils/conf/star_concatenate.config | 8 ++++++++ 4 files changed, 22 insertions(+), 9 deletions(-) create mode 100644 src/utils/conf/h5ad_concatenate.config create mode 100644 src/utils/conf/star_concatenate.config diff --git a/nextflow.config b/nextflow.config index 9ba980d3..04c08563 100644 --- a/nextflow.config +++ b/nextflow.config @@ -63,14 +63,17 @@ profiles { includeConfig 'src/star/star.config' } bbknn { + includeConfig 'src/utils/conf/h5ad_concatenate.config' includeConfig 'src/scanpy/scanpy.config' includeConfig 'src/scanpy/conf/bbknn.config' } mnncorrect { + includeConfig 'src/utils/conf/h5ad_concatenate.config' includeConfig 'src/scanpy/scanpy.config' includeConfig 'src/scanpy/conf/mnncorrect.config' } harmony { + includeConfig 'src/utils/conf/h5ad_concatenate.config' includeConfig 'src/scanpy/scanpy.config' includeConfig 'src/harmony/harmony.config' } @@ -189,9 +192,11 @@ profiles { includeConfig 'conf/test__single_sample_scenic_multiruns.config' } test__bbknn { + includeConfig 'src/utils/conf/h5ad_concatenate.config' includeConfig 'conf/test__bbknn.config' } test__harmony { + includeConfig 'src/utils/conf/h5ad_concatenate.config' includeConfig 'conf/test__harmony.config' } diff --git a/src/utils/conf/base.config b/src/utils/conf/base.config index 3e5426b0..47cc3e0f 100644 --- a/src/utils/conf/base.config +++ b/src/utils/conf/base.config @@ -12,14 +12,5 @@ params { tagCellWithSampleId = true useFilteredMatrix = true } - file_concatenator { - join = 'outer' - iff = '10x_cellranger_mex' - off = 'h5ad' - } - star_concatenator { 
- stranded = 'no' - off = 'tsv' - } } } diff --git a/src/utils/conf/h5ad_concatenate.config b/src/utils/conf/h5ad_concatenate.config new file mode 100644 index 00000000..140f38cb --- /dev/null +++ b/src/utils/conf/h5ad_concatenate.config @@ -0,0 +1,9 @@ +params { + sc { + file_concatenator { + join = 'outer' + iff = '10x_cellranger_mex' + off = 'h5ad' + } + } +} diff --git a/src/utils/conf/star_concatenate.config b/src/utils/conf/star_concatenate.config new file mode 100644 index 00000000..b75585e5 --- /dev/null +++ b/src/utils/conf/star_concatenate.config @@ -0,0 +1,8 @@ +params { + sc { + star_concatenator { + stranded = 'no' + off = 'tsv' + } + } +} From c18c557bae853e637469f63407bca4b5acb1c16d Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 14 Feb 2020 16:15:26 +0100 Subject: [PATCH 02/17] Use neighborhood_graph from scanpy in single_sample pipeline --- workflows/single_sample.nf | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/workflows/single_sample.nf b/workflows/single_sample.nf index 8cd0d653..3c83deca 100644 --- a/workflows/single_sample.nf +++ b/workflows/single_sample.nf @@ -8,7 +8,9 @@ include '../src/utils/processes/utils.nf' params(params) include QC_FILTER from '../src/scanpy/workflows/qc_filter.nf' params(params) include NORMALIZE_TRANSFORM from '../src/scanpy/workflows/normalize_transform.nf' params(params) include HVG_SELECTION from '../src/scanpy/workflows/hvg_selection.nf' params(params) -include DIM_REDUCTION from '../src/scanpy/workflows/dim_reduction.nf' params(params) +include NEIGHBORHOOD_GRAPH from '../src/scanpy/workflows/neighborhood_graph.nf' params(params) +include DIM_REDUCTION_PCA from '../src/scanpy/workflows/dim_reduction_pca.nf' params(params) +include '../src/scanpy/workflows/dim_reduction.nf' params(params) include '../src/scanpy/processes/cluster.nf' params(params) include CLUSTER_IDENTIFICATION from '../src/scanpy/workflows/cluster_identification.nf' params(params) include 
SC__H5AD_TO_FILTERED_LOOM from '../src/utils/processes/h5adToLoom.nf' params(params) @@ -36,37 +38,39 @@ workflow single_sample_base { QC_FILTER( data ) NORMALIZE_TRANSFORM( QC_FILTER.out.filtered ) HVG_SELECTION( NORMALIZE_TRANSFORM.out ) - DIM_REDUCTION( HVG_SELECTION.out.scaled ) + DIM_REDUCTION_PCA( HVG_SELECTION.out.scaled ) + NEIGHBORHOOD_GRAPH( DIM_REDUCTION_PCA.out ) + DIM_REDUCTION_TSNE_UMAP( NEIGHBORHOOD_GRAPH.out ) CLUSTER_IDENTIFICATION( NORMALIZE_TRANSFORM.out, - DIM_REDUCTION.out.dimred_pca_tsne_umap, + DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap, "No Batch Effect Correction" ) - // conversion - //// convert h5ad to X (here we choose: loom format) + // Conversion + // Convert h5ad to X (here we choose: loom format) filteredloom = SC__H5AD_TO_FILTERED_LOOM( QC_FILTER.out.filtered ) - // In benchmark mode, this automatically merge all the results into the resulting loom + // In parameter exploration mode, this automatically merge all the results into the resulting loom scopeloom = FILE_CONVERTER( CLUSTER_IDENTIFICATION.out.marker_genes.groupTuple(), 'loom', QC_FILTER.out.filtered ) - // publishing + // Publishing SC__PUBLISH_H5AD( CLUSTER_IDENTIFICATION.out.marker_genes, params.global.project_name+".single_sample.output" ) - // reporting: + // Reporting: def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) ) SC__SCANPY__MERGE_REPORTS( QC_FILTER.out.report.mix( samples.combine(UTILS__GENERATE_WORKFLOW_CONFIG_REPORT.out), HVG_SELECTION.out.report, - DIM_REDUCTION.out.report, + DIM_REDUCTION_TSNE_UMAP.out.report, CLUSTER_IDENTIFICATION.out.report ).groupTuple(), "merged_report", From 64f7cdee35ac1c852a6af468de5b728a0284d699 Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 14 Feb 2020 16:16:04 +0100 Subject: [PATCH 03/17] Use neighborhood_graph (scanpy) in single_sample_star pipeline --- workflows/single_sample_star.nf | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git 
a/workflows/single_sample_star.nf b/workflows/single_sample_star.nf index 5b07b043..a74a07f6 100644 --- a/workflows/single_sample_star.nf +++ b/workflows/single_sample_star.nf @@ -9,7 +9,9 @@ include star as STAR from '../workflows/star.nf' params(params) include QC_FILTER from '../src/scanpy/workflows/qc_filter.nf' params(params) include NORMALIZE_TRANSFORM from '../src/scanpy/workflows/normalize_transform.nf' params(params) include HVG_SELECTION from '../src/scanpy/workflows/hvg_selection.nf' params(params) -include DIM_REDUCTION from '../src/scanpy/workflows/dim_reduction.nf' params(params) +include NEIGHBORHOOD_GRAPH from '../src/scanpy/workflows/neighborhood_graph.nf' params(params) +include DIM_REDUCTION_PCA from '../src/scanpy/workflows/dim_reduction_pca.nf' params(params) +include '../src/scanpy/workflows/dim_reduction.nf' params(params) include '../src/scanpy/processes/cluster.nf' params(params) include CLUSTER_IDENTIFICATION from '../src/scanpy/workflows/cluster_identification.nf' params(params) include FILE_CONVERTER from '../src/utils/workflows/fileConverter.nf' params(params) @@ -33,15 +35,17 @@ workflow single_sample_star { QC_FILTER( data ) NORMALIZE_TRANSFORM( QC_FILTER.out.filtered ) HVG_SELECTION( NORMALIZE_TRANSFORM.out ) - DIM_REDUCTION( HVG_SELECTION.out.scaled ) + DIM_REDUCTION_PCA( HVG_SELECTION.out.scaled ) + NEIGHBORHOOD_GRAPH( DIM_REDUCTION_PCA.out ) + DIM_REDUCTION_TSNE_UMAP( NEIGHBORHOOD_GRAPH.out ) CLUSTER_IDENTIFICATION( NORMALIZE_TRANSFORM.out, - DIM_REDUCTION.out.dimred_pca_tsne_umap, + DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap, "No Batch Effect Correction" ) - // conversion - //// convert h5ad to X (here we choose: loom format) + // Conversion + // Convert h5ad to X (here we choose: loom format) filteredloom = SC__H5AD_TO_FILTERED_LOOM( QC_FILTER.out.filtered ) scopeloom = FILE_CONVERTER( CLUSTER_IDENTIFICATION.out.marker_genes, @@ -49,19 +53,19 @@ workflow single_sample_star { QC_FILTER.out.filtered ) - // publishing + // 
Publishing SC__PUBLISH_H5AD( CLUSTER_IDENTIFICATION.out.marker_genes, "single_sample.output" ) - // reporting: + // Reporting: def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) ) SC__SCANPY__MERGE_REPORTS( QC_FILTER.out.report.mix( samples.combine(UTILS__GENERATE_WORKFLOW_CONFIG_REPORT.out), HVG_SELECTION.out.report, - DIM_REDUCTION.out.report, + DIM_REDUCTION_TSNE_UMAP.out.report, CLUSTER_IDENTIFICATION.out.report ).groupTuple(), "merged_report", From 43573e08d2ef21e3ec0983e122cea781bef34f45 Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 14 Feb 2020 17:06:23 +0100 Subject: [PATCH 04/17] Use neighborhood_graph (scanpy) in bbknn pipeline --- workflows/bbknn.nf | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/workflows/bbknn.nf b/workflows/bbknn.nf index 040de7b5..95e04bd6 100644 --- a/workflows/bbknn.nf +++ b/workflows/bbknn.nf @@ -8,7 +8,9 @@ include '../src/utils/processes/utils.nf' params(params.sc.file_concatenator + p include QC_FILTER from '../src/scanpy/workflows/qc_filter.nf' params(params) include NORMALIZE_TRANSFORM from '../src/scanpy/workflows/normalize_transform.nf' params(params + params.global) include HVG_SELECTION from '../src/scanpy/workflows/hvg_selection.nf' params(params + params.global) -include DIM_REDUCTION from '../src/scanpy/workflows/dim_reduction.nf' params(params + params.global) +include NEIGHBORHOOD_GRAPH from '../src/scanpy/workflows/neighborhood_graph.nf' params(params) +include DIM_REDUCTION_PCA from '../src/scanpy/workflows/dim_reduction_pca.nf' params(params + params.global) +include DIM_REDUCTION_TSNE_UMAP from '../src/scanpy/workflows/dim_reduction.nf' params(params + params.global) // CLUSTER_IDENTIFICATION include '../src/scanpy/processes/cluster.nf' params(params) include '../src/scanpy/workflows/cluster_identification.nf' params(params + params.global) // Don't only import a specific process (the function needs also to be imported) @@ -36,25 
+38,27 @@ workflow bbknn_base { SC__FILE_CONCATENATOR( QC_FILTER.out.filtered.map{it -> it[1]}.collect() ) NORMALIZE_TRANSFORM( SC__FILE_CONCATENATOR.out ) HVG_SELECTION( NORMALIZE_TRANSFORM.out ) - DIM_REDUCTION( HVG_SELECTION.out.scaled ) + DIM_REDUCTION_PCA( HVG_SELECTION.out.scaled ) + NEIGHBORHOOD_GRAPH( DIM_REDUCTION_PCA.out ) + DIM_REDUCTION_TSNE_UMAP( NEIGHBORHOOD_GRAPH.out ) - //// Perform the clustering step w/o batch effect correction (for comparison matter) + // Perform the clustering step w/o batch effect correction (for comparison matter) clusterIdentificationPreBatchEffectCorrection = CLUSTER_IDENTIFICATION( NORMALIZE_TRANSFORM.out, - DIM_REDUCTION.out.dimred_pca_tsne_umap, + DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap, "Pre Batch Effect Correction" ) - //// Perform the batch effect correction + // Perform the batch effect correction BEC_BBKNN( NORMALIZE_TRANSFORM.out, //// include only PCA and t-SNE pre-merge dim reductions. Omit UMAP for clarity since it will have to be overwritten by BEC_BBKNN - DIM_REDUCTION.out.dimred_pca_tsne, + DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne, clusterIdentificationPreBatchEffectCorrection.marker_genes ) - // // conversion - //// convert h5ad to X (here we choose: loom format) + // Conversion + // Convert h5ad to X (here we choose: loom format) filteredloom = SC__H5AD_TO_FILTERED_LOOM( SC__FILE_CONCATENATOR.out ) scopeloom = FILE_CONVERTER( BEC_BBKNN.out.data.groupTuple(), @@ -67,7 +71,7 @@ workflow bbknn_base { file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb) ) - // collect the reports: + // Collect the reports: ipynbs = project.combine( UTILS__GENERATE_WORKFLOW_CONFIG_REPORT.out ).join( From 58d443f8092a7dd3460f3080717812cfcead70b2 Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 14 Feb 2020 17:15:31 +0100 Subject: [PATCH 05/17] Use neighborhood_graph (scanpy) in harmony pipeline --- workflows/bbknn.nf | 2 +- workflows/harmony.nf | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 
deletions(-) diff --git a/workflows/bbknn.nf b/workflows/bbknn.nf index 95e04bd6..0c3f85ee 100644 --- a/workflows/bbknn.nf +++ b/workflows/bbknn.nf @@ -52,7 +52,7 @@ workflow bbknn_base { // Perform the batch effect correction BEC_BBKNN( NORMALIZE_TRANSFORM.out, - //// include only PCA and t-SNE pre-merge dim reductions. Omit UMAP for clarity since it will have to be overwritten by BEC_BBKNN + // Include only PCA and t-SNE pre-merge dim reductions. Omit UMAP for clarity since it will have to be overwritten by BEC_BBKNN DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne, clusterIdentificationPreBatchEffectCorrection.marker_genes ) diff --git a/workflows/harmony.nf b/workflows/harmony.nf index 1b68ba97..d0729593 100644 --- a/workflows/harmony.nf +++ b/workflows/harmony.nf @@ -8,7 +8,9 @@ include '../src/utils/processes/utils.nf' params(params.sc.file_concatenator + p include QC_FILTER from '../src/scanpy/workflows/qc_filter.nf' params(params) include NORMALIZE_TRANSFORM from '../src/scanpy/workflows/normalize_transform.nf' params(params + params.global) include HVG_SELECTION from '../src/scanpy/workflows/hvg_selection.nf' params(params + params.global) -include DIM_REDUCTION from '../src/scanpy/workflows/dim_reduction.nf' params(params + params.global) +include NEIGHBORHOOD_GRAPH from '../src/scanpy/workflows/neighborhood_graph.nf' params(params) +include DIM_REDUCTION_PCA from '../src/scanpy/workflows/dim_reduction_pca.nf' params(params + params.global) +include DIM_REDUCTION_TSNE_UMAP from '../src/scanpy/workflows/dim_reduction.nf' params(params + params.global) // CLUSTER_IDENTIFICATION include '../src/scanpy/processes/cluster.nf' params(params + params.global) include '../src/scanpy/workflows/cluster_identification.nf' params(params + params.global) // Don't only import a specific process (the function needs also to be imported) @@ -37,12 +39,14 @@ workflow harmony_base { SC__FILE_CONCATENATOR( QC_FILTER.out.filtered.map{it -> it[1]}.collect() ) NORMALIZE_TRANSFORM( 
SC__FILE_CONCATENATOR.out ) HVG_SELECTION( NORMALIZE_TRANSFORM.out ) - DIM_REDUCTION( HVG_SELECTION.out.scaled ) + DIM_REDUCTION_PCA( HVG_SELECTION.out.scaled ) + NEIGHBORHOOD_GRAPH( DIM_REDUCTION_PCA.out ) + DIM_REDUCTION_TSNE_UMAP( NEIGHBORHOOD_GRAPH.out ) // Perform the clustering step w/o batch effect correction (for comparison matter) clusterIdentificationPreBatchEffectCorrection = CLUSTER_IDENTIFICATION( NORMALIZE_TRANSFORM.out, - DIM_REDUCTION.out.dimred_pca_tsne_umap, + DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap, "Pre Batch Effect Correction" ) @@ -50,7 +54,7 @@ workflow harmony_base { BEC_HARMONY( NORMALIZE_TRANSFORM.out, // include only PCA since Harmony will correct this - DIM_REDUCTION.out.dimred_pca.map { it -> tuple(it[0], it[1]) }, + DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap.map { it -> tuple(it[0], it[1]) }, clusterIdentificationPreBatchEffectCorrection.marker_genes ) From 4d0b89a18b5825e1af734779c26fb7ffba5f4124 Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 14 Feb 2020 18:54:43 +0100 Subject: [PATCH 06/17] Finalize mnncorrect pipeline Create final merge reports Publish final loom Use neighborhood_graph (scanpy) in mnncorrect pipeline Add docs to RtD --- docs/pipelines.rst | 12 ++++++++ workflows/mnncorrect.nf | 66 ++++++++++++++++++++++++++++------------- 2 files changed, 58 insertions(+), 20 deletions(-) diff --git a/docs/pipelines.rst b/docs/pipelines.rst index da4958cf..eff098d7 100644 --- a/docs/pipelines.rst +++ b/docs/pipelines.rst @@ -182,6 +182,18 @@ The output is a loom file with the results embedded. .. |Harmony Workflow| image:: https://raw.githubusercontent.com/vib-singlecell-nf/vsn-pipelines/master/assets/images/harmony.svg?sanitize=true +**mnncorrect** |mnncorrect| +----------------- + +.. 
|mnncorrect| image:: https://github.com/vib-singlecell-nf/vsn-pipelines/workflows/mnncorrect/badge.svg + +Runs the ``mnncorrect`` workflow (sample-specific filtering, merging of individual samples, normalization, log-transformation, HVG selection, PCA analysis, batch-effect correction (mnnCorrect), clustering, dimensionality reduction (t-SNE and UMAP)). +The output is a loom file with the results embedded. + +|mnnCorrect Workflow| + +.. |mnnCorrect Workflow| image:: https://raw.githubusercontent.com/vib-singlecell-nf/vsn-pipelines/master/assets/images/mnncorrect.svg?sanitize=true + Input Data Formats ******************* diff --git a/workflows/mnncorrect.nf b/workflows/mnncorrect.nf index 97cb7bd0..35b6b855 100644 --- a/workflows/mnncorrect.nf +++ b/workflows/mnncorrect.nf @@ -3,12 +3,19 @@ nextflow.preview.dsl=2 ////////////////////////////////////////////////////// // Import sub-workflows from the modules: +include '../src/utils/processes/utils.nf' params(params.sc.file_concatenator + params.global + params) + include QC_FILTER from '../src/scanpy/workflows/qc_filter.nf' params(params) -include SC__FILE_CONCATENATOR from '../src/utils/processes/utils.nf' params(params) include NORMALIZE_TRANSFORM from '../src/scanpy/workflows/normalize_transform.nf' params(params) +// include SC__SCANPY__ADJUSTMENT from '../src/scanpy/processes/adjust.nf' params(params) include HVG_SELECTION from '../src/scanpy/workflows/hvg_selection.nf' params(params) -include SC__SCANPY__ADJUSTMENT from '../src/scanpy/processes/adjust.nf' params(params) -include BEC_MNN_CORRECT from '../src/scanpy/workflows/bec_mnn_correct.nf' params(params) +include NEIGHBORHOOD_GRAPH from '../src/scanpy/workflows/neighborhood_graph.nf' params(params) +include DIM_REDUCTION_PCA from '../src/scanpy/workflows/dim_reduction_pca.nf' params(params + params.global) +include DIM_REDUCTION_TSNE_UMAP from '../src/scanpy/workflows/dim_reduction.nf' params(params + params.global) +// CLUSTER_IDENTIFICATION +include 
'../src/scanpy/processes/cluster.nf' params(params + params.global) +include '../src/scanpy/workflows/cluster_identification.nf' params(params + params.global) // Don't only import a specific process (the function needs also to be imported) +include BEC_MNNCORRECT from '../src/scanpy/workflows/bec_mnncorrect.nf' params(params) include SC__H5AD_TO_FILTERED_LOOM from '../src/utils/processes/h5adToLoom.nf' params(params) include FILE_CONVERTER from '../src/utils/workflows/fileConverter.nf' params(params) @@ -22,42 +29,61 @@ include SC__SCANPY__REPORT_TO_HTML from '../src/scanpy/processes/reports.nf' par workflow mnncorrect { - // run the pipeline + // Run the pipeline data = getTenXChannel( params.data.tenx.cellranger_outs_dir_path ).view() QC_FILTER( data ) // Remove concat SC__FILE_CONCATENATOR( QC_FILTER.out.filtered.map{it -> it[1]}.collect() ) NORMALIZE_TRANSFORM( SC__FILE_CONCATENATOR.out ) HVG_SELECTION( NORMALIZE_TRANSFORM.out ) - BEC_MNN_CORRECT( + DIM_REDUCTION_PCA( HVG_SELECTION.out.scaled ) + NEIGHBORHOOD_GRAPH( DIM_REDUCTION_PCA.out ) + DIM_REDUCTION_TSNE_UMAP( NEIGHBORHOOD_GRAPH.out ) + + // Perform the clustering step w/o batch effect correction (for comparison matter) + clusterIdentificationPreBatchEffectCorrection = CLUSTER_IDENTIFICATION( + NORMALIZE_TRANSFORM.out, + DIM_REDUCTION_TSNE_UMAP.out.dimred_tsne_umap, + "Pre Batch Effect Correction" + ) + + BEC_MNNCORRECT( NORMALIZE_TRANSFORM.out, - HVG_SELECTION.out.scaled + HVG_SELECTION.out.scaled, + clusterIdentificationPreBatchEffectCorrection.marker_genes ) - // SC__SCANPY__ADJUSTMENT( HVG_SELECTION.out.scaled ) - // conversion - //// convert h5ad to X (here we choose: loom format) + // Conversion + // Convert h5ad to X (here we choose: loom format) filteredloom = SC__H5AD_TO_FILTERED_LOOM( SC__FILE_CONCATENATOR.out ) scopeloom = FILE_CONVERTER( - BEC_MNN_CORRECT.out.data, + BEC_MNNCORRECT.out.data.groupTuple(), 'loom', SC__FILE_CONCATENATOR.out, ) - project = BEC_MNN_CORRECT.out.data.map { it -> 
it[0] } + project = CLUSTER_IDENTIFICATION.out.marker_genes.map { it -> it[0] } UTILS__GENERATE_WORKFLOW_CONFIG_REPORT( file(workflow.projectDir + params.utils.workflow_configuration.report_ipynb) ) - - // collect the reports: - ipynbs = project.combine(UTILS__GENERATE_WORKFLOW_CONFIG_REPORT.out) - .join(HVG_SELECTION.out.report) - .join(BEC_MNN_CORRECT.out.cluster_report) - .join(BEC_MNN_CORRECT.out.bbknn_report) - .map{ tuple( it[0], it.drop(1) ) } - // reporting: + // Collect the reports: + ipynbs = project.combine( + UTILS__GENERATE_WORKFLOW_CONFIG_REPORT.out + ).join( + HVG_SELECTION.out.report + ).join( + BEC_MNNCORRECT.out.cluster_report + ).combine( + BEC_MNNCORRECT.out.mnncorrect_report, + by: 0 + ).map { + tuple( it[0], it.drop(1) ) + } + // Reporting: + def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.sc.scanpy.clustering) ) SC__SCANPY__MERGE_REPORTS( ipynbs, - "merged_report" + "merged_report", + clusteringParams.isBenchmarkMode() ) SC__SCANPY__REPORT_TO_HTML(SC__SCANPY__MERGE_REPORTS.out) From 97e3b8cf8f225b7ef1985ff6e581aff370be134d Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 14 Feb 2020 18:57:01 +0100 Subject: [PATCH 07/17] Add testing for mnncorrect pipeline through GitHub Actions Create yml config Create test config Add test profile to main nextflow.config --- .github/workflows/mnncorrect.yml | 35 ++++++++++++++++++++++++++++++++ conf/test__mnncorrect.config | 28 +++++++++++++++++++++++++ nextflow.config | 4 ++++ 3 files changed, 67 insertions(+) create mode 100644 .github/workflows/mnncorrect.yml create mode 100644 conf/test__mnncorrect.config diff --git a/.github/workflows/mnncorrect.yml b/.github/workflows/mnncorrect.yml new file mode 100644 index 00000000..8a4a8ee2 --- /dev/null +++ b/.github/workflows/mnncorrect.yml @@ -0,0 +1,35 @@ +name: mnncorrect + +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + with: + 
submodules: true + - name: Install Nextflow + run: | + export NXF_VER='19.12.0-edge' + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: Get sample data + run: | + mkdir testdata + wget https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/sample_data_tiny.tar.gz + tar xvf sample_data_tiny.tar.gz + cp -r sample_data testdata/sample1 + mv sample_data testdata/sample2 + - name: Run single_sample test + run: | + nextflow run ${GITHUB_WORKSPACE} -profile mnncorrect,test__mnncorrect,docker -entry mnncorrect -ansi-log false + cat .nextflow.log diff --git a/conf/test__mnncorrect.config b/conf/test__mnncorrect.config new file mode 100644 index 00000000..f1dfc916 --- /dev/null +++ b/conf/test__mnncorrect.config @@ -0,0 +1,28 @@ + +params { + global { + project_name = 'bbknn_CI' + } + data { + tenx { + cellranger_outs_dir_path = "testdata/*/outs/" + } + } + sc { + file_annotator { + metaDataFilePath = '' + } + scanpy { + filter { + cellFilterMinNGenes = 1 + } + dim_reduction { + pca { + dimReductionMethod = 'PCA' + nComps = 2 + } + } + } + } +} + diff --git a/nextflow.config b/nextflow.config index 04c08563..3b5c2970 100644 --- a/nextflow.config +++ b/nextflow.config @@ -199,6 +199,10 @@ profiles { includeConfig 'src/utils/conf/h5ad_concatenate.config' includeConfig 'conf/test__harmony.config' } + test__mnncorrect { + includeConfig 'src/utils/conf/h5ad_concatenate.config' + includeConfig 'conf/test__mnncorrect.config' + } } From a297fc0007df5488aba4e6a22d850f0397907e39 Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 14 Feb 2020 18:57:31 +0100 Subject: [PATCH 08/17] Update DAGs --- assets/images/mnncorrect.svg | 1541 +++++++++++++++++++++ assets/images/scenic.svg | 532 +++---- assets/images/scenic_multiruns.svg | 1388 ++++++++++--------- assets/images/single_sample.svg | 1261 +++++++++++------ assets/images/single_sample_scenic.svg | 1768 ++++++++++++++---------- 5 files changed, 4458 insertions(+), 2032 deletions(-) create mode 
100644 assets/images/mnncorrect.svg diff --git a/assets/images/mnncorrect.svg b/assets/images/mnncorrect.svg new file mode 100644 index 00000000..b1182b72 --- /dev/null +++ b/assets/images/mnncorrect.svg @@ -0,0 +1,1541 @@ + + +pipeline_dag + + + +p0 + +Channel.from + + + +p1 + +view + + + +p0->p1 + + + + + +p2 + + + + +p1->p2 + + + + + +p3 + +Channel.fromPath + + + +p4 + +map + + + +p3->p4 + + + + + +p5 + +view + + + +p4->p5 + + +channel + + + +p6 + +mnncorrect:MNNCORRECT:QC_FILTER:SC__FILE_CONVERTER + + + +p5->p6 + + +data + + + +p7 + +mnncorrect:MNNCORRECT:QC_FILTER:SC__SCANPY__COMPUTE_QC_STATS + + + +p6->p7 + + + + + +p8 + +mnncorrect:MNNCORRECT:QC_FILTER:SC__SCANPY__GENE_FILTER + + + +p7->p8 + + + + + +p11 + +join + + + +p7->p11 + + + + + +p9 + +mnncorrect:MNNCORRECT:QC_FILTER:SC__SCANPY__CELL_FILTER + + + +p8->p9 + + + + + +p18 + +map + + + +p9->p18 + + + + + +p19 + +collect + + + +p18->p19 + + + + + +p10 + + + + +p10->p11 + + + + + +p15 + +mnncorrect:MNNCORRECT:QC_FILTER:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__GENERATE_DUAL_INPUT_REPORT + + + +p11->p15 + + +data + + + +p16 + +mnncorrect:MNNCORRECT:QC_FILTER:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__REPORT_TO_HTML + + + +p15->p16 + + + + + +p12 + + + + +p12->p15 + + +ipynb + + + +p13 + + + + +p13->p15 + + +reportTitle + + + +p14 + + + + +p14->p15 + + +isBenchmarkMode + + + +p17 + + + + +p16->p17 + + + + + +p20 + +mnncorrect:MNNCORRECT:SC__FILE_CONCATENATOR + + + +p19->p20 + + + + + +p21 + +mnncorrect:MNNCORRECT:NORMALIZE_TRANSFORM:SC__SCANPY__NORMALIZATION + + + +p20->p21 + + +rawFilteredData + + + +p94 + +mnncorrect:MNNCORRECT:SC__H5AD_TO_FILTERED_LOOM + + + +p20->p94 + + +rawFilteredData + + + +p103 + +combine + + + +p20->p103 + + +rawFilteredData + + + +p22 + +mnncorrect:MNNCORRECT:NORMALIZE_TRANSFORM:SC__SCANPY__DATA_TRANSFORMATION + + + +p21->p22 + + + + + +p23 + +mnncorrect:MNNCORRECT:HVG_SELECTION:SC__SCANPY__FEATURE_SELECTION + + + +p22->p23 + + +normalizedTransformedData + + + +p50 + +join + + + 
+p22->p50 + + +normalizedTransformedData + + + +p64 + +join + + + +p22->p64 + + +normalizedTransformedData + + + +p24 + +mnncorrect:MNNCORRECT:HVG_SELECTION:SC__SCANPY__FEATURE_SCALING + + + +p23->p24 + + + + + +p27 + +mnncorrect:MNNCORRECT:HVG_SELECTION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT + + + +p24->p27 + + +data + + + +p30 + +map + + + +p24->p30 + + +data + + + +p52 + +map + + + +p24->p52 + + +data + + + +p28 + +mnncorrect:MNNCORRECT:HVG_SELECTION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML + + + +p27->p28 + + + + + +p115 + +join + + + +p27->p115 + + + + + +p25 + + + + +p25->p27 + + +ipynb + + + +p26 + + + + +p26->p27 + + +reportTitle + + + +p29 + + + + +p28->p29 + + + + + +p31 + +mnncorrect:MNNCORRECT:DIM_REDUCTION_PCA:SC__SCANPY__DIM_REDUCTION__PCA + + + +p30->p31 + + +data + + + +p32 + +mnncorrect:MNNCORRECT:NEIGHBORHOOD_GRAPH:SC__SCANPY__NEIGHBORHOOD_GRAPH + + + +p31->p32 + + +data + + + +p33 + +map + + + +p32->p33 + + +data + + + +p34 + +mnncorrect:MNNCORRECT:DIM_REDUCTION_TSNE_UMAP:SC__SCANPY__DIM_REDUCTION__TSNE + + + +p33->p34 + + + + + +p35 + +map + + + +p34->p35 + + + + + +p36 + +mnncorrect:MNNCORRECT:DIM_REDUCTION_TSNE_UMAP:SC__SCANPY__DIM_REDUCTION__UMAP + + + +p35->p36 + + + + + +p37 + +map + + + +p36->p37 + + +data + + + +p43 + +map + + + +p36->p43 + + +data + + + +p40 + +mnncorrect:MNNCORRECT:DIM_REDUCTION_TSNE_UMAP:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT + + + +p37->p40 + + +data + + + +p41 + +mnncorrect:MNNCORRECT:DIM_REDUCTION_TSNE_UMAP:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML + + + +p40->p41 + + + + + +p38 + + + + +p38->p40 + + +ipynb + + + +p39 + + + + +p39->p40 + + +reportTitle + + + +p42 + + + + +p41->p42 + + + + + +p44 + +mnncorrect:MNNCORRECT:CLUSTER_IDENTIFICATION:SC__SCANPY__CLUSTERING + + + +p43->p44 + + + + + +p47 + +mnncorrect:MNNCORRECT:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT + + + +p44->p47 + + +data + + + +p44->p50 + + +data + + + +p48 + 
+mnncorrect:MNNCORRECT:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML + + + +p47->p48 + + + + + +p45 + + + + +p45->p47 + + +ipynb + + + +p46 + + + + +p46->p47 + + +reportTitle + + + +p49 + + + + +p48->p49 + + + + + +p51 + +mnncorrect:MNNCORRECT:CLUSTER_IDENTIFICATION:SC__SCANPY__MARKER_GENES + + + +p50->p51 + + + + + +p87 + +join + + + +p51->p87 + + +A + + + +p110 + +map + + + +p51->p110 + + +A + + + +p91 + +mnncorrect:MNNCORRECT:BEC_MNNCORRECT:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__GENERATE_DUAL_INPUT_REPORT + + + +p87->p91 + + +data + + + +p53 + +mnncorrect:MNNCORRECT:BEC_MNNCORRECT:SC__SCANPY__BATCH_EFFECT_CORRECTION + + + +p52->p53 + + + + + +p54 + +map + + + +p53->p54 + + +data + + + +p55 + +mnncorrect:MNNCORRECT:BEC_MNNCORRECT:DIM_REDUCTION_PCA:SC__SCANPY__DIM_REDUCTION__PCA + + + +p54->p55 + + +data + + + +p56 + +mnncorrect:MNNCORRECT:BEC_MNNCORRECT:NEIGHBORHOOD_GRAPH:SC__SCANPY__NEIGHBORHOOD_GRAPH + + + +p55->p56 + + +data + + + +p57 + +map + + + +p56->p57 + + +data + + + +p58 + +mnncorrect:MNNCORRECT:BEC_MNNCORRECT:CLUSTER_IDENTIFICATION:SC__SCANPY__CLUSTERING + + + +p57->p58 + + + + + +p61 + +mnncorrect:MNNCORRECT:BEC_MNNCORRECT:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT + + + +p58->p61 + + +data + + + +p58->p64 + + +data + + + +p62 + +mnncorrect:MNNCORRECT:BEC_MNNCORRECT:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML + + + +p61->p62 + + +cluster_report + + + +p116 + +join + + + +p61->p116 + + +cluster_report + + + +p59 + + + + +p59->p61 + + +ipynb + + + +p60 + + + + +p60->p61 + + +reportTitle + + + +p63 + + + + +p62->p63 + + + + + +p65 + +mnncorrect:MNNCORRECT:BEC_MNNCORRECT:CLUSTER_IDENTIFICATION:SC__SCANPY__MARKER_GENES + + + +p64->p65 + + + + + +p66 + +map + + + +p65->p66 + + + + + +p71 + +combine + + + +p66->p71 + + + + + +p72 + +map + + + +p71->p72 + + +data + + + +p67 + +Channel.from + + + +p68 + + + + +p67->p68 + + + + + +p69 + + + + +p69->p71 + + + + + +p70 + + + + +p70->p71 + + + + + 
+p73 + +mnncorrect:MNNCORRECT:BEC_MNNCORRECT:DIM_REDUCTION_TSNE_UMAP:SC__SCANPY__DIM_REDUCTION__TSNE + + + +p72->p73 + + + + + +p74 + +map + + + +p73->p74 + + + + + +p75 + +mnncorrect:MNNCORRECT:BEC_MNNCORRECT:DIM_REDUCTION_TSNE_UMAP:SC__SCANPY__DIM_REDUCTION__UMAP + + + +p74->p75 + + + + + +p76 + +map + + + +p75->p76 + + +data + + + +p82 + +map + + + +p75->p82 + + +data + + + +p96 + +groupTuple + + + +p75->p96 + + +data + + + +p79 + +mnncorrect:MNNCORRECT:BEC_MNNCORRECT:DIM_REDUCTION_TSNE_UMAP:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT + + + +p76->p79 + + +data + + + +p80 + +mnncorrect:MNNCORRECT:BEC_MNNCORRECT:DIM_REDUCTION_TSNE_UMAP:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML + + + +p79->p80 + + + + + +p77 + + + + +p77->p79 + + +ipynb + + + +p78 + + + + +p78->p79 + + +reportTitle + + + +p81 + + + + +p80->p81 + + + + + +p84 + +mnncorrect:MNNCORRECT:BEC_MNNCORRECT:SC__PUBLISH_H5AD + + + +p82->p84 + + + + + +p85 + + + + +p84->p85 + + +B + + + +p83 + + + + +p83->p84 + + +fOutSuffix + + + +p86 + + + + +p86->p87 + + + + + +p92 + +mnncorrect:MNNCORRECT:BEC_MNNCORRECT:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__REPORT_TO_HTML + + + +p91->p92 + + + + + +p88 + + + + +p88->p91 + + +ipynb + + + +p89 + + + + +p89->p91 + + +reportTitle + + + +p90 + + + + +p90->p91 + + +isBenchmarkMode + + + +p93 + + + + +p92->p93 + + + + + +p95 + + + + +p94->p95 + + + + + +p97 + +branch + + + +p96->p97 + + +data + + + +p98 + +view + + + +p97->p98 + + + + + +p108 + +view + + + +p97->p108 + + + + + +p100 + +map + + + +p97->p100 + + + + + +p99 + + + + +p98->p99 + + + + + +p109 + + + + +p108->p109 + + + + + +p101 + + + + +p100->p101 + + + + + +p102 + + + + +p102->p103 + + + + + +p104 + +ifEmpty + + + +p103->p104 + + + + + +p105 + +mnncorrect:MNNCORRECT:FILE_CONVERTER:SC__H5AD_TO_LOOM + + + +p104->p105 + + + + + +p106 + +mnncorrect:MNNCORRECT:FILE_CONVERTER:COMPRESS_HDF5 + + + +p105->p106 + + + + + +p107 + + + + +p106->p107 + + + + + +p114 + +combine + + + +p110->p114 + + +project + + + +p114->p115 + + 
+ + + +p111 + + + + +p112 + +mnncorrect:MNNCORRECT:UTILS__GENERATE_WORKFLOW_CONFIG_REPORT + + + +p111->p112 + + +ipynb + + + +p112->p114 + + + + + +p113 + + + + +p113->p114 + + + + + +p115->p116 + + + + + +p119 + +combine + + + +p116->p119 + + + + + +p120 + +map + + + +p119->p120 + + + + + +p117 + + + + +p117->p119 + + + + + +p118 + + + + +p118->p119 + + + + + +p123 + +mnncorrect:MNNCORRECT:SC__SCANPY__MERGE_REPORTS + + + +p120->p123 + + +ipynbs + + + +p124 + +mnncorrect:MNNCORRECT:SC__SCANPY__REPORT_TO_HTML + + + +p123->p124 + + + + + +p121 + + + + +p121->p123 + + +reportTitle + + + +p122 + + + + +p122->p123 + + +isBenchmarkMode + + + +p125 + + + + +p124->p125 + + + + + \ No newline at end of file diff --git a/assets/images/scenic.svg b/assets/images/scenic.svg index cd9c47be..39f1d54f 100644 --- a/assets/images/scenic.svg +++ b/assets/images/scenic.svg @@ -1,376 +1,416 @@ - + --> + pipeline_dag - + p0 - -Channel.from + +Channel.from p1 - + +view p0->p1 - - -runs + + p2 - -Channel.of + + + + +p1->p2 + + p3 - -view - - - -p2->p3 - - -filteredLoom - - - -p7 - -combine - - - -p2->p7 - - -filteredLoom + +Channel.from p4 - + p3->p4 - - + + +runs p5 - + +Channel.of - + + +p6 + +view + + -p5->p7 - - +p5->p6 + + +filteredLoom - - -p9 - -scenic:SCENIC_WF:GRNBOOST2_WITHOUT_DASK + + +p10 + +combine - + -p7->p9 - - +p5->p10 + + +filteredLoom - + -p6 - +p7 + - + p6->p7 - - + + - - -p14 - -scenic:SCENIC_WF:CISTARGET__MOTIF + + +p8 + - - -p9->p14 - - + + +p8->p10 + + - - -p19 - -scenic:SCENIC_WF:CISTARGET__TRACK + + +p12 + +scenic:SCENIC_WF:GRNBOOST2_WITHOUT_DASK - - -p9->p19 - - + + +p10->p12 + + - - -p8 - + + +p9 + - + -p8->p9 - - -tfs +p9->p10 + + - - -p21 - -scenic:SCENIC_WF:AUCELL__MOTIF + + +p17 + +scenic:SCENIC_WF:CISTARGET__MOTIF - - -p14->p21 - - + + +p12->p17 + + - - -p10 - -Channel.fromPath + + +p22 + +scenic:SCENIC_WF:CISTARGET__TRACK + + + +p12->p22 + + p11 - -collect + - + -p10->p11 - - - - - -p11->p14 - - -motifDB +p11->p12 + + +tfs - - -p12 - + + +p24 + 
+scenic:SCENIC_WF:AUCELL__MOTIF - - -p12->p14 - - -annotation + + +p17->p24 + + p13 - + +Channel.fromPath + + + +p14 + +collect - + p13->p14 - - -type - - - -p24 - -map + + - - -p21->p24 - - + + +p14->p17 + + +motifsDb p15 - -Channel.fromPath + + + + +p15->p17 + + +annotation p16 - -collect + - + -p15->p16 - - - - - -p16->p19 - - -trackDB +p16->p17 + + +type - - -p23 - -scenic:SCENIC_WF:AUCELL__TRACK + + +p27 + +map - - -p19->p23 - - + + +p24->p27 + + - + -p17 - - - - -p17->p19 - - -annotation +p18 + +Channel.fromPath - + -p18 - +p19 + +collect - + p18->p19 - - -type + + - - -p25 - -map + + +p19->p22 + + +tracksDb - - -p23->p25 - - + + +p26 + +scenic:SCENIC_WF:AUCELL__TRACK + + + +p22->p26 + + p20 - + + + + +p20->p22 + + +annotation - + + +p21 + + + -p20->p21 - - -type +p21->p22 + + +type - - -p27 - -join + + +p28 + +map - - -p24->p27 - - + + +p26->p28 + + - - -p22 - + + +p23 + - + -p22->p23 - - -type +p23->p24 + + +type - + + +p31 + +join + + + +p27->p31 + + + + -p26 - +p25 + - + p25->p26 - - + + +type - - -p28 + + +p29 + + + + +p28->p29 + + + + + +p32 scenic:SCENIC_WF:MERGE_MOTIF_TRACK_LOOMS - - -p27->p28 + + +p31->p32 - - -p29 + + +p30 + + + + +p30->p31 + + + + + +p33 scenic:SCENIC_WF:VISUALIZE - - -p28->p29 + + +p32->p33 - - -p30 + + +p34 scenic:SCENIC_WF:PUBLISH_LOOM - - -p29->p30 + + +p33->p34 - - -p31 + + +p35 - - -p30->p31 + + +p34->p35 diff --git a/assets/images/scenic_multiruns.svg b/assets/images/scenic_multiruns.svg index 13cc8e23..2b5b68b3 100644 --- a/assets/images/scenic_multiruns.svg +++ b/assets/images/scenic_multiruns.svg @@ -1,862 +1,990 @@ + --> pipeline_dag - + p0 - -Channel.from + +Channel.from p1 - + +view p0->p1 - - -runs + + p2 - -Channel.of + + + + +p1->p2 + + p3 - -view - - - -p2->p3 - - -filteredLoom - - - -p7 - -combine - - - -p2->p7 - - -filteredLoom - - - -p33 - -join - - - -p2->p33 - - -filteredLoom - - - -p41 - -join - - - -p2->p41 - - -filteredLoom - - - -p54 - -join - - - -p2->p54 - - -filteredLoom - - - -p62 - -join - - - 
-p2->p62 - - -filteredLoom + +Channel.from p4 - + p3->p4 - - + + +runs p5 - + +Channel.of + + + +p6 + +view - + -p5->p7 - - +p5->p6 + + +filteredLoom - - -p9 - -scenic:SCENIC_WF:GRNBOOST2_WITHOUT_DASK + + +p10 + +combine - + -p7->p9 - - +p5->p10 + + +filteredLoom - + + +p37 + +join + + + +p5->p37 + + +filteredLoom + + + +p47 + +join + + + +p5->p47 + + +filteredLoom + + + +p62 + +join + + + +p5->p62 + + +filteredLoom + + + +p72 + +join + + + +p5->p72 + + +filteredLoom + + -p6 - +p7 + - + p6->p7 - - + + - - -p14 - -scenic:SCENIC_WF:CISTARGET__MOTIF + + +p8 + - - -p9->p14 - - + + +p8->p10 + + - - -p19 - -scenic:SCENIC_WF:CISTARGET__TRACK + + +p12 + +scenic:SCENIC_WF:GRNBOOST2_WITHOUT_DASK - - -p9->p19 - - + + +p10->p12 + + - - -p8 - + + +p9 + - + -p8->p9 - - -tfs +p9->p10 + + - - -p21 - -scenic:SCENIC_WF:AUCELL__MOTIF - - - -p14->p21 - - -ctx + + +p17 + +scenic:SCENIC_WF:CISTARGET__MOTIF - - -p24 - -map + + +p12->p17 + + - - -p14->p24 - - -ctx + + +p22 + +scenic:SCENIC_WF:CISTARGET__TRACK - - -p10 - -Channel.fromPath + + +p12->p22 + + p11 - -collect + - + -p10->p11 - - +p11->p12 + + +tfs - - -p11->p14 - - -motifDB + + +p24 + +scenic:SCENIC_WF:AUCELL__MOTIF - - -p12 - + + +p17->p24 + + +ctx - - -p12->p14 - - -annotation + + +p27 + +map + + + +p17->p27 + + +ctx p13 - + +Channel.fromPath + + + +p14 + +collect - + p13->p14 - - -type - - - -p28 - -map + + - - -p21->p28 - - -auc + + +p14->p17 + + +motifsDb p15 - -Channel.fromPath + + + + +p15->p17 + + +annotation p16 - -collect + - + -p15->p16 - - +p16->p17 + + +type - - -p16->p19 - - -trackDB - - - -p23 - -scenic:SCENIC_WF:AUCELL__TRACK - - - -p19->p23 - - -ctx - - - -p45 - -map + + +p31 + +map - - -p19->p45 - - -ctx + + +p24->p31 + + +auc - + -p17 - - - - -p17->p19 - - -annotation +p18 + +Channel.fromPath - + -p18 - +p19 + +collect - + p18->p19 - - -type + + - - -p49 - -map + + +p19->p22 + + +tracksDb - - -p23->p49 - - -auc + + +p26 + +scenic:SCENIC_WF:AUCELL__TRACK + + + +p22->p26 + + +ctx + + + +p52 + +map + + + 
+p22->p52 + + +ctx p20 - + + + + +p20->p22 + + +annotation - + + +p21 + + + -p20->p21 - - -type +p21->p22 + + +type - - -p29 - -groupTuple + + +p56 + +map - - -p28->p29 - - + + +p26->p56 + + +auc - - -p22 - + + +p23 + - + -p22->p23 - - -type +p23->p24 + + +type - - -p50 - -groupTuple + + +p32 + +groupTuple - - -p49->p50 - - + + +p31->p32 + + p25 - -groupTuple + - - -p24->p25 - - + + +p25->p26 + + +type - - -p27 - -scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__MOTIF:AGGR_FEATURES + + +p57 + +groupTuple - - -p25->p27 - - + + +p56->p57 + + - + -p37 - -join +p28 + +groupTuple - - -p27->p37 - - + + +p27->p28 + + - - -p26 - + + +p30 + +scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__MOTIF:AGGR_FEATURES - - -p26->p27 - - -type + + +p28->p30 + + - - -p39 - -scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__MOTIF:FEATURES_TO_REGULONS + + +p42 + +join - - -p37->p39 - - + + +p30->p42 + + - + -p31 - -scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__MOTIF:AGGR_REGULONS +p29 + - - -p29->p31 - - + + +p29->p30 + + +type - - -p32 - + + +p44 + +scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__MOTIF:FEATURES_TO_REGULONS - - -p31->p32 - - + + +p42->p44 + + - - -p30 - + + +p34 + +scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__MOTIF:AGGR_REGULONS - - -p30->p31 - - -type + + +p32->p34 + + - + p35 - -scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__MOTIF:AUCELL + - + -p33->p35 - - +p34->p35 + + + + + +p33 + + + + +p33->p34 + + +type + + + +p39 + +scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__MOTIF:AUCELL + + + +p37->p39 + + - + p36 - + - + -p35->p36 - - - - - -p34 - - - - -p34->p35 - - -type +p36->p37 + + - + p40 - + p39->p40 - - + + - + p38 - + p38->p39 - - -type + + +type - + -p42 - -join +p41 + - + p41->p42 - - - - - -p44 - -scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__MOTIF:SAVE_TO_LOOM + + - - -p42->p44 - - - - + -p67 - -join +p45 + - - -p44->p67 - - + + +p44->p45 + + p43 - + - + p43->p44 - - -type + + +type - - -p68 - -scenic:SCENIC_WF:MERGE_MOTIF_TRACK_LOOMS + + +p49 + +join - - -p67->p68 - - + + +p47->p49 + + p46 - -groupTuple - - - -p45->p46 - - - - - -p48 - 
-scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__TRACK:AGGR_FEATURES + - - -p46->p48 - - + + +p46->p47 + + - + -p58 - -join +p51 + +scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__MOTIF:SAVE_TO_LOOM - - -p48->p58 - - + + +p49->p51 + + - + -p47 - +p48 + - - -p47->p48 - - -type + + +p48->p49 + + - - -p60 - -scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__TRACK:FEATURES_TO_REGULONS + + +p79 + +join - - -p58->p60 - - + + +p51->p79 + + - - -p52 - -scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__TRACK:AGGR_REGULONS + + +p50 + - - -p50->p52 - - + + +p50->p51 + + +type + + + +p80 + +scenic:SCENIC_WF:MERGE_MOTIF_TRACK_LOOMS + + + +p79->p80 + + p53 - + +groupTuple - + p52->p53 - - + + - - -p51 - + + +p55 + +scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__TRACK:AGGR_FEATURES - - -p51->p52 - - -type + + +p53->p55 + + - - -p56 - -scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__TRACK:AUCELL + + +p67 + +join - + -p54->p56 - - +p55->p67 + + - - -p57 - - - - -p56->p57 - - - - - -p55 - + + +p54 + - - -p55->p56 - - -type + + +p54->p55 + + +type - - -p61 - + + +p69 + +scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__TRACK:FEATURES_TO_REGULONS - - -p60->p61 - - + + +p67->p69 + + - + p59 - + +scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__TRACK:AGGR_REGULONS + + + +p57->p59 + + + + + +p60 + p59->p60 - - -type - - - -p63 - -join - - - -p62->p63 - - + + - - -p65 - -scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__TRACK:SAVE_TO_LOOM + + +p58 + - - -p63->p65 - - + + +p58->p59 + + +type - - -p66 - + + +p61 + - - -p65->p66 - - + + +p61->p62 + + - + p64 - + +scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__TRACK:AUCELL + + + +p62->p64 + + + + + +p65 + p64->p65 - - -type + + - - -p69 - -scenic:SCENIC_WF:VISUALIZE + + +p63 + - - -p68->p69 - - + + +p63->p64 + + +type + + + +p66 + + + + +p66->p67 + + - + p70 - -scenic:SCENIC_WF:PUBLISH_LOOM + p69->p70 - - + + + + + +p68 + + + + +p68->p69 + + +type + + + +p74 + +join + + + +p72->p74 + + - + p71 - - - - -p70->p71 - - + + + + +p71->p72 + + + + + +p76 + +scenic:SCENIC_WF:MULTI_RUNS_TO_LOOM__TRACK:SAVE_TO_LOOM + + + +p74->p76 + + + + + +p73 + + + + +p73->p74 + + 
+ + + +p77 + + + + +p76->p77 + + + + + +p75 + + + + +p75->p76 + + +type + + + +p78 + + + + +p78->p79 + + + + + +p81 + +scenic:SCENIC_WF:VISUALIZE + + + +p80->p81 + + + + + +p82 + +scenic:SCENIC_WF:PUBLISH_LOOM + + + +p81->p82 + + + + + +p83 + + + + +p82->p83 + + \ No newline at end of file diff --git a/assets/images/single_sample.svg b/assets/images/single_sample.svg index b12ff999..ec82c87c 100644 --- a/assets/images/single_sample.svg +++ b/assets/images/single_sample.svg @@ -1,611 +1,1006 @@ - + --> + pipeline_dag - + p0 - -Channel.fromPath + +Channel.from p1 - -map + +view p0->p1 - - + + p2 - -single_sample:SINGLE_SAMPLE:QC_FILTER:SC__FILE_CONVERTER + p1->p2 - - -data + + p3 - -single_sample:SINGLE_SAMPLE:QC_FILTER:SC__SCANPY__COMPUTE_QC_STATS - - - -p2->p3 - - + +Channel.fromPath p4 - -single_sample:SINGLE_SAMPLE:QC_FILTER:SC__SCANPY__GENE_FILTER + +map - + p3->p4 - - -data1 - - - -p6 - -join - - - -p3->p6 - - -data1 + + p5 - -single_sample:SINGLE_SAMPLE:QC_FILTER:SC__SCANPY__CELL_FILTER + +view - + p4->p5 - - + + +channel - + -p12 - -single_sample:SINGLE_SAMPLE:SC__H5AD_TO_FILTERED_LOOM +p6 + +map - - -p5->p12 - - -filtered + + +p5->p6 + + +data - - -p14 - -single_sample:SINGLE_SAMPLE:NORMALIZE_TRANSFORM:SC__SCANPY__NORMALIZATION + + +p9 + +single_sample:SINGLE_SAMPLE:single_sample_base:QC_FILTER:SC__FILE_CONVERTER - - -p5->p14 - - -filtered + + +p5->p9 + + +data - - -p13 - + + +p74 + +combine - - -p12->p13 - - + + +p6->p74 + + +samples + + + +p75 + +mix + + + +p74->p75 + + - + -p9 - -single_sample:SINGLE_SAMPLE:QC_FILTER:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__GENERATE_DUAL_INPUT_REPORT +p7 + + + + +p8 + +single_sample:SINGLE_SAMPLE:single_sample_base:UTILS__GENERATE_WORKFLOW_CONFIG_REPORT + + + +p7->p8 + + +ipynb - + -p6->p9 - - +p8->p74 + + p10 - -single_sample:SINGLE_SAMPLE:QC_FILTER:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__REPORT_TO_HTML + +single_sample:SINGLE_SAMPLE:single_sample_base:QC_FILTER:SC__SCANPY__COMPUTE_QC_STATS - -p9->p10 - - - - - -p43 - -mix 
- - - -p9->p43 - - - - - -p7 - - - - -p7->p9 - - -ipynb - - - -p8 - - - -p8->p9 - - -reportTitle +p9->p10 + + p11 - + +single_sample:SINGLE_SAMPLE:single_sample_base:QC_FILTER:SC__SCANPY__GENE_FILTER - + p10->p11 - - + + - - -p15 - -single_sample:SINGLE_SAMPLE:NORMALIZE_TRANSFORM:SC__SCANPY__DATA_TRANSFORMATION + + +p14 + +join - + -p14->p15 - - +p10->p14 + + - - -p16 - -single_sample:SINGLE_SAMPLE:HVG_SELECTION:SC__SCANPY__FEATURE_SELECTION + + +p12 + +single_sample:SINGLE_SAMPLE:single_sample_base:QC_FILTER:SC__SCANPY__CELL_FILTER - - -p15->p16 - - -data + + +p11->p12 + + - - -p17 - -single_sample:SINGLE_SAMPLE:HVG_SELECTION:SC__SCANPY__FEATURE_SCALING + + +p21 + +single_sample:SINGLE_SAMPLE:single_sample_base:NORMALIZE_TRANSFORM:SC__SCANPY__NORMALIZATION - - -p16->p17 - - + + +p12->p21 + + +rawFilteredData + + + +p53 + +single_sample:SINGLE_SAMPLE:single_sample_base:SC__H5AD_TO_FILTERED_LOOM + + + +p12->p53 + + +rawFilteredData + + + +p63 + +combine + + + +p12->p63 + + +rawFilteredData - - -p20 - -single_sample:SINGLE_SAMPLE:HVG_SELECTION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT + + +p22 + +single_sample:SINGLE_SAMPLE:single_sample_base:NORMALIZE_TRANSFORM:SC__SCANPY__DATA_TRANSFORMATION - - -p17->p20 - - -data + + +p21->p22 + + - - -p23 - -single_sample:SINGLE_SAMPLE:DIM_REDUCTION:SC__SCANPY__DIM_REDUCTION__PCA + + +p13 + - - -p17->p23 - - -data + + +p13->p14 + + - + + +p18 + +single_sample:SINGLE_SAMPLE:single_sample_base:QC_FILTER:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__GENERATE_DUAL_INPUT_REPORT + + + +p14->p18 + + +data + + -p21 - -single_sample:SINGLE_SAMPLE:HVG_SELECTION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML +p19 + +single_sample:SINGLE_SAMPLE:single_sample_base:QC_FILTER:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__REPORT_TO_HTML - - -p20->p21 - - + + +p18->p19 + + - - -p20->p43 - - + + +p18->p75 + + - + + +p15 + + + + +p15->p18 + + +ipynb + + -p18 - +p16 + - - -p18->p20 - - -ipynb + + +p16->p18 + + +reportTitle - + -p19 - +p17 + + + + +p17->p18 + + 
+isBenchmarkMode + + + +p20 + - + p19->p20 - - -reportTitle + + - - -p22 - + + +p23 + +single_sample:SINGLE_SAMPLE:single_sample_base:HVG_SELECTION:SC__SCANPY__FEATURE_SELECTION - - -p21->p22 - - + + +p22->p23 + + +normalizedTransformedData + + + +p51 + +join + + + +p22->p51 + + +normalizedTransformedData - + p24 - -single_sample:SINGLE_SAMPLE:DIM_REDUCTION:SC__SCANPY__DIM_REDUCTION__TSNE + +single_sample:SINGLE_SAMPLE:single_sample_base:HVG_SELECTION:SC__SCANPY__FEATURE_SCALING p23->p24 - - + + - - -p25 - -single_sample:SINGLE_SAMPLE:DIM_REDUCTION:SC__SCANPY__DIM_REDUCTION__UMAP - - - -p24->p25 - - - - + -p28 - -single_sample:SINGLE_SAMPLE:DIM_REDUCTION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT +p27 + +single_sample:SINGLE_SAMPLE:single_sample_base:HVG_SELECTION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT - - -p25->p28 - - -data + + +p24->p27 + + +data - + -p31 - -single_sample:SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:SC__SCANPY__CLUSTERING +p30 + +map - - -p25->p31 - - -data + + +p24->p30 + + +data - + -p29 - -single_sample:SINGLE_SAMPLE:DIM_REDUCTION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML +p28 + +single_sample:SINGLE_SAMPLE:single_sample_base:HVG_SELECTION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML - - -p28->p29 - - + + +p27->p28 + + - - -p28->p43 - - + + +p27->p75 + + - + -p26 - +p25 + - - -p26->p28 - - -ipynb + + +p25->p27 + + +ipynb - + -p27 - +p26 + - - -p27->p28 - - -reportTitle + + +p26->p27 + + +reportTitle - + -p30 - +p29 + - - -p29->p30 - - + + +p28->p29 + + - + -p34 - -single_sample:SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT - - - -p31->p34 - - -data - - - -p37 - -single_sample:SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:SC__SCANPY__MARKER_GENES - - - -p31->p37 - - -data - - - -p35 - -single_sample:SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML - - - -p34->p35 - - +p31 + +single_sample:SINGLE_SAMPLE:single_sample_base:DIM_REDUCTION_PCA:SC__SCANPY__DIM_REDUCTION__PCA - - -p34->p43 - - + + 
+p30->p31 + + +data p32 - + +single_sample:SINGLE_SAMPLE:single_sample_base:NEIGHBORHOOD_GRAPH:SC__SCANPY__NEIGHBORHOOD_GRAPH - - -p32->p34 - - -ipynb + + +p31->p32 + + +data p33 - + +map + + + +p32->p33 + + +data + + + +p34 + +single_sample:SINGLE_SAMPLE:single_sample_base:DIM_REDUCTION_TSNE_UMAP:SC__SCANPY__DIM_REDUCTION__TSNE p33->p34 - - -reportTitle + + - + +p35 + +map + + + +p34->p35 + + + + + p36 - + +single_sample:SINGLE_SAMPLE:single_sample_base:DIM_REDUCTION_TSNE_UMAP:SC__SCANPY__DIM_REDUCTION__UMAP p35->p36 - - + + - + -p38 - -single_sample:SINGLE_SAMPLE:SC__H5AD_TO_LOOM +p37 + +map + + + +p36->p37 + + +data + + + +p43 + +map + + + +p36->p43 + + +data - + + +p40 + +single_sample:SINGLE_SAMPLE:single_sample_base:DIM_REDUCTION_TSNE_UMAP:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT + + -p37->p38 - - +p37->p40 + + +data - + p41 - -single_sample:SINGLE_SAMPLE:SC__PUBLISH_H5AD + +single_sample:SINGLE_SAMPLE:single_sample_base:DIM_REDUCTION_TSNE_UMAP:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML - - -p37->p41 - - + + +p40->p41 + + + + + +p40->p75 + + + + + +p38 + + + + +p38->p40 + + +ipynb - + p39 - + - - -p38->p39 - - + + +p39->p40 + + +reportTitle - + p42 - + p41->p42 - - - - - -p40 - - - - -p40->p41 - - -fOutSuffix + + - + p44 - -groupTuple + +single_sample:SINGLE_SAMPLE:single_sample_base:CLUSTER_IDENTIFICATION:SC__SCANPY__CLUSTERING - + p43->p44 - - - - - -p46 - -single_sample:SINGLE_SAMPLE:SC__SCANPY__MERGE_REPORTS - - - -p44->p46 - - + + - + p47 - -single_sample:SINGLE_SAMPLE:SC__SCANPY__REPORT_TO_HTML + +single_sample:SINGLE_SAMPLE:single_sample_base:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT - - -p46->p47 - - + + +p44->p47 + + +data + + + +p48 + +single_sample:SINGLE_SAMPLE:single_sample_base:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML + + + +p47->p48 + + + + + +p47->p75 + + - + p45 - + - - -p45->p46 - - -reportTitle + + +p45->p47 + + +ipynb - + -p48 - +p46 + - + + +p46->p47 + + +reportTitle + + + +p49 + + + + 
+p48->p49 + + + + + +p52 + +single_sample:SINGLE_SAMPLE:single_sample_base:CLUSTER_IDENTIFICATION:SC__SCANPY__MARKER_GENES + + + +p51->p52 + + + + + +p50 + + + -p47->p48 - - +p50->p51 + + + + + +p55 + +groupTuple + + + +p52->p55 + + + + + +p71 + +single_sample:SINGLE_SAMPLE:single_sample_base:SC__PUBLISH_H5AD + + + +p52->p71 + + + + + +p56 + +branch + + + +p55->p56 + + +data + + + +p54 + + + + +p53->p54 + + + + + +p68 + +view + + + +p56->p68 + + + + + +p57 + +view + + + +p56->p57 + + + + + +p59 + +map + + + +p56->p59 + + + + + +p69 + + + + +p68->p69 + + + + + +p58 + + + + +p57->p58 + + + + + +p60 + + + + +p59->p60 + + + + + +p61 + + + + +p61->p63 + + + + + +p64 + +ifEmpty + + + +p63->p64 + + + + + +p62 + + + + +p62->p63 + + + + + +p65 + +single_sample:SINGLE_SAMPLE:single_sample_base:FILE_CONVERTER:SC__H5AD_TO_LOOM + + + +p64->p65 + + + + + +p66 + +single_sample:SINGLE_SAMPLE:single_sample_base:FILE_CONVERTER:COMPRESS_HDF5 + + + +p65->p66 + + + + + +p67 + + + + +p66->p67 + + + + + +p72 + + + + +p71->p72 + + + + + +p70 + + + + +p70->p71 + + +fOutSuffix + + + +p73 + + + + +p73->p74 + + + + + +p76 + +groupTuple + + + +p75->p76 + + + + + +p79 + +single_sample:SINGLE_SAMPLE:single_sample_base:SC__SCANPY__MERGE_REPORTS + + + +p76->p79 + + + + + +p80 + +single_sample:SINGLE_SAMPLE:single_sample_base:SC__SCANPY__REPORT_TO_HTML + + + +p79->p80 + + + + + +p77 + + + + +p77->p79 + + +reportTitle + + + +p78 + + + + +p78->p79 + + +isBenchmarkMode + + + +p81 + + + + +p80->p81 + + \ No newline at end of file diff --git a/assets/images/single_sample_scenic.svg b/assets/images/single_sample_scenic.svg index bbee3e5f..13816322 100644 --- a/assets/images/single_sample_scenic.svg +++ b/assets/images/single_sample_scenic.svg @@ -1,1038 +1,1360 @@ - + --> + pipeline_dag - + p0 - -Channel.from + +Channel.from p1 - + +view p0->p1 - - -runs + + p2 - -Channel.fromPath + + + + +p1->p2 + + p3 - -map - - - -p2->p3 - - + +Channel.from p4 - 
-single_sample_scenic:SINGLE_SAMPLE:QC_FILTER:SC__FILE_CONVERTER + p3->p4 - - -data + + +runs p5 - -single_sample_scenic:SINGLE_SAMPLE:QC_FILTER:SC__SCANPY__COMPUTE_QC_STATS - - - -p4->p5 - - + +Channel.fromPath p6 - -single_sample_scenic:SINGLE_SAMPLE:QC_FILTER:SC__SCANPY__GENE_FILTER + +map - + p5->p6 - - -data1 - - - -p8 - -join - - - -p5->p8 - - -data1 + + p7 - -single_sample_scenic:SINGLE_SAMPLE:QC_FILTER:SC__SCANPY__CELL_FILTER + +view - + p6->p7 - - + + +channel - + -p14 - -single_sample_scenic:SINGLE_SAMPLE:SC__H5AD_TO_FILTERED_LOOM +p8 + +map - - -p7->p14 - - -filtered + + +p7->p8 + + +data - - -p15 - -single_sample_scenic:SINGLE_SAMPLE:NORMALIZE_TRANSFORM:SC__SCANPY__NORMALIZATION + + +p11 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:QC_FILTER:SC__FILE_CONVERTER - - -p7->p15 - - -filtered + + +p7->p11 + + +data - - -p49 - -view + + +p74 + +combine - - -p14->p49 - - -filteredLoom + + +p8->p74 + + +samples - - -p53 - -combine + + +p75 + +mix - - -p14->p53 - - -filteredLoom + + +p74->p75 + + - + -p11 - -single_sample_scenic:SINGLE_SAMPLE:QC_FILTER:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__GENERATE_DUAL_INPUT_REPORT +p9 + + + + +p10 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:UTILS__GENERATE_WORKFLOW_CONFIG_REPORT - + + +p9->p10 + + +ipynb + + -p8->p11 - - +p10->p74 + + p12 - -single_sample_scenic:SINGLE_SAMPLE:QC_FILTER:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__REPORT_TO_HTML + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:QC_FILTER:SC__SCANPY__COMPUTE_QC_STATS - -p11->p12 - - - - - -p43 - -mix - - - -p11->p43 - - - - - -p9 - - - - -p9->p11 - - -ipynb - - - -p10 - - - -p10->p11 - - -reportTitle +p11->p12 + + p13 - + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:QC_FILTER:SC__SCANPY__GENE_FILTER - + p12->p13 - - - - - -p50 - - - - -p49->p50 - - + + - + p16 - -single_sample_scenic:SINGLE_SAMPLE:NORMALIZE_TRANSFORM:SC__SCANPY__DATA_TRANSFORMATION + +join - + -p15->p16 - - +p12->p16 + + - - -p17 - 
-single_sample_scenic:SINGLE_SAMPLE:HVG_SELECTION:SC__SCANPY__FEATURE_SELECTION + + +p14 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:QC_FILTER:SC__SCANPY__CELL_FILTER - - -p16->p17 - - -data + + +p13->p14 + + - - -p18 - -single_sample_scenic:SINGLE_SAMPLE:HVG_SELECTION:SC__SCANPY__FEATURE_SCALING + + +p23 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:NORMALIZE_TRANSFORM:SC__SCANPY__NORMALIZATION - - -p17->p18 - - + + +p14->p23 + + +rawFilteredData - - -p21 - -single_sample_scenic:SINGLE_SAMPLE:HVG_SELECTION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT + + +p55 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:SC__H5AD_TO_FILTERED_LOOM - - -p18->p21 - - -data + + +p14->p55 + + +rawFilteredData + + + +p64 + +combine + + + +p14->p64 + + +rawFilteredData p24 - -single_sample_scenic:SINGLE_SAMPLE:DIM_REDUCTION:SC__SCANPY__DIM_REDUCTION__PCA + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:NORMALIZE_TRANSFORM:SC__SCANPY__DATA_TRANSFORMATION - - -p18->p24 - - -data + + +p23->p24 + + - + + +p15 + + + + +p15->p16 + + + + + +p20 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:QC_FILTER:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__GENERATE_DUAL_INPUT_REPORT + + + +p16->p20 + + +data + + -p22 - -single_sample_scenic:SINGLE_SAMPLE:HVG_SELECTION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML +p21 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:QC_FILTER:GENERATE_DUAL_INPUT_REPORT:SC__SCANPY__REPORT_TO_HTML - - -p21->p22 - - + + +p20->p21 + + - - -p21->p43 - - + + +p20->p75 + + - + + +p17 + + + + +p17->p20 + + +ipynb + + -p19 - +p18 + - - -p19->p21 - - -ipynb + + +p18->p20 + + +reportTitle - + -p20 - +p19 + - - -p20->p21 - - -reportTitle + + +p19->p20 + + +isBenchmarkMode - + -p23 - +p22 + - - -p22->p23 - - + + +p21->p22 + + p25 - -single_sample_scenic:SINGLE_SAMPLE:DIM_REDUCTION:SC__SCANPY__DIM_REDUCTION__TSNE + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:HVG_SELECTION:SC__SCANPY__FEATURE_SELECTION - + p24->p25 - - + + 
+normalizedTransformedData + + + +p53 + +join + + + +p24->p53 + + +normalizedTransformedData p26 - -single_sample_scenic:SINGLE_SAMPLE:DIM_REDUCTION:SC__SCANPY__DIM_REDUCTION__UMAP + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:HVG_SELECTION:SC__SCANPY__FEATURE_SCALING - + p25->p26 - - + + p29 - -single_sample_scenic:SINGLE_SAMPLE:DIM_REDUCTION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:HVG_SELECTION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT - + p26->p29 - - -data + + +data p32 - -single_sample_scenic:SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:SC__SCANPY__CLUSTERING + +map - + p26->p32 - - -data + + +data p30 - -single_sample_scenic:SINGLE_SAMPLE:DIM_REDUCTION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:HVG_SELECTION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML - + p29->p30 - - + + - - -p29->p43 - - + + +p29->p75 + + p27 - + - + p27->p29 - - -ipynb + + +ipynb p28 - + - + p28->p29 - - -reportTitle + + +reportTitle p31 - + - + p30->p31 - - + + - + -p35 - -single_sample_scenic:SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT +p33 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:DIM_REDUCTION_PCA:SC__SCANPY__DIM_REDUCTION__PCA + + + +p32->p33 + + +data - + + +p34 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:NEIGHBORHOOD_GRAPH:SC__SCANPY__NEIGHBORHOOD_GRAPH + + -p32->p35 - - -data +p33->p34 + + +data - - -p38 - -single_sample_scenic:SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:SC__SCANPY__MARKER_GENES + + +p35 + +map - - -p32->p38 - - -data + + +p34->p35 + + +data p36 - -single_sample_scenic:SINGLE_SAMPLE:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:DIM_REDUCTION_TSNE_UMAP:SC__SCANPY__DIM_REDUCTION__TSNE - -p35->p36 - - - - - -p35->p43 - - - - - -p33 - - - - -p33->p35 - - -ipynb - - - -p34 - - - -p34->p35 - - -reportTitle +p35->p36 + 
+ p37 - + +map - + p36->p37 - - + + + + + +p38 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:DIM_REDUCTION_TSNE_UMAP:SC__SCANPY__DIM_REDUCTION__UMAP + + + +p37->p38 + + p39 - -single_sample_scenic:SINGLE_SAMPLE:SC__H5AD_TO_LOOM + +map - + p38->p39 - - + + +data - - -p41 - -single_sample_scenic:SINGLE_SAMPLE:SC__PUBLISH_H5AD + + +p45 + +map - - -p38->p41 - - + + +p38->p45 + + +data - + -p78 - -join - - - -p39->p78 - - -scopeLoom - - - -p79 - -single_sample_scenic:SCENIC_append:APPEND_SCENIC_LOOM +p42 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:DIM_REDUCTION_TSNE_UMAP:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT - - -p78->p79 - - + + +p39->p42 + + +data - + -p42 - +p43 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:DIM_REDUCTION_TSNE_UMAP:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML - - -p41->p42 - - + + +p42->p43 + + + + + +p42->p75 + + - + p40 - + - - -p40->p41 - - -fOutSuffix + + +p40->p42 + + +ipynb + + + +p41 + + + + +p41->p42 + + +reportTitle - + p44 - -groupTuple + - + p43->p44 - - + + p46 - -single_sample_scenic:SINGLE_SAMPLE:SC__SCANPY__MERGE_REPORTS + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:CLUSTER_IDENTIFICATION:SC__SCANPY__CLUSTERING + + + +p45->p46 + + + + + +p49 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__GENERATE_REPORT + + + +p46->p49 + + +data + + + +p50 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:CLUSTER_IDENTIFICATION:GENERATE_REPORT:SC__SCANPY__REPORT_TO_HTML - + -p44->p46 - - +p49->p50 + + + + + +p49->p75 + + p47 - -single_sample_scenic:SINGLE_SAMPLE:SC__SCANPY__REPORT_TO_HTML - - - -p46->p47 - - - - - -p45 - + - - -p45->p46 - - -reportTitle + + +p47->p49 + + +ipynb p48 - - - - -p47->p48 - - - - - -p55 - -single_sample_scenic:SCENIC_append:SCENIC:GRNBOOST2_WITHOUT_DASK + - - -p53->p55 - - + + +p48->p49 + + +reportTitle - + p51 - + - - -p51->p53 - - + + +p50->p51 + + + + + +p54 + 
+single_sample_scenic:SINGLE_SAMPLE:single_sample_base:CLUSTER_IDENTIFICATION:SC__SCANPY__MARKER_GENES + + + +p53->p54 + + p52 - + - + p52->p53 - - + + - - -p60 - -single_sample_scenic:SCENIC_append:SCENIC:CISTARGET__MOTIF - - - -p55->p60 - - - - - -p65 - -single_sample_scenic:SCENIC_append:SCENIC:CISTARGET__TRACK - - - -p55->p65 - - - - + -p54 - - - - -p54->p55 - - -tfs +p56 + +groupTuple - - -p67 - -single_sample_scenic:SCENIC_append:SCENIC:AUCELL__MOTIF + + +p54->p56 + + - - -p60->p67 - - + + +p71 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:SC__PUBLISH_H5AD - - -p56 - -Channel.fromPath + + +p54->p71 + + p57 - -collect + +branch - + p56->p57 - - + + +data - - -p57->p60 - - -motifDB + + +p82 + +view + + + +p55->p82 + + +filteredLoom + + + +p84 + +view + + + +p55->p84 + + +filteredLoom + + + +p88 + +combine + + + +p55->p88 + + +filteredLoom + + + +p83 + + + + +p82->p83 + + p58 - + +view - - -p58->p60 - - -annotation + + +p57->p58 + + - + -p59 - - - - -p59->p60 - - -type - - - -p70 - -map +p68 + +view - - -p67->p70 - - + + +p57->p68 + + - + -p61 - -Channel.fromPath +p60 + +map - - -p62 - -collect + + +p57->p60 + + - - -p61->p62 - - + + +p59 + - - -p62->p65 - - -trackDB + + +p58->p59 + + - + p69 - -single_sample_scenic:SCENIC_append:SCENIC:AUCELL__TRACK + - + -p65->p69 - - +p68->p69 + + - - -p63 - + + +p61 + - - -p63->p65 - - -annotation + + +p60->p61 + + - - -p64 - + + +p65 + +ifEmpty - + p64->p65 - - -type + + - - -p71 - -map + + +p62 + - - -p69->p71 - - + + +p62->p64 + + + + + +p63 + + + + +p63->p64 + + p66 - + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:FILE_CONVERTER:SC__H5AD_TO_LOOM - - -p66->p67 - - -type + + +p65->p66 + + - - -p73 - -join + + +p67 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:FILE_CONVERTER:COMPRESS_HDF5 - - -p70->p73 - - + + +p66->p67 + + - + -p68 - +p105 + +join - - -p68->p69 - - -type + + +p67->p105 + + +scopeLoom + + + +p106 + +single_sample_scenic:SCENIC_append:APPEND_SCENIC_LOOM + + + +p105->p106 + + - 
+ p72 - + - + p71->p72 - - + + - - -p74 - -single_sample_scenic:SCENIC_append:SCENIC:MERGE_MOTIF_TRACK_LOOMS + + +p70 + - - -p73->p74 - - + + +p70->p71 + + +fOutSuffix - + -p75 - -single_sample_scenic:SCENIC_append:SCENIC:VISUALIZE +p73 + - - -p74->p75 - - + + +p73->p74 + + - + p76 - -single_sample_scenic:SCENIC_append:SCENIC:PUBLISH_LOOM + +groupTuple - + p75->p76 - - + + - - -p77 - + + +p79 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:SC__SCANPY__MERGE_REPORTS - - -p76->p77 - - + + +p76->p79 + + - - -p82 - -single_sample_scenic:SCENIC_append:GENERATE_REPORT + + +p80 + +single_sample_scenic:SINGLE_SAMPLE:single_sample_base:SC__SCANPY__REPORT_TO_HTML - - -p79->p82 - - + + +p79->p80 + + - - -p83 - -single_sample_scenic:SCENIC_append:REPORT_TO_HTML + + +p77 + - - -p82->p83 - - + + +p77->p79 + + +reportTitle - + -p80 - +p78 + - + -p80->p82 - - -ipynb +p78->p79 + + +isBenchmarkMode - + p81 - - - - -p81->p82 - - -reportTitle + - - -p84 - - - - -p83->p84 - - + + +p80->p81 + + + + + +p85 + + + + +p84->p85 + + + + + +p86 + + + + +p86->p88 + + + + + +p90 + +single_sample_scenic:SCENIC_append:SCENIC:GRNBOOST2_WITHOUT_DASK + + + +p88->p90 + + + + + +p87 + + + + +p87->p88 + + + + + +p95 + +single_sample_scenic:SCENIC_append:SCENIC:CISTARGET__MOTIF + + + +p90->p95 + + + + + +p89 + + + + +p89->p90 + + +tfs + + + +p97 + +single_sample_scenic:SCENIC_append:SCENIC:AUCELL__MOTIF + + + +p95->p97 + + + + + +p91 + +Channel.fromPath + + + +p92 + +collect + + + +p91->p92 + + + + + +p92->p95 + + +motifsDb + + + +p93 + + + + +p93->p95 + + +annotation + + + +p94 + + + + +p94->p95 + + +type + + + +p98 + +map + + + +p97->p98 + + + + + +p96 + + + + +p96->p97 + + +type + + + +p99 + +single_sample_scenic:SCENIC_append:SCENIC:VISUALIZE + + + +p98->p99 + + + + + +p100 + +single_sample_scenic:SCENIC_append:SCENIC:PUBLISH_LOOM + + + +p99->p100 + + + + + +p102 + +view + + + +p99->p102 + + + + + +p101 + + + + +p100->p101 + + + + + +p103 + + + + +p102->p103 + + +scenicLoom + + + +p104 + + + 
+ +p104->p105 + + + + + +p109 + +single_sample_scenic:SCENIC_append:GENERATE_REPORT + + + +p106->p109 + + + + + +p110 + +single_sample_scenic:SCENIC_append:REPORT_TO_HTML + + + +p109->p110 + + + + + +p107 + + + + +p107->p109 + + +ipynb + + + +p108 + + + + +p108->p109 + + +reportTitle + + + +p111 + + + + +p110->p111 + + \ No newline at end of file From 07e3f73bbc51365612988ef90f4d7216a6d8ae11 Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 14 Feb 2020 18:58:35 +0100 Subject: [PATCH 09/17] Fix bug cluster_markers row attributes should use all genes from raw filtered matrix --- src/utils/bin/h5ad_to_loom.py | 14 +++++++------- src/utils/workflows/fileConverter.nf | 18 ++++++------------ 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/src/utils/bin/h5ad_to_loom.py b/src/utils/bin/h5ad_to_loom.py index b2604b30..4520c758 100755 --- a/src/utils/bin/h5ad_to_loom.py +++ b/src/utils/bin/h5ad_to_loom.py @@ -270,7 +270,7 @@ def read_h5ad(file_path, backed='r'): ################## row_attrs = { - "Gene": np.array(adata.raw.var.index) + "Gene": np.array(raw_filtered_adata.var.index) } # CLUSTER MARKERS @@ -281,15 +281,15 @@ def read_h5ad(file_path, backed='r'): # Initialize cluster_markers = pd.DataFrame( - index=adatas[adata_idx].raw.var.index, + index=raw_filtered_adata.var.index, columns=[str(x) for x in np.arange(num_clusters)] ).fillna(0, inplace=False) cluster_markers_avg_logfc = pd.DataFrame( - index=adatas[adata_idx].raw.var.index, + index=raw_filtered_adata.var.index, columns=[str(x) for x in np.arange(num_clusters)] ).fillna(0, inplace=False) cluster_markers_pval = pd.DataFrame( - index=adatas[adata_idx].raw.var.index, + index=raw_filtered_adata.var.index, columns=[str(x) for x in np.arange(num_clusters)] ).fillna(0, inplace=False) @@ -312,12 +312,12 @@ def read_h5ad(file_path, backed='r'): deg_genes_mask ) gene_names = adatas[adata_idx].uns['rank_genes_groups']['names'][i][sig_and_deg_genes_mask] - cluster_markers.loc[gene_names, i] = 1 - 
cluster_markers_avg_logfc.loc[gene_names, i] = np.around( + cluster_markers.loc[np.in1d(cluster_markers.index, gene_names), i] = 1 + cluster_markers_avg_logfc.loc[np.in1d(cluster_markers.index, gene_names), i] = np.around( adatas[adata_idx].uns['rank_genes_groups']['logfoldchanges'][i][sig_and_deg_genes_mask], decimals=6 ) - cluster_markers_pval.loc[gene_names, i] = np.around( + cluster_markers_pval.loc[np.in1d(cluster_markers.index, gene_names), i] = np.around( adatas[adata_idx].uns['rank_genes_groups']['pvals_adj'][i][sig_and_deg_genes_mask], decimals=6 ) diff --git a/src/utils/workflows/fileConverter.nf b/src/utils/workflows/fileConverter.nf index db0bc7a0..21007cbf 100644 --- a/src/utils/workflows/fileConverter.nf +++ b/src/utils/workflows/fileConverter.nf @@ -41,19 +41,15 @@ workflow FILE_CONVERTER { if(it[1].size() > 1) { """ ------------------------------------------------------------------ -\u001B[32m -Aggregating multiple .h5ad files to ${it[1][0].baseName}.loom -(w/ additional compression)... -\u001B[0m +\u001B[32m Aggregating multiple .h5ad files to ${it[1][0].baseName}.loom +(w/ additional compression)...\u001B[0m ------------------------------------------------------------------ """ } else { """ ------------------------------------------------------------------ -\u001B[32m -Converting ${it[1][0].baseName}.h5ad to ${it[1][0].baseName}.loom -(w/ additional compression)... 
-\u001B[0m +\u001B[32m Converting ${it[1][0].baseName}.h5ad to ${it[1][0].baseName}.loom +(w/ additional compression)...\u001B[0m ------------------------------------------------------------------ """ } @@ -67,10 +63,8 @@ Converting ${it[1][0].baseName}.h5ad to ${it[1][0].baseName}.loom convert.none.view { """ ------------------------------------------------------------------ -\u001B[31m -Aborting conversion of ${it[1]} to ${it[1].baseName}.loom -(not implemented) -\u001B[0m +\u001B[31m Aborting conversion of ${it[1]} to ${it[1].baseName}.loom +(not implemented) \u001B[0m ------------------------------------------------------------------ """ } From 2889005e83c3d53a22e860eb2f6d4bea1818a0b3 Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 14 Feb 2020 18:59:04 +0100 Subject: [PATCH 10/17] Inform user on seed set by default --- main.nf | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/main.nf b/main.nf index 4298d816..d11cfdb0 100644 --- a/main.nf +++ b/main.nf @@ -2,6 +2,18 @@ import static groovy.json.JsonOutput.* nextflow.preview.dsl=2 +if(!params.global.containsKey('seed')) { + params.seed = workflow.manifest.version.replaceAll("\\.","").toInteger() + + Channel.from('').view { + """ +------------------------------------------------------------------ +\u001B[32m No seed detected in the config \u001B[0m +\u001B[32m To ensure reproducibility the seed has been set to ${params.seed} \u001B[0m +------------------------------------------------------------------ + """ + } +} // run multi-sample with bbknn, output a scope loom file workflow bbknn { From f4397719e7b2ebff92fc93f07b3827fc067a4d1f Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 14 Feb 2020 19:10:19 +0100 Subject: [PATCH 11/17] Update pcacv, scanpy, scenic #43 --- src/pcacv | 2 +- src/scanpy | 2 +- src/scenic | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pcacv b/src/pcacv index 0a607611..d29f877a 160000 --- a/src/pcacv +++ b/src/pcacv @@ -1 +1 @@ -Subproject commit 
0a6076118b999ffe52ae835e79a99f134bdbc60b +Subproject commit d29f877a786af734c09c1cb622be1e0208fcd471 diff --git a/src/scanpy b/src/scanpy index 3e387387..a2dee3f9 160000 --- a/src/scanpy +++ b/src/scanpy @@ -1 +1 @@ -Subproject commit 3e387387427bd686c3ee8219733cd8fff7041a80 +Subproject commit a2dee3f97836de8ed872b8bb470172447bb71b8f diff --git a/src/scenic b/src/scenic index b0f5249f..edc5ca7e 160000 --- a/src/scenic +++ b/src/scenic @@ -1 +1 @@ -Subproject commit b0f5249f9c4a3ae209dd1d3a5ae08d873145190c +Subproject commit edc5ca7e0a2194991fa03fb6ead8af36fc807bcf From 981677da0f33ccf2eee05e296b96ee1f19a82af3 Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 14 Feb 2020 20:24:09 +0100 Subject: [PATCH 12/17] Implement #96 for single_sample pipeline Create configs Rename h5ad channel to file channel since the implementation is generic and will work for csv and tsv Create the (tsv|csv)_single_samples pipelines in main.nf Add data profiles for tsv and csv input data --- main.nf | 24 +++++++++++++++++++++++- nextflow.config | 6 ++++++ src/channels/conf/csv.config | 13 +++++++++++++ src/channels/conf/tsv.config | 13 +++++++++++++ src/channels/{h5ad.nf => file.nf} | 10 +--------- src/utils/processes/files.nf | 12 +++++++++++- 6 files changed, 67 insertions(+), 11 deletions(-) create mode 100644 src/channels/conf/csv.config create mode 100644 src/channels/conf/tsv.config rename src/channels/{h5ad.nf => file.nf} (61%) diff --git a/main.nf b/main.nf index d11cfdb0..8fdc3dff 100644 --- a/main.nf +++ b/main.nf @@ -112,7 +112,7 @@ workflow single_sample_cellranger { workflow h5ad_single_sample { - include getChannel as getH5ADChannel from './src/channels/h5ad' params(params) + include getChannel as getH5ADChannel from './src/channels/file' params(params) include single_sample as SINGLE_SAMPLE from './workflows/single_sample' params(params) data = getH5ADChannel( params.data.h5ad.file_paths, @@ -121,6 +121,28 @@ workflow h5ad_single_sample { } +workflow tsv_single_sample { + + 
include getChannel as getTSVChannel from './src/channels/file' params(params) + include single_sample as SINGLE_SAMPLE from './workflows/single_sample' params(params) + data = getTSVChannel( + params.data.tsv.file_paths, + params.data.tsv.suffix + ).view() | SINGLE_SAMPLE + +} + +workflow csv_single_sample { + + include getChannel as getCSVChannel from './src/channels/file' params(params) + include single_sample as SINGLE_SAMPLE from './workflows/single_sample' params(params) + data = getCSVChannel( + params.data.csv.file_paths, + params.data.csv.suffix + ).view() | SINGLE_SAMPLE + +} + workflow star { diff --git a/nextflow.config b/nextflow.config index 3b5c2970..c7ffc315 100644 --- a/nextflow.config +++ b/nextflow.config @@ -125,6 +125,12 @@ profiles { h5ad { includeConfig 'src/channels/conf/h5ad.config' } + tsv { + includeConfig 'src/channels/conf/tsv.config' + } + csv { + includeConfig 'src/channels/conf/csv.config' + } sra { includeConfig 'src/channels/conf/sra.config' includeConfig 'src/utils/conf/sra_metadata.config' diff --git a/src/channels/conf/csv.config b/src/channels/conf/csv.config new file mode 100644 index 00000000..5ebc4b09 --- /dev/null +++ b/src/channels/conf/csv.config @@ -0,0 +1,13 @@ +params { + data { + csv { + file_paths = '' + suffix = '.csv' + } + } + sc { + file_converter { + iff = 'csv' + } + } +} diff --git a/src/channels/conf/tsv.config b/src/channels/conf/tsv.config new file mode 100644 index 00000000..406ae44c --- /dev/null +++ b/src/channels/conf/tsv.config @@ -0,0 +1,13 @@ +params { + data { + h5ad { + file_paths = '' + suffix = '.tsv' + } + } + sc { + file_converter { + iff = 'tsv' + } + } +} diff --git a/src/channels/h5ad.nf b/src/channels/file.nf similarity index 61% rename from src/channels/h5ad.nf rename to src/channels/file.nf index 09127524..3b678bd2 100644 --- a/src/channels/h5ad.nf +++ b/src/channels/file.nf @@ -1,14 +1,6 @@ nextflow.preview.dsl=2 -def extractSample(path, suffix) { - if(!path.endsWith(".h5ad")) - throw new 
Exception("Wrong channel used for data: "+ path) - // Extract the sample name based on the given path and on the given suffix - suffix = suffix.replace(".","\\.") - pattern = /(.+)\/(.+)${suffix}/ - (full, parentDir, id) = (path =~ pattern)[0] - return id -} +include '../utils/processes/files.nf' workflow getChannel { diff --git a/src/utils/processes/files.nf b/src/utils/processes/files.nf index 431c39ba..8c6bfd41 100644 --- a/src/utils/processes/files.nf +++ b/src/utils/processes/files.nf @@ -1,4 +1,14 @@ def getBaseName(file) { (full, filename, process, ext) = ( file.getName() =~ /(.+)\.SC(.+)\.(.+)/)[0] return filename -} \ No newline at end of file +} + +def extractSample(path, suffix) { + if(!path.endsWith(".h5ad")) + throw new Exception("Wrong channel used for data: "+ path) + // Extract the sample name based on the given path and on the given suffix + suffix = suffix.replace(".","\\.") + pattern = /(.+)\/(.+)${suffix}/ + (full, parentDir, id) = (path =~ pattern)[0] + return id +} From 4baa4613e88d8e34469e747cf3fe1d5f9c8179e0 Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 14 Feb 2020 21:06:38 +0100 Subject: [PATCH 13/17] Improve renaming samples when downloading SRA --- src/utils/workflows/downloadFromSRA.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/utils/workflows/downloadFromSRA.nf b/src/utils/workflows/downloadFromSRA.nf index 905b2cc8..62c978bd 100644 --- a/src/utils/workflows/downloadFromSRA.nf +++ b/src/utils/workflows/downloadFromSRA.nf @@ -68,8 +68,11 @@ workflow DOWNLOAD_FROM_SRA { header:true, sep: '\t' ).map { - // Replace all special characters by underscores - row -> tuple(row.run_accession, row.sample_name.replaceAll("[, ]", "_")) + // Remove ending characters (])), all special characters ([]()), /) by underscores + row -> tuple( + row.run_accession, + row.sample_name​.replaceAll("[\\])]\$","")​​​​​​​​​​​​.​replaceAll("[)(\\[\\], 
/]","_") + ) } // Download and compress all the SRA runs defined in the metadata data = DOWNLOAD_FASTQS_FROM_SRA_ACC_ID( From 8190b49ac204d489b8f3a0969d0232ab8188c552 Mon Sep 17 00:00:00 2001 From: dweemx Date: Fri, 14 Feb 2020 21:09:16 +0100 Subject: [PATCH 14/17] Add docs to RtT Update features page with a new section "setting the seed" Update pipelines page with a new sections TSV, CSV --- docs/features.rst | 33 +++++++++++++++++++++++++++------ docs/pipelines.rst | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/docs/features.rst b/docs/features.rst index d4aae019..449d378b 100644 --- a/docs/features.rst +++ b/docs/features.rst @@ -1,10 +1,31 @@ Features ========= -Change log fold change and FDR thresholds for markers stored in SCope loom --------------------------------------------------------------------------- +Set the seed +------------ +Some steps in the pipelines are nondeterministic. To ensure that the results are reproducible over time, a seed is set by default to: -By default, log fold change and FDR thresholds are set to 0 and 0.05 respectively. +.. code:: groovy + + workflow.manifest.version.replaceAll("\\.","").toInteger() + +The seed is a number derived from the version of the pipeline used at the time of the analysis run. +To override the seed (integer) you have to edit the nextflow.config file with: + +.. code:: groovy + + params { + global { + seed = [your-custom-seed] + } + } + +This filter will only be applied on the final loom file of the VSN-Pipelines. 
All the intermediate files prior to the loom file will still contain all of them the markers. + +Change log fold change (logFC) and false discovery rate (FDR) thresholds for the marker genes stored in the final SCope loom +---------------------------------------------------------------------------------------------------------------------------- + +By default, the logFC and FDR thresholds are set to 0 and 0.05 respectively. If you want to change those thresholds applied on the markers genes, edit the ``nextflow.config`` with the following entries, .. code:: groovy @@ -22,8 +43,8 @@ If you want to change those thresholds applied on the markers genes, edit the `` This filter will only be applied on the final loom file of the VSN-Pipelines. All the intermediate files prior to the loom file will still contain all of them the markers. -Select the optimal number of principal components -------------------------------------------------- +Automated selection of the optimal number of principal components +----------------------------------------------------------------- When generating the config using ``nextflow config`` (see above), add the ``pcacv`` profile. @@ -87,7 +108,7 @@ The latest version only implements this feature for the following pipelines: - ``single_sample`` - ``bbknn`` -Since ``v0.9.0``, it is possible to explore several combinations of parameters. The current version (``v0.9.0``) of the VSN-Pipelines allows to explore the following parameters: +Since ``v0.9.0``, it is possible to explore several combinations of parameters. The latest version of the VSN-Pipelines allows to explore the following parameters: - ``params.sc.scanpy.clustering`` diff --git a/docs/pipelines.rst b/docs/pipelines.rst index eff098d7..fbd958a0 100644 --- a/docs/pipelines.rst +++ b/docs/pipelines.rst @@ -265,3 +265,49 @@ In the generated .config file, make sure the ``file_paths`` parameter is set wit Make sure that ``sc.file_converter.iff`` is set to ``h5ad``. 
Currently H5AD input is only implemented in the ``h5ad_single_sample`` entry point. + +TSV +--- +:: + + -profile tsv + + +In the generated .config file, make sure the ``file_paths`` parameter is set with the paths to the ``.tsv`` files:: + + [...] + tsv { + file_paths = "data/1k_pbmc_v*_chemistry_SUFFIX.SC__FILE_CONVERTER.tsv" + suffix = "_SUFFIX.SC__FILE_CONVERTER.tsv" + } + [...] + +- The ``suffix`` parameter is used to infer the sample name from the file paths (it is removed from the input file path to derive a sample name). +- The ``file_paths`` accepts glob patterns and also comma separated paths. + +Make sure that ``sc.file_converter.iff`` is set to ``tsv``. + +Currently TSV input is only implemented in the ``tsv_single_sample`` entry point. + +CSV +--- +:: + + -profile csv + + +In the generated .config file, make sure the ``file_paths`` parameter is set with the paths to the ``.csv`` files:: + + [...] + csv { + file_paths = "data/1k_pbmc_v*_chemistry_SUFFIX.SC__FILE_CONVERTER.csv" + suffix = "_SUFFIX.SC__FILE_CONVERTER.csv" + } + [...] + +- The ``suffix`` parameter is used to infer the sample name from the file paths (it is removed from the input file path to derive a sample name). +- The ``file_paths`` accepts glob patterns and also comma separated paths. + +Make sure that ``sc.file_converter.iff`` is set to ``csv``. + +Currently CSV input is only implemented in the ``csv_single_sample`` entry point. 
\ No newline at end of file From cab1b734872f26dd0edb9a92bcc95a39ceccb285 Mon Sep 17 00:00:00 2001 From: dweemx Date: Mon, 17 Feb 2020 13:09:34 +0100 Subject: [PATCH 15/17] Add workflow and profiles for vib-singlecell-nf/cellranger#1 Update cellranger tool --- main.nf | 7 +++++++ nextflow.config | 10 ++++++++++ src/cellranger | 2 +- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 8fdc3dff..af701152 100644 --- a/main.nf +++ b/main.nf @@ -101,6 +101,13 @@ workflow cellranger { } +workflow cellranger_metadata { + + include CELLRANGER_COUNT_WITH_METADATA from './src/cellranger/workflows/cellRangerCountWithMetadata' params(params) + CELLRANGER_COUNT_WITH_METADATA(file(params.sc.cellranger.count.metadata)) + +} + // runs mkfastq, CellRanger count, then single_sample: workflow single_sample_cellranger { diff --git a/nextflow.config b/nextflow.config index c7ffc315..ba77218d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -114,9 +114,19 @@ profiles { includeConfig 'src/star/star.config' includeConfig 'src/dropletutils/dropletutils.config' } + cellranger { includeConfig 'src/cellranger/cellranger.config' } + cellranger_count { + includeConfig 'src/cellranger/conf/base.config' + includeConfig 'src/cellranger/conf/count.config' + } + cellranger_count_metadata { + includeConfig 'src/cellranger/conf/base.config' + includeConfig 'src/cellranger/conf/count.config' + includeConfig 'src/cellranger/conf/count_metadata.config' + } // data profiles tenx { diff --git a/src/cellranger b/src/cellranger index 2e473b88..6449cdbf 160000 --- a/src/cellranger +++ b/src/cellranger @@ -1 +1 @@ -Subproject commit 2e473b884e2a7b7e3a92b30eff80aed5a5d61fa1 +Subproject commit 6449cdbf4693cdb2619c60dacf0fb01b8ad7fbf7 From 68d5e2ac2894e73e5594007f5725ba15826bf497 Mon Sep 17 00:00:00 2001 From: dweemx Date: Mon, 17 Feb 2020 13:43:42 +0100 Subject: [PATCH 16/17] Update scenic tool Fix docker formet bug in scenic multiruns configs --- src/scenic | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scenic b/src/scenic index edc5ca7e..c66acabe 160000 --- a/src/scenic +++ b/src/scenic @@ -1 +1 @@ -Subproject commit edc5ca7e0a2194991fa03fb6ead8af36fc807bcf +Subproject commit c66acabee47f373f65c75b754174d50cb5140f19 From 74cdd7dff31370efe9319d5ac0150ec79a192ad2 Mon Sep 17 00:00:00 2001 From: dweemx Date: Mon, 17 Feb 2020 13:51:16 +0100 Subject: [PATCH 17/17] Bump version from 0.10.0 to 0.11.0 --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index ba77218d..9814a0d7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -3,7 +3,7 @@ manifest { name = 'vib-singlecell-nf/vsn-pipelines' description = 'A repository of pipelines for single-cell data in Nextflow DSL2' homePage = 'https://github.com/vib-singlecell-nf/vsn-pipelines' - version = '0.10.0' + version = '0.11.0' mainScript = 'main.nf' defaultBranch = 'master' nextflowVersion = '!19.12.0-edge' // with ! prefix, stop execution if current version does not match required version.