Skip to content

Commit

Permalink
Param renames, adapted workflow to do the correct checks; fixed a bug…
Browse files Browse the repository at this point in the history
… in the gprofiler2 module. It is now the same as that in my nf-core/modules PR but I still need to 'officially' install the updated module once the modules PR is merged
  • Loading branch information
WackerO committed Dec 8, 2023
1 parent 8519c11 commit a1fcc1c
Show file tree
Hide file tree
Showing 11 changed files with 50 additions and 49 deletions.
12 changes: 7 additions & 5 deletions assets/differentialabundance_report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ params:
report_author: NULL,
report_description: NULL,
report_scree: NULL
gene_set_files: NULL
gene_sets_files: NULL
report_round_digits: NULL
observations_type: NULL
observations: NULL # GSE156533.samplesheet.csv
Expand Down Expand Up @@ -145,7 +145,7 @@ params:
gprofiler2_sources: NULL
gprofiler2_evcodes: NULL
gprofiler2_max_qval: NULL
gprofiler2_gost_token: NULL
gprofiler2_token: NULL
gprofiler2_background_file: NULL
gprofiler2_background_column: NULL
gprofiler2_domain_scope: NULL
Expand Down Expand Up @@ -894,7 +894,7 @@ if (any(unlist(params[paste0(possible_gene_set_methods, '_run')]))){
if (unlist(params[paste0(gene_set_method, '_run')])){
cat("\n#### ", toupper(gene_set_method) ," {.tabset}\n")
if (gene_set_method == 'gsea') {
for (gmt_file in simpleSplit(params$gene_set_files)) {
for (gmt_file in simpleSplit(params$gene_sets_files)) {
gmt_name <- basename(tools::file_path_sans_ext(gmt_file))
cat("\n##### ", gmt_name ," {.tabset}\n")
Expand All @@ -911,8 +911,10 @@ if (any(unlist(params[paste0(possible_gene_set_methods, '_run')]))){
} else if (gene_set_method == 'gprofiler2') {
enrichment_files <- grep("gprofiler2", list.files(params$input_dir), value=T, fixed=T)
tsv_files <- grep(".tsv", enrichment_files, fixed=T)
if (length(tsv_files)) {
tsv_files <- grep("all_enriched_pathways.tsv", enrichment_files, value=T, fixed=T)
# Make sure to grab only non-empty files
if (length(tsv_files) && any(file.size(tsv_files) != 0L)) {
# Introductory paragraph for the gprofiler2 results tables. The second sentence
# depends on params$gprofiler2_significant; both alternatives start with a
# space so they do not run into the preceding sentence.
cat(paste0("\nThis section contains the results tables of the pathway analysis which was done with the R package gprofiler2. The differential fraction is the number of differential genes in a pathway divided by that pathway's size, i.e. the number of genes annotated for the pathway.",
    ifelse(params$gprofiler2_significant, paste0(" Enrichment was only considered if significant, i.e. adjusted p-value <= ", params$gprofiler2_max_qval, "."), " Enrichment was also considered if not significant."), "\n"))
Expand Down
3 changes: 2 additions & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,8 @@ process {
"--palette_name \"${params.gprofiler2_palette_name}\"",
((meta.blocking == null) ? '' : "--blocking_variables $meta.blocking"),
((params.differential_feature_id_column == null) ? '' : "--de_id_column \"${params.differential_feature_id_column}\""),
((params.gprofiler2_gost_token == null) ? '' : "--gost_token \"${params.gprofiler2_gost_token}\""),
((params.gprofiler2_token == null) ? '' : "--token \"${params.gprofiler2_token}\""),
((params.gprofiler2_organism == null) ? '' : "--organism \"${params.gprofiler2_organism}\""),
((params.gprofiler2_background_column == null) ? '' : "--background_column \"${params.gprofiler2_background_column}\""),
((params.gprofiler2_sources == null) ? '' : "--sources \"${params.gprofiler2_sources}\"")
].join(' ').trim() }
Expand Down
2 changes: 0 additions & 2 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,4 @@ params {

// Activate gprofiler2
gprofiler2_run = true
gprofiler2_organism = 'mmusculus'
gene_set_files = '/home-link/iivow01/git/differentialabundance/testdata/combo_gprofiler_hallmark_mmusculus.gmt'
}
2 changes: 1 addition & 1 deletion conf/test_affy.config
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,5 @@ params {

// Activate GSEA
gsea_run = true
gene_set_files = 'https://raw.githubusercontent.com/nf-core/test-datasets/differentialabundance/testdata/h.all.v2022.1.Hs.symbols.gmt'
gene_sets_files = 'https://raw.githubusercontent.com/nf-core/test-datasets/differentialabundance/testdata/h.all.v2022.1.Hs.symbols.gmt'
}
9 changes: 2 additions & 7 deletions conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,6 @@ params {
report_description = "This is a full-sized test dataset contributed by Oskar Wacker"

// Activate GSEA
gsea_run = false
// gene_set_files = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt'

// Activate gprofiler2
gprofiler2_run = true
gprofiler2_organism = 'mmusculus'
gprofiler2_sources = 'KEGG,REAC'
gsea_run = true
// Gene sets for GSEA; the workflow reads them from params.gene_sets_files
gene_sets_files = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt'
}
2 changes: 1 addition & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ The organism (mmusculus for Mus musculus, hsapiens for Homo sapiens etc.) is req

```bash
--gsea_run true \
--gene_set_files gene_sets.gmt
--gene_sets_files gene_sets.gmt
```

## Running the pipeline
Expand Down
1 change: 0 additions & 1 deletion modules/nf-core/gprofiler2/gost/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 16 additions & 10 deletions modules/nf-core/gprofiler2/gost/templates/gprofiler2_gost.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ params {
gprofiler2_sources = null
gprofiler2_evcodes = false
gprofiler2_max_qval = 0.05
gprofiler2_gost_token = null
gprofiler2_token = null
gprofiler2_background_file = null
gprofiler2_background_column = null
gprofiler2_domain_scope = 'annotated'
Expand All @@ -183,7 +183,7 @@ params {
shinyngs_shinyapps_app_name = null

// Gene set options
gene_set_files = null
gene_sets_files = null

// References
genome = null
Expand Down
14 changes: 7 additions & 7 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -868,7 +868,7 @@
"gprofiler2_organism": {
"type": "string",
"description": "Short name of the organism that is analyzed, e.g. hsapiens for homo sapiens.",
"help_text": "Set this to the short organism name consisting of the first letter of the genus and the full species name, e.g. hsapiens for Homo sapiens, mmusculus for Mus musculus."
"help_text": "Set this to the short organism name consisting of the first letter of the genus and the full species name, e.g. hsapiens for Homo sapiens, mmusculus for Mus musculus. This has lowest priority and will be overridden by --gprofiler2_token and --gene_sets_files."
},
"gprofiler2_significant": {
"type": "boolean",
Expand All @@ -890,8 +890,8 @@
},
"gprofiler2_sources": {
"type": "string",
"description": "On which source databases to run the gprofiler query.",
"help_text": "GO, GO:MF, GO:BP, GO:CC, KEGG, REAC, WP, TF, MIRNA, HPA, CORUM, HP, or any comma-reparated combination thereof, e.g. 'KEGG,REAC'."
"description": "On which source databases to run the gprofiler query",
"help_text": "GO, GO:MF, GO:BP, GO:CC, KEGG, REAC, WP, TF, MIRNA, HPA, CORUM, HP, or any comma-reparated combination thereof, e.g. 'KEGG,REAC'. This works if --gprofiler2_organism is used; if a GMT file is provided with --gene_sets_files, should also work; the module will then remove any lines not starting with any of the source names. Does not work for --gprofiler2_token as g:Profiler will not filter such a run."
},
"gprofiler2_evcodes": {
"type": "boolean",
Expand All @@ -904,10 +904,10 @@
"default": 0.05,
"description": "Maximum q value used for significance testing."
},
"gprofiler2_gost_token": {
"gprofiler2_token": {
"type": "string",
"description": "Token that should be used as a query.",
"help_text": "For reproducibility, instead of querying the online databases, you can provide a token, e.g. from a previous pipeline run or from a manual query on https://biit.cs.ut.ee/gprofiler/gost."
"help_text": "For reproducibility, instead of querying the online databases, you can provide a token, e.g. from a previous pipeline run or from a manual query on https://biit.cs.ut.ee/gprofiler/gost. This has highest priority and will override --gprofiler2_organism and --gene_sets_files."
},
"gprofiler2_background_file": {
"type": "string",
Expand Down Expand Up @@ -986,10 +986,10 @@
"fa_icon": "fas fa-cogs",
"description": "Files and options used by gene set analysis modules.",
"properties": {
"gene_set_files": {
"gene_sets_files": {
"type": "string",
"default": "None",
"description": "Gene sets in GMT or GMX-format; for GSEA: multiple comma-separated input files are possible. For gprofiler2: One GMT file is possible.",
"description": "Gene sets in GMT or GMX-format; for GSEA: multiple comma-separated input files in either format are possible. For gprofiler2: A single file in GMT format is possible; this has second highest priority and will override --gprofiler2_organism.",
"fa_icon": "fas fa-bars"
}
}
Expand Down
24 changes: 12 additions & 12 deletions workflows/differentialabundance.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ if (params.study_type == 'affy_array'){
if (params.gsea_run) {
error("Cannot run GSEA for maxquant data; please set --gsea_run to false.")
}
if (params.gprofiler2_run){
error("gprofiler2 pathway analysis is not yet possible with maxquant input data; please set --gprofiler2_run false and rerun pipeline!")
}
if (!params.matrix) {
error("Input matrix not specified!")
}
Expand Down Expand Up @@ -67,19 +70,17 @@ if (params.study_type == 'affy_array'){
if (params.transcript_length_matrix) { ch_transcript_lengths = Channel.of([ exp_meta, file(params.transcript_length_matrix, checkIfExists: true)]).first() } else { ch_transcript_lengths = [[],[]] }
if (params.control_features) { ch_control_features = Channel.of([ exp_meta, file(params.control_features, checkIfExists: true)]).first() } else { ch_control_features = [[],[]] }
if (params.gsea_run) {
if (params.gene_set_files){
gene_set_files = params.gene_set_files.split(",")
ch_gene_sets = Channel.of(gene_set_files).map { file(it, checkIfExists: true) }
if (params.gene_sets_files){
gene_sets_files = params.gene_sets_files.split(",")
ch_gene_sets = Channel.of(gene_sets_files).map { file(it, checkIfExists: true) }
} else {
error("GSEA activated but gene set file not specified!")
}
}
if (params.gprofiler2_run) {
if (params.study_type == 'maxquant'){
error("gprofiler2 pathway analysis is not yet possible with maxquant input data; please set --gprofiler2_run false and rerun pipeline!")
}
if (!params.gprofiler2_organism){
error("gprofiler2 pathway analysis activated but organism not specified!")
// Abort only when none of the three possible gprofiler2 inputs (run token,
// GMT gene set file, organism name) was supplied. The previous form had an
// empty "then" branch and raised the error in the "else" branch, i.e. it
// failed exactly when at least one input WAS provided.
if (!params.gprofiler2_token && !params.gene_sets_files && !params.gprofiler2_organism) {
    error("To run gprofiler2, please provide a run token, GMT file or organism!")
}
}

Expand Down Expand Up @@ -478,7 +479,7 @@ workflow DIFFERENTIALABUNDANCE {

// For gprofiler2, use only features that are considered differential
ch_filtered_diff = FILTER_DIFFTABLE.out.filtered
ch_organism = Channel.value(params.gprofiler2_organism)

if (!params.gprofiler2_background_file) {
// If param not set, use empty list as "background"
ch_background = []
Expand All @@ -492,16 +493,15 @@ workflow DIFFERENTIALABUNDANCE {
} else {
ch_background = Channel.from(file(params.gprofiler2_background_file, checkIfExists: true))
}
if (!params.gene_set_files) {
if (!params.gene_sets_files) {
ch_gene_sets = []
} else {
ch_gene_sets = Channel.value(params.gene_set_files)
ch_gene_sets = Channel.value(params.gene_sets_files)
}

GPROFILER2_GOST(
ch_contrasts,
ch_filtered_diff,
ch_organism,
ch_gene_sets,
ch_background
)
Expand Down

0 comments on commit a1fcc1c

Please sign in to comment.