From 9caa45b8476b0fa87d4e53fe801bb4cd097cc477 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= Date: Thu, 14 Nov 2024 13:48:50 +0000 Subject: [PATCH 1/6] compare rows before and after merging and print the warnings if necessary --- assets/differentialabundance_report.Rmd | 58 ++++++++++++++++++++----- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd index 240c3ffe..ab176d4c 100644 --- a/assets/differentialabundance_report.Rmd +++ b/assets/differentialabundance_report.Rmd @@ -370,10 +370,17 @@ differential_files <- lapply(contrasts$id, function(d){ file.path(params$input_dir, paste0(gsub(' |;', '_', d), differential_file_suffix)) }) -differential_results <- lapply(differential_files, function(diff_file){ - if (! file.exists(diff_file)){ +# Initialize vector to store warning messages before merging tables +warnings_list <- c() + +# Read differential results and merge with features table +results <- lapply(differential_files, function(diff_file) { + warnings <- c() # Initialize local warning vector + + if (!file.exists(diff_file)) { stop(paste("Differential file", diff_file, "does not exist")) } + diff <- read_differential( diff_file, feature_id_column = params$differential_feature_id_column, @@ -382,19 +389,43 @@ differential_results <- lapply(differential_files, function(diff_file){ qval_column = params$differential_qval_column ) - # If fold changes are not logged already, log them (we assume they're logged - # later on) - - if (! params$differential_foldchanges_logged){ + # If fold changes are not logged already, log them + if (!params$differential_foldchanges_logged) { diff[[params$differential_fc_column]] <- log2(diff[[params$differential_fc_column]]) } # Annotate differential tables if possible - if (! is.null(params$features)){ - diff <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column) + if (!is.null(params$features)) { + + # Merge tables + diff_features <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column) + + # Get number of rows before and after merging + length_diff <- as.numeric(nrow(diff)) + length_features <- as.numeric(nrow(diff_features)) + + # Compare numbers and report + if (length_diff != length_features) { + warnings <- c(warnings, + paste0( + '

WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were lost after merging with features table (', basename(params$features), ').\n', + 'Rows in diff table: ', length_diff, '.\n', + 'Rows in merged table: ', length_features, '.

\n' + ) + ) + } + } else { + diff_features <- diff } - diff + + # Return both the results and the local warnings + list(diff_features = diff_features, warnings = warnings) }) + +# Separate differential_results and warnings_list from results +differential_results <- lapply(results, `[[`, "diff_features") +warnings_list <- unlist(lapply(results, `[[`, "warnings")) + names(differential_results) <- contrasts$id ``` @@ -787,7 +818,6 @@ foo <- lapply(names(p_value_types), function(pvt){ ``` ```{r, echo=FALSE, results='asis', eval = FALSE} - differential_summary_string <- paste( paste( lapply( @@ -806,7 +836,13 @@ cat(differential_summary_string) ### Differential `r params$features_type` details -```{r, echo=FALSE, results='asis'} +```{r, echo=FALSE, results='asis', warning=FALSE, message=FALSE} + +# Display all warnings related to number of rows +if (length(warnings_list) > 0) { + for (warning in warnings_list) { cat(warning) } +} + for (i in 1:nrow(contrasts)){ cat("\n#### ", contrast_descriptions[i], " {.tabset}\n") From c7205bf664781b258a9c84c49555e15adc701c70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= Date: Thu, 14 Nov 2024 14:29:19 +0000 Subject: [PATCH 2/6] change color --- assets/differentialabundance_report.Rmd | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd index ab176d4c..b8011c26 100644 --- a/assets/differentialabundance_report.Rmd +++ b/assets/differentialabundance_report.Rmd @@ -399,6 +399,8 @@ results <- lapply(differential_files, function(diff_file) { # Merge tables diff_features <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column) + # test + diff_features <- diff_features[1:50,] # Get number of rows before and after merging length_diff <- as.numeric(nrow(diff)) @@ -408,7 +410,7 @@ results <- lapply(differential_files, function(diff_file) { if (length_diff != length_features) { warnings <- c(warnings, paste0( - '

WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were lost after merging with features table (', basename(params$features), ').\n', + '

WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were lost after merging with features table (', basename(params$features), ').\n', 'Rows in diff table: ', length_diff, '.\n', 'Rows in merged table: ', length_features, '.

\n' ) From 993d74f3b2bdf638ee5e2f90cdb7d39455d46604 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= Date: Thu, 14 Nov 2024 15:08:50 +0000 Subject: [PATCH 3/6] Update changelog and remove test lines --- CHANGELOG.md | 1 + assets/differentialabundance_report.Rmd | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bbc2f5c..1ca662c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#345](https://github.com/nf-core/differentialabundance/pull/345)] - Plot differentially expressed genes by gene biotype ([@atrigila](https://github.com/atrigila), review by [@grst](https://github.com/grst)) - [[#343](https://github.com/nf-core/differentialabundance/pull/343)] - Add pipeline-level nf-tests ([@atrigila](https://github.com/atrigila), review by [@pinin4fjords](https://github.com/pinin4fjords) and [@nschcolnicov](https://github.com/nschcolnicov)) - [[#286](https://github.com/nf-core/differentialabundance/pull/286)] - Integration of limma voom for rnaseq data ([@KamilMaliszArdigen](https://github.com/KamilMaliszArdigen), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#354](https://github.com/nf-core/differentialabundance/pull/354)] - Warning message within the R Markdown report to control when genes don't have annotation data ([@alanmmobbs93](https://github.com/alanmmobbs93)). Review by [@WackerO](https://github.com/WackerO) and [@pinin4fjords](https://github.com/pinin4fjords). ### Fixed diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd index b8011c26..d2be3b2f 100644 --- a/assets/differentialabundance_report.Rmd +++ b/assets/differentialabundance_report.Rmd @@ -399,8 +399,6 @@ results <- lapply(differential_files, function(diff_file) { # Merge tables diff_features <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column) - # test - diff_features <- diff_features[1:50,] # Get number of rows before and after merging length_diff <- as.numeric(nrow(diff)) From 4382441f013904143966f25cdef54dba1a9492ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= <64787947+alanmmobbs93@users.noreply.github.com> Date: Fri, 15 Nov 2024 12:14:37 -0300 Subject: [PATCH 4/6] Update assets/differentialabundance_report.Rmd Co-authored-by: Jonathan Manning --- assets/differentialabundance_report.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd index d2be3b2f..f6c2143f 100644 --- a/assets/differentialabundance_report.Rmd +++ b/assets/differentialabundance_report.Rmd @@ -408,7 +408,7 @@ results <- lapply(differential_files, function(diff_file) { if (length_diff != length_features) { warnings <- c(warnings, paste0( - '

WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were lost after merging with features table (', basename(params$features), ').\n', + '

WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were absent from the features table (', basename(params$features), ') and lost on merge.\n', 'Rows in diff table: ', length_diff, '.\n', 'Rows in merged table: ', length_features, '.

\n' ) From e7a54f45ca7e5cb182fc592ca3075c07e77a9638 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= Date: Fri, 15 Nov 2024 18:18:01 +0000 Subject: [PATCH 5/6] differentiate between more or less rows and report IDs --- assets/differentialabundance_report.Rmd | 38 +++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd index f6c2143f..e3eb0256 100644 --- a/assets/differentialabundance_report.Rmd +++ b/assets/differentialabundance_report.Rmd @@ -401,19 +401,47 @@ results <- lapply(differential_files, function(diff_file) { diff_features <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column) # Get number of rows before and after merging - length_diff <- as.numeric(nrow(diff)) - length_features <- as.numeric(nrow(diff_features)) + rows_diff <- as.numeric(nrow(diff)) + rows_diff_features <- as.numeric(nrow(diff_features)) + + # Check that all IDs were conserved + conserved_ids <- all( diff[[params$differential_feature_id_column]] %in% diff_features[[params$features_id_col]] ) + + ## Check if all IDs are present + if (!conserved_ids) { + missing_ids <- setdiff(diff[[params$differential_feature_id_column]], diff_features[[params$features_id_col]]) + warnings <- c(warnings, + paste0( + '

WARNING:', length(missing_ids),' IDs from the differential expressed table (', basename(diff_file), ') were absent from the features table (', basename(params$features), ') and lost on merge.\n', + 'Missing IDs in diff table: ', paste(missing_ids, collapse = ' '), '.\n', + 'Rows in merged table: ', rows_diff_features, '.

\n' + ) + ) + } # Compare numbers and report - if (length_diff != length_features) { + ## Check if features_diff has fewer rows, it would indicate lost of info + if ( rows_diff_features < rows_diff ) { warnings <- c(warnings, paste0( '

WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were absent from the features table (', basename(params$features), ') and lost on merge.\n', - 'Rows in diff table: ', length_diff, '.\n', - 'Rows in merged table: ', length_features, '.

\n' + 'Rows in diff table: ', rows_diff, '.\n', + 'Rows in merged table: ', rows_diff_features, '.

\n' ) ) } + + ## Check if features_diff has more rows, it could indicate duplications + if ( rows_diff_features > rows_diff ) { + warnings <- c(warnings, + paste0( + '

WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were duplicated on feature table (', basename(params$features), ').\n', + 'Rows in diff table: ', rows_diff, '.\n', + 'Rows in merged table: ', rows_diff_features, '.

\n' + ) + ) + } + } else { diff_features <- diff } From e7fd0b49c335c9f6b45887013d58a5dc675a5474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alan=20M=C3=B6bbs?= Date: Wed, 20 Nov 2024 12:18:48 +0000 Subject: [PATCH 6/6] Reduce code --- assets/differentialabundance_report.Rmd | 86 +++++++++---------------- 1 file changed, 30 insertions(+), 56 deletions(-) diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd index e3eb0256..8babb4ba 100644 --- a/assets/differentialabundance_report.Rmd +++ b/assets/differentialabundance_report.Rmd @@ -375,11 +375,7 @@ warnings_list <- c() # Read differential results and merge with features table results <- lapply(differential_files, function(diff_file) { - warnings <- c() # Initialize local warning vector - - if (!file.exists(diff_file)) { - stop(paste("Differential file", diff_file, "does not exist")) - } + if (!file.exists(diff_file)) stop(paste("Differential file", diff_file, "does not exist")) diff <- read_differential( diff_file, @@ -389,65 +385,43 @@ results <- lapply(differential_files, function(diff_file) { qval_column = params$differential_qval_column ) - # If fold changes are not logged already, log them + # Log transform fold changes if not already logged if (!params$differential_foldchanges_logged) { diff[[params$differential_fc_column]] <- log2(diff[[params$differential_fc_column]]) } - # Annotate differential tables if possible + # Annotate differential table if features table is provided if (!is.null(params$features)) { - - # Merge tables - diff_features <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column) - - # Get number of rows before and after merging - rows_diff <- as.numeric(nrow(diff)) - rows_diff_features <- as.numeric(nrow(diff_features)) - - # Check that all IDs were conserved - conserved_ids <- all( diff[[params$differential_feature_id_column]] %in% diff_features[[params$features_id_col]] ) - - ## Check if all IDs are present - if (!conserved_ids) { - missing_ids <- setdiff(diff[[params$differential_feature_id_column]], diff_features[[params$features_id_col]]) - warnings <- c(warnings, - paste0( - '

WARNING:', length(missing_ids),' IDs from the differential expressed table (', basename(diff_file), ') were absent from the features table (', basename(params$features), ') and lost on merge.\n', - 'Missing IDs in diff table: ', paste(missing_ids, collapse = ' '), '.\n', - 'Rows in merged table: ', rows_diff_features, '.

\n' - ) + ## Merge Differential expression table on features table + merged <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column) + + ## Get number of missing rows + n_missing <- length(setdiff(diff[[params$differential_feature_id_column]], merged[[params$features_id_col]])) + + ## Create warnings if necessary + warnings <- c( + ## Missing IDs + if (n_missing > 0) sprintf( + '

WARNING: %d IDs from the differential table (%s) were lost on merge with features table (%s).

', + n_missing, basename(diff_file), basename(params$features) + ), + ## Check whether there are fewer rows, missing data + if (nrow(merged) < nrow(diff)) sprintf( + '

WARNING: Rows were lost on merge (%s -> %s). Original: %d, Merged: %d.

', + basename(diff_file), basename(params$features), nrow(diff), nrow(merged) + ), + ## Check whether there are more rows, possible duplications + if (nrow(merged) > nrow(diff)) sprintf( + '

WARNING: Rows were duplicated on merge (%s -> %s). Original: %d, Merged: %d.

', + basename(diff_file), basename(params$features), nrow(diff), nrow(merged) ) - } - - # Compare numbers and report - ## Check if features_diff has fewer rows, it would indicate lost of info - if ( rows_diff_features < rows_diff ) { - warnings <- c(warnings, - paste0( - '

WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were absent from the features table (', basename(params$features), ') and lost on merge.\n', - 'Rows in diff table: ', rows_diff, '.\n', - 'Rows in merged table: ', rows_diff_features, '.

\n' - ) - ) - } - - ## Check if features_diff has more rows, it could indicate duplications - if ( rows_diff_features > rows_diff ) { - warnings <- c(warnings, - paste0( - '

WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were duplicated on feature table (', basename(params$features), ').\n', - 'Rows in diff table: ', rows_diff, '.\n', - 'Rows in merged table: ', rows_diff_features, '.

\n' - ) - ) - } - + ) } else { - diff_features <- diff + merged <- diff + warnings <- character(0) } - - # Return both the results and the local warnings - list(diff_features = diff_features, warnings = warnings) + ## Collect results + list(diff_features = merged, warnings = warnings) }) # Separate differential_results and warnings_list from results