From 9caa45b8476b0fa87d4e53fe801bb4cd097cc477 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alan=20M=C3=B6bbs?=
Date: Thu, 14 Nov 2024 13:48:50 +0000
Subject: [PATCH 1/6] compare rows before and after merging and print the
warnings if necessary
---
assets/differentialabundance_report.Rmd | 58 ++++++++++++++++++++-----
1 file changed, 47 insertions(+), 11 deletions(-)
diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd
index 240c3ffe..ab176d4c 100644
--- a/assets/differentialabundance_report.Rmd
+++ b/assets/differentialabundance_report.Rmd
@@ -370,10 +370,17 @@ differential_files <- lapply(contrasts$id, function(d){
file.path(params$input_dir, paste0(gsub(' |;', '_', d), differential_file_suffix))
})
-differential_results <- lapply(differential_files, function(diff_file){
- if (! file.exists(diff_file)){
+# Initialize vector to store warning messages before merging tables
+warnings_list <- c()
+
+# Read differential results and merge with features table
+results <- lapply(differential_files, function(diff_file) {
+ warnings <- c() # Initialize local warning vector
+
+ if (!file.exists(diff_file)) {
stop(paste("Differential file", diff_file, "does not exist"))
}
+
diff <- read_differential(
diff_file,
feature_id_column = params$differential_feature_id_column,
@@ -382,19 +389,43 @@ differential_results <- lapply(differential_files, function(diff_file){
qval_column = params$differential_qval_column
)
- # If fold changes are not logged already, log them (we assume they're logged
- # later on)
-
- if (! params$differential_foldchanges_logged){
+ # If fold changes are not logged already, log them
+ if (!params$differential_foldchanges_logged) {
diff[[params$differential_fc_column]] <- log2(diff[[params$differential_fc_column]])
}
# Annotate differential tables if possible
- if (! is.null(params$features)){
- diff <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column)
+ if (!is.null(params$features)) {
+
+ # Merge tables
+ diff_features <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column)
+
+ # Get number of rows before and after merging
+ length_diff <- as.numeric(nrow(diff))
+ length_features <- as.numeric(nrow(diff_features))
+
+ # Compare numbers and report
+ if (length_diff != length_features) {
+ warnings <- c(warnings,
+ paste0(
+ 'WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were lost after merging with features table (', basename(params$features), ').\n',
+ 'Rows in diff table: ', length_diff, '.\n',
+ 'Rows in merged table: ', length_features, '.
\n'
+ )
+ )
+ }
+ } else {
+ diff_features <- diff
}
- diff
+
+ # Return both the results and the local warnings
+ list(diff_features = diff_features, warnings = warnings)
})
+
+# Separate differential_results and warnings_list from results
+differential_results <- lapply(results, `[[`, "diff_features")
+warnings_list <- unlist(lapply(results, `[[`, "warnings"))
+
names(differential_results) <- contrasts$id
```
@@ -787,7 +818,6 @@ foo <- lapply(names(p_value_types), function(pvt){
```
```{r, echo=FALSE, results='asis', eval = FALSE}
-
differential_summary_string <- paste(
paste(
lapply(
@@ -806,7 +836,13 @@ cat(differential_summary_string)
### Differential `r params$features_type` details
-```{r, echo=FALSE, results='asis'}
+```{r, echo=FALSE, results='asis', warning=FALSE, message=FALSE}
+
+# Display all warnings related to number of rows
+if (length(warnings_list) > 0) {
+ for (warning in warnings_list) { cat(warning) }
+}
+
for (i in 1:nrow(contrasts)){
cat("\n#### ", contrast_descriptions[i], " {.tabset}\n")
From c7205bf664781b258a9c84c49555e15adc701c70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alan=20M=C3=B6bbs?=
Date: Thu, 14 Nov 2024 14:29:19 +0000
Subject: [PATCH 2/6] change color
---
assets/differentialabundance_report.Rmd | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd
index ab176d4c..b8011c26 100644
--- a/assets/differentialabundance_report.Rmd
+++ b/assets/differentialabundance_report.Rmd
@@ -399,6 +399,8 @@ results <- lapply(differential_files, function(diff_file) {
# Merge tables
diff_features <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column)
+ # test
+ diff_features <- diff_features[1:50,]
# Get number of rows before and after merging
length_diff <- as.numeric(nrow(diff))
@@ -408,7 +410,7 @@ results <- lapply(differential_files, function(diff_file) {
if (length_diff != length_features) {
warnings <- c(warnings,
paste0(
- 'WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were lost after merging with features table (', basename(params$features), ').\n',
+ '
WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were lost after merging with features table (', basename(params$features), ').\n',
'Rows in diff table: ', length_diff, '.\n',
'Rows in merged table: ', length_features, '.
\n'
)
From 993d74f3b2bdf638ee5e2f90cdb7d39455d46604 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alan=20M=C3=B6bbs?=
Date: Thu, 14 Nov 2024 15:08:50 +0000
Subject: [PATCH 3/6] Update changelog and remove test lines
---
CHANGELOG.md | 1 +
assets/differentialabundance_report.Rmd | 2 --
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6bbc2f5c..1ca662c3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [[#345](https://github.com/nf-core/differentialabundance/pull/345)] - Plot differentially expressed genes by gene biotype ([@atrigila](https://github.com/atrigila), review by [@grst](https://github.com/grst))
- [[#343](https://github.com/nf-core/differentialabundance/pull/343)] - Add pipeline-level nf-tests ([@atrigila](https://github.com/atrigila), review by [@pinin4fjords](https://github.com/pinin4fjords) and [@nschcolnicov](https://github.com/nschcolnicov))
- [[#286](https://github.com/nf-core/differentialabundance/pull/286)] - Integration of limma voom for rnaseq data ([@KamilMaliszArdigen](https://github.com/KamilMaliszArdigen), review by [@pinin4fjords](https://github.com/pinin4fjords))
+- [[#354](https://github.com/nf-core/differentialabundance/pull/354)] - Add warning messages to the R Markdown report when feature IDs from the differential tables are missing from, or duplicated in, the features annotation table ([@alanmmobbs93](https://github.com/alanmmobbs93), review by [@WackerO](https://github.com/WackerO) and [@pinin4fjords](https://github.com/pinin4fjords))
### Fixed
diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd
index b8011c26..d2be3b2f 100644
--- a/assets/differentialabundance_report.Rmd
+++ b/assets/differentialabundance_report.Rmd
@@ -399,8 +399,6 @@ results <- lapply(differential_files, function(diff_file) {
# Merge tables
diff_features <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column)
- # test
- diff_features <- diff_features[1:50,]
# Get number of rows before and after merging
length_diff <- as.numeric(nrow(diff))
From 4382441f013904143966f25cdef54dba1a9492ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alan=20M=C3=B6bbs?=
<64787947+alanmmobbs93@users.noreply.github.com>
Date: Fri, 15 Nov 2024 12:14:37 -0300
Subject: [PATCH 4/6] Update assets/differentialabundance_report.Rmd
Co-authored-by: Jonathan Manning
---
assets/differentialabundance_report.Rmd | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd
index d2be3b2f..f6c2143f 100644
--- a/assets/differentialabundance_report.Rmd
+++ b/assets/differentialabundance_report.Rmd
@@ -408,7 +408,7 @@ results <- lapply(differential_files, function(diff_file) {
if (length_diff != length_features) {
warnings <- c(warnings,
paste0(
- 'WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were lost after merging with features table (', basename(params$features), ').\n',
+ '
WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were absent from the features table (', basename(params$features), ') and lost on merge.\n',
'Rows in diff table: ', length_diff, '.\n',
'Rows in merged table: ', length_features, '.
\n'
)
From e7a54f45ca7e5cb182fc592ca3075c07e77a9638 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alan=20M=C3=B6bbs?=
Date: Fri, 15 Nov 2024 18:18:01 +0000
Subject: [PATCH 5/6] differentiate between more or fewer rows and report IDs
---
assets/differentialabundance_report.Rmd | 38 +++++++++++++++++++++----
1 file changed, 33 insertions(+), 5 deletions(-)
diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd
index f6c2143f..e3eb0256 100644
--- a/assets/differentialabundance_report.Rmd
+++ b/assets/differentialabundance_report.Rmd
@@ -401,19 +401,47 @@ results <- lapply(differential_files, function(diff_file) {
diff_features <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column)
# Get number of rows before and after merging
- length_diff <- as.numeric(nrow(diff))
- length_features <- as.numeric(nrow(diff_features))
+ rows_diff <- as.numeric(nrow(diff))
+ rows_diff_features <- as.numeric(nrow(diff_features))
+
+ # Check that all IDs were conserved
+ conserved_ids <- all( diff[[params$differential_feature_id_column]] %in% diff_features[[params$features_id_col]] )
+
+ ## Check if all IDs are present
+ if (!conserved_ids) {
+ missing_ids <- setdiff(diff[[params$differential_feature_id_column]], diff_features[[params$features_id_col]])
+ warnings <- c(warnings,
+ paste0(
+ 'WARNING:', length(missing_ids),' IDs from the differential expressed table (', basename(diff_file), ') were absent from the features table (', basename(params$features), ') and lost on merge.\n',
+ 'Missing IDs in diff table: ', paste(missing_ids, collapse = ' '), '.\n',
+ 'Rows in merged table: ', rows_diff_features, '.
\n'
+ )
+ )
+ }
# Compare numbers and report
- if (length_diff != length_features) {
+ ## Check if features_diff has fewer rows, it would indicate lost of info
+ if ( rows_diff_features < rows_diff ) {
warnings <- c(warnings,
paste0(
'WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were absent from the features table (', basename(params$features), ') and lost on merge.\n',
- 'Rows in diff table: ', length_diff, '.\n',
- 'Rows in merged table: ', length_features, '.
\n'
+ 'Rows in diff table: ', rows_diff, '.\n',
+ 'Rows in merged table: ', rows_diff_features, '.
\n'
)
)
}
+
+ ## Check if features_diff has more rows, it could indicate duplications
+ if ( rows_diff_features > rows_diff ) {
+ warnings <- c(warnings,
+ paste0(
+ 'WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were duplicated on feature table (', basename(params$features), ').\n',
+ 'Rows in diff table: ', rows_diff, '.\n',
+ 'Rows in merged table: ', rows_diff_features, '.
\n'
+ )
+ )
+ }
+
} else {
diff_features <- diff
}
From e7fd0b49c335c9f6b45887013d58a5dc675a5474 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alan=20M=C3=B6bbs?=
Date: Wed, 20 Nov 2024 12:18:48 +0000
Subject: [PATCH 6/6] Reduce code
---
assets/differentialabundance_report.Rmd | 86 +++++++++----------------
1 file changed, 30 insertions(+), 56 deletions(-)
diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd
index e3eb0256..8babb4ba 100644
--- a/assets/differentialabundance_report.Rmd
+++ b/assets/differentialabundance_report.Rmd
@@ -375,11 +375,7 @@ warnings_list <- c()
# Read differential results and merge with features table
results <- lapply(differential_files, function(diff_file) {
- warnings <- c() # Initialize local warning vector
-
- if (!file.exists(diff_file)) {
- stop(paste("Differential file", diff_file, "does not exist"))
- }
+ if (!file.exists(diff_file)) stop(paste("Differential file", diff_file, "does not exist"))
diff <- read_differential(
diff_file,
@@ -389,65 +385,43 @@ results <- lapply(differential_files, function(diff_file) {
qval_column = params$differential_qval_column
)
- # If fold changes are not logged already, log them
+ # Log transform fold changes if not already logged
if (!params$differential_foldchanges_logged) {
diff[[params$differential_fc_column]] <- log2(diff[[params$differential_fc_column]])
}
- # Annotate differential tables if possible
+ # Annotate differential table if features table is provided
if (!is.null(params$features)) {
-
- # Merge tables
- diff_features <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column)
-
- # Get number of rows before and after merging
- rows_diff <- as.numeric(nrow(diff))
- rows_diff_features <- as.numeric(nrow(diff_features))
-
- # Check that all IDs were conserved
- conserved_ids <- all( diff[[params$differential_feature_id_column]] %in% diff_features[[params$features_id_col]] )
-
- ## Check if all IDs are present
- if (!conserved_ids) {
- missing_ids <- setdiff(diff[[params$differential_feature_id_column]], diff_features[[params$features_id_col]])
- warnings <- c(warnings,
- paste0(
- 'WARNING:', length(missing_ids),' IDs from the differential expressed table (', basename(diff_file), ') were absent from the features table (', basename(params$features), ') and lost on merge.\n',
- 'Missing IDs in diff table: ', paste(missing_ids, collapse = ' '), '.\n',
- 'Rows in merged table: ', rows_diff_features, '.
\n'
- )
+ ## Merge Differential expression table on features table
+ merged <- merge(features, diff, by.x = params$features_id_col, by.y = params$differential_feature_id_column)
+
+ ## Get number of missing rows
+ n_missing <- length(setdiff(diff[[params$differential_feature_id_column]], merged[[params$features_id_col]]))
+
+ ## Create warnings if necessary
+ warnings <- c(
+ ## Missing IDs
+ if (n_missing > 0) sprintf(
+ 'WARNING: %d IDs from the differential table (%s) were lost on merge with features table (%s).
',
+ n_missing, basename(diff_file), basename(params$features)
+ ),
+ ## Check whether there are fewer rows, missing data
+ if (nrow(merged) < nrow(diff)) sprintf(
+ 'WARNING: Rows were lost on merge (%s -> %s). Original: %d, Merged: %d.
',
+ basename(diff_file), basename(params$features), nrow(diff), nrow(merged)
+ ),
+ ## Check whether there are more rows, possible duplications
+ if (nrow(merged) > nrow(diff)) sprintf(
+ 'WARNING: Rows were duplicated on merge (%s -> %s). Original: %d, Merged: %d.
',
+ basename(diff_file), basename(params$features), nrow(diff), nrow(merged)
)
- }
-
- # Compare numbers and report
- ## Check if features_diff has fewer rows, it would indicate lost of info
- if ( rows_diff_features < rows_diff ) {
- warnings <- c(warnings,
- paste0(
- 'WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were absent from the features table (', basename(params$features), ') and lost on merge.\n',
- 'Rows in diff table: ', rows_diff, '.\n',
- 'Rows in merged table: ', rows_diff_features, '.
\n'
- )
- )
- }
-
- ## Check if features_diff has more rows, it could indicate duplications
- if ( rows_diff_features > rows_diff ) {
- warnings <- c(warnings,
- paste0(
- 'WARNING: Some rows from the differential expressed table (', basename(diff_file), ') were duplicated on feature table (', basename(params$features), ').\n',
- 'Rows in diff table: ', rows_diff, '.\n',
- 'Rows in merged table: ', rows_diff_features, '.
\n'
- )
- )
- }
-
+ )
} else {
- diff_features <- diff
+ merged <- diff
+ warnings <- character(0)
}
-
- # Return both the results and the local warnings
- list(diff_features = diff_features, warnings = warnings)
+ ## Collect results
+ list(diff_features = merged, warnings = warnings)
})
# Separate differential_results and warnings_list from results