Merge pull request #8 from ricoderks/dev

Dev
ricoderks · Jun 30, 2021 · 152e6e2 · 152e6e2
2 parents 0d2db3b + 5828996
commit 152e6e2
Show file tree

Hide file tree

Showing 17 changed files with 425 additions and 343 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: lipidomics
 Type: Package
 Title: Lipidomics workflow
-Version: 0.4.1
+Version: 0.5
 Author: Rico Derks
 Maintainer: Rico Derks <[email protected]>
 Description: Lipidomics workflow to proces the result files (export alignment)
@@ -16,10 +16,11 @@ Imports:
     ggplot2,
     ggrepel,
     grDevices,
+    heatmaply,
     janitor,
     magrittr,
     methods,
-    pheatmap,
+    openxlsx,
     plotly,
     purrr,
     readr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -7,16 +7,18 @@ importFrom(DT,DTOutput)
 importFrom(DT,renderDT)
 importFrom(dplyr,across)
 importFrom(dplyr,arrange)
+importFrom(dplyr,bind_rows)
 importFrom(dplyr,case_when)
+importFrom(dplyr,desc)
 importFrom(dplyr,distinct)
 importFrom(dplyr,filter)
 importFrom(dplyr,group_by)
 importFrom(dplyr,if_else)
 importFrom(dplyr,left_join)
-importFrom(dplyr,matches)
 importFrom(dplyr,mutate)
 importFrom(dplyr,n)
 importFrom(dplyr,pull)
+importFrom(dplyr,relocate)
 importFrom(dplyr,rename)
 importFrom(dplyr,select)
 importFrom(dplyr,slice)
@@ -52,14 +54,17 @@ importFrom(ggplot2,scale_y_continuous)
 importFrom(ggplot2,theme)
 importFrom(ggplot2,theme_minimal)
 importFrom(ggrepel,geom_text_repel)
+importFrom(grDevices,colorRamp)
 importFrom(grDevices,rainbow)
+importFrom(heatmaply,heatmaply)
 importFrom(janitor,clean_names)
 importFrom(janitor,make_clean_names)
 importFrom(magrittr,"%>%")
-importFrom(pheatmap,pheatmap)
+importFrom(openxlsx,write.xlsx)
 importFrom(plotly,add_bars)
 importFrom(plotly,add_heatmap)
 importFrom(plotly,add_markers)
+importFrom(plotly,colorbar)
 importFrom(plotly,config)
 importFrom(plotly,event_data)
 importFrom(plotly,event_register)
@@ -102,12 +107,15 @@ importFrom(stats,sd)
 importFrom(stringr,str_extract)
 importFrom(stringr,str_replace)
 importFrom(stringr,str_split)
+importFrom(tibble,column_to_rownames)
 importFrom(tibble,tibble)
 importFrom(tidyr,everything)
 importFrom(tidyr,pivot_longer)
 importFrom(tidyr,pivot_wider)
 importFrom(tidyr,separate)
 importFrom(tidyr,unnest)
+importFrom(tidyselect,everything)
+importFrom(tidyselect,last_col)
 importFrom(tidyselect,matches)
 importFrom(tools,file_ext)
 importFrom(utils,head)
diff --git a/R/bubblePlotServer.R b/R/bubblePlotServer.R
@@ -110,8 +110,8 @@ bubblePlotServer <- function(id, lipid_data, pattern, title) {
                        scales = "free") +
             labs(x = "Retention time [minutes]",
                  y = expression(italic("m/z"))) +
-            guides(color = FALSE,
-                   size = FALSE) +
+            guides(color = "none",
+                   size = "none") +
             coord_cartesian(xlim = ranges$x,
                             ylim = ranges$y) +
             theme_cpm() +

diff --git a/R/compare_samples_heatmap.R b/R/compare_samples_heatmap.R
@@ -3,27 +3,29 @@
 #' @description Create a correlation heatmap of all samples..
 #'
 #' @param lipid_data tibble with all the lipid data
+#' @param cent_scale logical, apply center and scaling
 #' @param z what to show as intensity of the heatmap
+#' @param clust apply clustering yes/no, default is no
+#' @param sample_group dataframe with grouping information
 #'
 #' @return plotly object
 #'
 #' @importFrom magrittr %>%
-#' @importFrom dplyr filter group_by mutate ungroup case_when
+#' @importFrom dplyr filter group_by mutate ungroup case_when desc
 #' @importFrom rlang .data
+#' @importFrom tidyr pivot_wider
+#' @importFrom tidyselect matches
 #' @importFrom plotly plot_ly add_heatmap layout
+#' @importFrom heatmaply heatmaply
+#' @importFrom tibble column_to_rownames
 #'
 #' @author Rico Derks
 #'
-compare_samples_heatmap <- function(lipid_data, z) {
+compare_samples_heatmap <- function(lipid_data, cent_scale, z, clust = FALSE, sample_group = NULL) {
   lipid_data <- lipid_data %>%
     # only select the samples
     filter(.data$sample_type == "sample",
            .data$keep == TRUE) %>%
-    # scale "row wise" i.e. lipid
-    group_by(.data$my_id) %>%
-    # keep in mind, scale always returns a matrix
-    mutate(scaled_area = scale(.data$area)[, 1]) %>%
-    ungroup() %>%
     # total area normalisation
     group_by(.data$sample_name) %>%
     mutate(norm_area = .data$area / sum(.data$area)) %>%
@@ -34,37 +36,61 @@ compare_samples_heatmap <- function(lipid_data, z) {
       order_yaxis = paste(.data$LipidClass, .data$ShortLipidName, sep = "_"),
       # what to plot
       plot_z = case_when(
-        z == "zscore" ~ .data$scaled_area,
         z == "raw" ~ .data$area,
         z == "totnorm" ~ .data$norm_area
       ))
 
+  if(cent_scale == TRUE) {
+    lipid_data <- lipid_data %>%
+      # # scale "row wise" i.e. lipid
+      group_by(.data$my_id) %>%
+      # keep in mind, scale always returns a matrix
+      mutate(plot_z = scale(.data$plot_z)[, 1]) %>%
+      ungroup()
+  }
+
   legend_name <- case_when(
-    z == "zscore" ~ "z-score",
     z == "raw" ~ "Raw data",
     z == "totnorm" ~ "Tot. area norm."
   )
 
-  p <- lipid_data %>%
-    plot_ly(x = ~sample_name,
-            y = ~ShortLipidName,
-            colorbar = list(title = legend_name)) %>%
-    add_heatmap(z = ~plot_z,
-                text = ~LipidClass,
-                colorscale = "Rainbow",
-                hovertemplate = paste(
-                  "%{xaxis.title.text}: %{x}<br>",
-                  "%{yaxis.title.text}: %{y}<br>",
-                  "Lipid class: %{text}<br>",
-                  "Value: %{z:.3f}",
-                  "<extra></extra>" # needed to remove the trace box
-                ),
-                # order the y-axis according to lipid class and then lipid
-                yaxis = list(type = "category",
-                             categoryorder = "array",
-                             categoryarray =  ~order_yaxis)) %>%
-    layout(yaxis = list(title = list(text = "Short lipid name")),
-           xaxis = list(title = list(text = "Sample name")))
+  # need to make the data wide for heatmaply
+  plot_data <- lipid_data %>%
+    pivot_wider(id_cols = c(.data$ShortLipidName, .data$LipidClass),
+                names_from = .data$sample_name,
+                values_from = .data$plot_z) %>%
+    arrange(desc(.data$LipidClass), .data$ShortLipidName) %>%
+    column_to_rownames(var = "ShortLipidName")
+
+  if(is.null(sample_group)){
+    p <- heatmaply(x = plot_data %>%
+                     select(-.data$LipidClass),
+                   dendrogram = ifelse(clust == TRUE, "both", "none"),
+                   scale = "none",
+                   colors = rainbow(n = 100,
+                                    alpha = 0.5),
+                   xlab = "Sample name",
+                   ylab = "Lipid",
+                   fontsize_row = 6)
+  } else {
+    # extra the sample group info
+    col_group <- lipid_data %>%
+      select(.data$sample_name, matches(paste0("^", sample_group, "$"))) %>%
+      distinct(.data$sample_name,
+               .keep_all = TRUE) %>%
+      select(-.data$sample_name)
+
+    p <- heatmaply(x = plot_data %>%
+                     select(-.data$LipidClass),
+                   dendrogram = ifelse(clust == TRUE, "both", "none"),
+                   scale = "none",
+                   colors = rainbow(n = 100,
+                                    alpha = 0.5),
+                   xlab = "Sample name",
+                   ylab = "Lipid",
+                   fontsize_row = 6,
+                   col_side_colors = col_group)
+  }
 
   return(p)
 }
diff --git a/R/cor_heatmap.R b/R/cor_heatmap.R
@@ -2,38 +2,34 @@
 #'
 #' @description Create a correlation heatmap of all samples..
 #'
-#' @param df tibble in tidy format
+#' @param lipid_data tibble with all the lipid data
 #'
-#' @return pheatmap object
+#' @return plotly object
 #'
 #' @importFrom magrittr %>%
-#' @importFrom dplyr select matches
+#' @importFrom dplyr select
+#' @importFrom tidyselect matches
 #' @importFrom stringr str_extract
 #' @importFrom rlang .data
-#' @importFrom pheatmap pheatmap
+#' @importFrom plotly plot_ly colorbar
 #' @importFrom stats cor
-#'
+#' @importFrom grDevices colorRamp
 #'
 #' @author Rico Derks
 #'
-cor_heatmap <- function(df) {
-  df_m <- df %>%
+cor_heatmap <- function(lipid_data) {
+  df_m <- lipid_data %>%
     select(matches("([qQ][cC]pool|[sS]ample)"))
 
+  # calculate the correlation
   cormat <- cor(df_m)
 
-  # Define which pheno data columns should be highlighted in the plot
-  ann <- data.frame(sample_type = str_extract(string = colnames(df_m),
-                                              pattern = "([qQ][cC]pool|[sS]ample)"))
-  rownames(ann) <- colnames(df_m)
-
-  # show heatmap
-  p <- pheatmap(cormat,
-           annotation = ann,
-           cluster_cols = FALSE,
-           cluster_rows = FALSE,
-           fontsize_row = 8,
-           fontsize_col = 6)
+  p <- plot_ly(x = colnames(df_m),
+              y = colnames(df_m),
+              z = cormat,
+              type = "heatmap",
+              colors = colorRamp(c("blue", "red"))) %>%
+    colorbar(limits = c(-1, 1))
 
   return(p)
 }
diff --git a/R/cor_heatmap2.R b/R/cor_heatmap2.R
diff --git a/R/do_pca.R b/R/do_pca.R
@@ -38,65 +38,59 @@ do_pca <- function(lipid_data, observations = c("all", "samples"), normalization
 
   # need to make the data wide
   # samples as rows and lipids as columns
-  lipid_data_wide <- lipid_data %>%
+  lipid_data_prep <- lipid_data %>%
     filter(.data$keep == TRUE) %>%
     # select which sample type to keep
     filter(.data$sample_type %in% select_obs) %>%
     # total area normalisation
     group_by(.data$sample_name) %>%
     mutate(norm_area = .data$area / sum(.data$area)) %>%
     ungroup() %>%
-    # select what to use for PCA
+    # select which normalization to use for PCA
     mutate(value = case_when(
       normalization == "raw" ~ .data$area,
       normalization == "tot_area" ~ .data$norm_area
     )) %>%
-    # make wide
+    # do transformations and select which transformation to keep
+    mutate(value = case_when(
+      transformation == "none" ~ .data$value,
+      transformation == "log10" ~ log10(.data$value + 1) # the +1 is correct for any zero's
+    )) %>%
+    # do the centering and scaling per variable
+    group_by(.data$my_id) %>%
+    mutate(value = .data$value - mean(.data$value),
+           uv_value = .data$value / sd(.data$value),
+           par_value = .data$value / sqrt(sd(.data$value))) %>%
+    ungroup() %>%
+    # select which scaling to use
+    mutate(value = case_when(
+      scaling == "none" ~ .data$value,
+      scaling == "uv" ~ .data$uv_value,
+      scaling == "pareto" ~ .data$par_value
+    ))
+
+  # return the preprocessed data
+  pca_data$preprocess_data <- lipid_data_prep %>%
+    select(.data$sample_name, .data$ShortLipidName, .data$LipidClass, .data$value)
+
+  # make wide
+  lipid_data_wide <- lipid_data_prep %>%
     select(.data$sample_name, .data$sample_type, .data$ShortLipidName, .data$value) %>%
     pivot_wider(names_from = .data$ShortLipidName,
                 values_from = .data$value) %>%
+    # this is slow
     clean_names()
 
-  # transformation
-  switch(transformation,
-         "none" = lipid_data_wide <- lipid_data_wide,
-         "log10" = lipid_data_wide <- lipid_data_wide %>%
-           mutate(across(where(is.numeric), ~ log10(.x + 1))) # the +1 is to avoid problems with 0's
-  )
-
-  # center
-  lipid_data_wide <- lipid_data_wide %>%
-    mutate(across(where(is.numeric), ~ .x - mean(.x)))
-
-  # scale
-  switch(scaling,
-         "none" = lipid_data_wide <- lipid_data_wide,
-         "uv" = lipid_data_wide <- lipid_data_wide %>%
-           mutate(across(where(is.numeric), ~ .x / sd(.x))),
-         "pareto" = lipid_data_wide <- lipid_data_wide %>%
-           mutate(across(where(is.numeric), ~ .x / sqrt(sd(.x))))
-  )
-
-  # return the preprocessed data
-  pca_data$preprocess_data <- lipid_data_wide %>%
-    pivot_longer(cols = -c(.data$sample_name, .data$sample_type),
-                 names_to = "lipid",
-                 values_to = "value") %>%
-    left_join(y = lipid_data %>%
-                select(.data$ShortLipidName, .data$LipidClass) %>%
-                distinct(.data$ShortLipidName, .keep_all = TRUE) %>%
-                mutate(clean_lipid_names = make_clean_names(.data$ShortLipidName)),
-              by = c("lipid" = "clean_lipid_names"))
-
   # set up the recipe, preprocessing etc.
   pca_rec <- recipe(~.,
                     data = lipid_data_wide) %>%
     # define id variables
     update_role(.data$sample_name, .data$sample_type,
                 new_role = "id") %>%
     step_pca(all_predictors(),
-             num_comp = num_pc)
+             num_comp = 5)
 
+  # do the pca
   pca_prep <- prep(pca_rec)
 
   # get the explained variance

diff --git a/R/merge_data.R b/R/merge_data.R
@@ -12,7 +12,6 @@
 #' @return Returns a merged data frame in tidy format
 #'
 #' @importFrom dplyr mutate left_join if_else
-#' @importFrom tidyselect matches
 #' @importFrom tidyr pivot_longer
 #' @importFrom stringr str_extract str_replace
 #' @importFrom magrittr %>%