Skip to content

Commit

Permalink
Merge pull request #8 from ricoderks/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
ricoderks authored Jun 30, 2021
2 parents 0d2db3b + 5828996 commit 152e6e2
Show file tree
Hide file tree
Showing 17 changed files with 425 additions and 343 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: lipidomics
Type: Package
Title: Lipidomics workflow
Version: 0.4.1
Version: 0.5
Author: Rico Derks
Maintainer: Rico Derks <[email protected]>
Description: Lipidomics workflow to proces the result files (export alignment)
Expand All @@ -16,10 +16,11 @@ Imports:
ggplot2,
ggrepel,
grDevices,
heatmaply,
janitor,
magrittr,
methods,
pheatmap,
openxlsx,
plotly,
purrr,
readr,
Expand Down
12 changes: 10 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@ importFrom(DT,DTOutput)
importFrom(DT,renderDT)
importFrom(dplyr,across)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,desc)
importFrom(dplyr,distinct)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,if_else)
importFrom(dplyr,left_join)
importFrom(dplyr,matches)
importFrom(dplyr,mutate)
importFrom(dplyr,n)
importFrom(dplyr,pull)
importFrom(dplyr,relocate)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(dplyr,slice)
Expand Down Expand Up @@ -52,14 +54,17 @@ importFrom(ggplot2,scale_y_continuous)
importFrom(ggplot2,theme)
importFrom(ggplot2,theme_minimal)
importFrom(ggrepel,geom_text_repel)
importFrom(grDevices,colorRamp)
importFrom(grDevices,rainbow)
importFrom(heatmaply,heatmaply)
importFrom(janitor,clean_names)
importFrom(janitor,make_clean_names)
importFrom(magrittr,"%>%")
importFrom(pheatmap,pheatmap)
importFrom(openxlsx,write.xlsx)
importFrom(plotly,add_bars)
importFrom(plotly,add_heatmap)
importFrom(plotly,add_markers)
importFrom(plotly,colorbar)
importFrom(plotly,config)
importFrom(plotly,event_data)
importFrom(plotly,event_register)
Expand Down Expand Up @@ -102,12 +107,15 @@ importFrom(stats,sd)
importFrom(stringr,str_extract)
importFrom(stringr,str_replace)
importFrom(stringr,str_split)
importFrom(tibble,column_to_rownames)
importFrom(tibble,tibble)
importFrom(tidyr,everything)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,pivot_wider)
importFrom(tidyr,separate)
importFrom(tidyr,unnest)
importFrom(tidyselect,everything)
importFrom(tidyselect,last_col)
importFrom(tidyselect,matches)
importFrom(tools,file_ext)
importFrom(utils,head)
4 changes: 2 additions & 2 deletions R/bubblePlotServer.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ bubblePlotServer <- function(id, lipid_data, pattern, title) {
scales = "free") +
labs(x = "Retention time [minutes]",
y = expression(italic("m/z"))) +
guides(color = FALSE,
size = FALSE) +
guides(color = "none",
size = "none") +
coord_cartesian(xlim = ranges$x,
ylim = ranges$y) +
theme_cpm() +
Expand Down
84 changes: 55 additions & 29 deletions R/compare_samples_heatmap.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,29 @@
#' @description Create a correlation heatmap of all samples..
#'
#' @param lipid_data tibble with all the lipid data
#' @param cent_scale logical, apply center and scaling
#' @param z what to show as intensity of the heatmap
#' @param clust apply clustering yes/no, default is no
#' @param sample_group dataframe with grouping information
#'
#' @return plotly object
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr filter group_by mutate ungroup case_when
#' @importFrom dplyr filter group_by mutate ungroup case_when desc
#' @importFrom rlang .data
#' @importFrom tidyr pivot_wider
#' @importFrom tidyselect matches
#' @importFrom plotly plot_ly add_heatmap layout
#' @importFrom heatmaply heatmaply
#' @importFrom tibble column_to_rownames
#'
#' @author Rico Derks
#'
compare_samples_heatmap <- function(lipid_data, z) {
compare_samples_heatmap <- function(lipid_data, cent_scale, z, clust = FALSE, sample_group = NULL) {
lipid_data <- lipid_data %>%
# only select the samples
filter(.data$sample_type == "sample",
.data$keep == TRUE) %>%
# scale "row wise" i.e. lipid
group_by(.data$my_id) %>%
# keep in mind, scale always returns a matrix
mutate(scaled_area = scale(.data$area)[, 1]) %>%
ungroup() %>%
# total area normalisation
group_by(.data$sample_name) %>%
mutate(norm_area = .data$area / sum(.data$area)) %>%
Expand All @@ -34,37 +36,61 @@ compare_samples_heatmap <- function(lipid_data, z) {
order_yaxis = paste(.data$LipidClass, .data$ShortLipidName, sep = "_"),
# what to plot
plot_z = case_when(
z == "zscore" ~ .data$scaled_area,
z == "raw" ~ .data$area,
z == "totnorm" ~ .data$norm_area
))

if(cent_scale == TRUE) {
lipid_data <- lipid_data %>%
# # scale "row wise" i.e. lipid
group_by(.data$my_id) %>%
# keep in mind, scale always returns a matrix
mutate(plot_z = scale(.data$plot_z)[, 1]) %>%
ungroup()
}

legend_name <- case_when(
z == "zscore" ~ "z-score",
z == "raw" ~ "Raw data",
z == "totnorm" ~ "Tot. area norm."
)

p <- lipid_data %>%
plot_ly(x = ~sample_name,
y = ~ShortLipidName,
colorbar = list(title = legend_name)) %>%
add_heatmap(z = ~plot_z,
text = ~LipidClass,
colorscale = "Rainbow",
hovertemplate = paste(
"%{xaxis.title.text}: %{x}<br>",
"%{yaxis.title.text}: %{y}<br>",
"Lipid class: %{text}<br>",
"Value: %{z:.3f}",
"<extra></extra>" # needed to remove the trace box
),
# order the y-axis according to lipid class and then lipid
yaxis = list(type = "category",
categoryorder = "array",
categoryarray = ~order_yaxis)) %>%
layout(yaxis = list(title = list(text = "Short lipid name")),
xaxis = list(title = list(text = "Sample name")))
# need to make the data wide for heatmaply
plot_data <- lipid_data %>%
pivot_wider(id_cols = c(.data$ShortLipidName, .data$LipidClass),
names_from = .data$sample_name,
values_from = .data$plot_z) %>%
arrange(desc(.data$LipidClass), .data$ShortLipidName) %>%
column_to_rownames(var = "ShortLipidName")

if(is.null(sample_group)){
p <- heatmaply(x = plot_data %>%
select(-.data$LipidClass),
dendrogram = ifelse(clust == TRUE, "both", "none"),
scale = "none",
colors = rainbow(n = 100,
alpha = 0.5),
xlab = "Sample name",
ylab = "Lipid",
fontsize_row = 6)
} else {
# extra the sample group info
col_group <- lipid_data %>%
select(.data$sample_name, matches(paste0("^", sample_group, "$"))) %>%
distinct(.data$sample_name,
.keep_all = TRUE) %>%
select(-.data$sample_name)

p <- heatmaply(x = plot_data %>%
select(-.data$LipidClass),
dendrogram = ifelse(clust == TRUE, "both", "none"),
scale = "none",
colors = rainbow(n = 100,
alpha = 0.5),
xlab = "Sample name",
ylab = "Lipid",
fontsize_row = 6,
col_side_colors = col_group)
}

return(p)
}
34 changes: 15 additions & 19 deletions R/cor_heatmap.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,34 @@
#'
#' @description Create a correlation heatmap of all samples..
#'
#' @param df tibble in tidy format
#' @param lipid_data tibble with all the lipid data
#'
#' @return pheatmap object
#' @return plotly object
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr select matches
#' @importFrom dplyr select
#' @importFrom tidyselect matches
#' @importFrom stringr str_extract
#' @importFrom rlang .data
#' @importFrom pheatmap pheatmap
#' @importFrom plotly plot_ly colorbar
#' @importFrom stats cor
#'
#' @importFrom grDevices colorRamp
#'
#' @author Rico Derks
#'
cor_heatmap <- function(df) {
df_m <- df %>%
cor_heatmap <- function(lipid_data) {
df_m <- lipid_data %>%
select(matches("([qQ][cC]pool|[sS]ample)"))

# calculate the correlation
cormat <- cor(df_m)

# Define which pheno data columns should be highlighted in the plot
ann <- data.frame(sample_type = str_extract(string = colnames(df_m),
pattern = "([qQ][cC]pool|[sS]ample)"))
rownames(ann) <- colnames(df_m)

# show heatmap
p <- pheatmap(cormat,
annotation = ann,
cluster_cols = FALSE,
cluster_rows = FALSE,
fontsize_row = 8,
fontsize_col = 6)
p <- plot_ly(x = colnames(df_m),
y = colnames(df_m),
z = cormat,
type = "heatmap",
colors = colorRamp(c("blue", "red"))) %>%
colorbar(limits = c(-1, 1))

return(p)
}
33 changes: 0 additions & 33 deletions R/cor_heatmap2.R

This file was deleted.

64 changes: 29 additions & 35 deletions R/do_pca.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,65 +38,59 @@ do_pca <- function(lipid_data, observations = c("all", "samples"), normalization

# need to make the data wide
# samples as rows and lipids as columns
lipid_data_wide <- lipid_data %>%
lipid_data_prep <- lipid_data %>%
filter(.data$keep == TRUE) %>%
# select which sample type to keep
filter(.data$sample_type %in% select_obs) %>%
# total area normalisation
group_by(.data$sample_name) %>%
mutate(norm_area = .data$area / sum(.data$area)) %>%
ungroup() %>%
# select what to use for PCA
# select which normalization to use for PCA
mutate(value = case_when(
normalization == "raw" ~ .data$area,
normalization == "tot_area" ~ .data$norm_area
)) %>%
# make wide
# do transformations and select which transformation to keep
mutate(value = case_when(
transformation == "none" ~ .data$value,
transformation == "log10" ~ log10(.data$value + 1) # the +1 is correct for any zero's
)) %>%
# do the centering and scaling per variable
group_by(.data$my_id) %>%
mutate(value = .data$value - mean(.data$value),
uv_value = .data$value / sd(.data$value),
par_value = .data$value / sqrt(sd(.data$value))) %>%
ungroup() %>%
# select which scaling to use
mutate(value = case_when(
scaling == "none" ~ .data$value,
scaling == "uv" ~ .data$uv_value,
scaling == "pareto" ~ .data$par_value
))

# return the preprocessed data
pca_data$preprocess_data <- lipid_data_prep %>%
select(.data$sample_name, .data$ShortLipidName, .data$LipidClass, .data$value)

# make wide
lipid_data_wide <- lipid_data_prep %>%
select(.data$sample_name, .data$sample_type, .data$ShortLipidName, .data$value) %>%
pivot_wider(names_from = .data$ShortLipidName,
values_from = .data$value) %>%
# this is slow
clean_names()

# transformation
switch(transformation,
"none" = lipid_data_wide <- lipid_data_wide,
"log10" = lipid_data_wide <- lipid_data_wide %>%
mutate(across(where(is.numeric), ~ log10(.x + 1))) # the +1 is to avoid problems with 0's
)

# center
lipid_data_wide <- lipid_data_wide %>%
mutate(across(where(is.numeric), ~ .x - mean(.x)))

# scale
switch(scaling,
"none" = lipid_data_wide <- lipid_data_wide,
"uv" = lipid_data_wide <- lipid_data_wide %>%
mutate(across(where(is.numeric), ~ .x / sd(.x))),
"pareto" = lipid_data_wide <- lipid_data_wide %>%
mutate(across(where(is.numeric), ~ .x / sqrt(sd(.x))))
)

# return the preprocessed data
pca_data$preprocess_data <- lipid_data_wide %>%
pivot_longer(cols = -c(.data$sample_name, .data$sample_type),
names_to = "lipid",
values_to = "value") %>%
left_join(y = lipid_data %>%
select(.data$ShortLipidName, .data$LipidClass) %>%
distinct(.data$ShortLipidName, .keep_all = TRUE) %>%
mutate(clean_lipid_names = make_clean_names(.data$ShortLipidName)),
by = c("lipid" = "clean_lipid_names"))

# set up the recipe, preprocessing etc.
pca_rec <- recipe(~.,
data = lipid_data_wide) %>%
# define id variables
update_role(.data$sample_name, .data$sample_type,
new_role = "id") %>%
step_pca(all_predictors(),
num_comp = num_pc)
num_comp = 5)

# do the pca
pca_prep <- prep(pca_rec)

# get the explained variance
Expand Down
1 change: 0 additions & 1 deletion R/merge_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#' @return Returns a merged data frame in tidy format
#'
#' @importFrom dplyr mutate left_join if_else
#' @importFrom tidyselect matches
#' @importFrom tidyr pivot_longer
#' @importFrom stringr str_extract str_replace
#' @importFrom magrittr %>%
Expand Down
Loading

0 comments on commit 152e6e2

Please sign in to comment.