Skip to content

Commit

Permalink
Merge pull request #7 from FridleyLab/oospina_dev
Browse files Browse the repository at this point in the history
multiple fixes
  • Loading branch information
oospina authored Mar 22, 2024
2 parents b9c4c77 + 40a6826 commit d58d553
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 33 deletions.
2 changes: 1 addition & 1 deletion R/STList.R
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,7 @@ process_lists = function(counts_df_list, coords_df_list){
# If no counts in the entire FOV, mark for removal
if(sum(coords_df_list[[name_i]][['total_counts']]) < 1){
rm_fov = append(rm_fov, name_i)
warning(paste0('No counts present in FOV ', name_i, '. Removong from data set.\n'))
warning(paste0('No counts present in FOV ', name_i, '. Removing from data set.\n'))
}
}

Expand Down
119 changes: 107 additions & 12 deletions R/distribution_plots.R
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,23 +1,25 @@
##
#' @title distribution_plots: Generates distribution plots of spot/cell meta data or gene expression
#' @description Generates violin plots, boxplots, or density plots of continuous
#' variables in the spatial meta data or of gene expression to make comparisons
#' across samples
#' @description Generates violin plots, boxplots, or density plots of variables in the
#' spatial meta data or of gene expression
#' @details The function allows visualization of the distribution of spot/cell total
#' counts, total genes, or expression of specific genes across all samples for
#' comparative purposes.
#' comparative purposes. It also allows grouping of gene expression values by
#' categorical variables (e.g., clusters).
#'
#' @param x an STlist
#' @param plot_meta vector of variables in `x@spatial_meta` to plot distributions.
#' If 'total_counts', the function plots the counts per spot/cell. If 'total_genes',
#' the function plots the number of genes per spot/cell are plotted.
#' @param genes vector of genes to plot expression distribution
#' the function plots the number of genes per spot/cell
#' @param genes vector of genes to plot expression distribution. If used in conjunction
#' with `plot_meta`, the expression values are grouped using that variable
#' @param samples samples to include in the plot. Default (NULL) includes all samples
#' @param data_type one of 'tr' or 'raw', to plot transformed or raw counts
#' @param color_pal a string of a color palette from `khroma` or `RColorBrewer`, or a
#' vector with colors
#' @param plot_type one of "violin", "box", or "density" (violin plots, box plots, or
#' density plots respectively)
#' density plots respectively). If `plot_meta` and `genes` are used together, then
#' density plots are disabled
#' @param ptsize the size of points in the plots
#' @param ptalpha the transparency of points (violin/box plot) or curves (density plots)
#'
Expand All @@ -28,7 +30,8 @@
#
distribution_plots = function(x=NULL, plot_meta=NULL, genes=NULL, samples=NULL, data_type='tr',
color_pal='okabeito', plot_type='violin', ptsize=0.5, ptalpha=0.5){
#require('ggplot2')

require('magrittr')

# Define samples to plot if NULL or numeric
if(is.null(samples)){
Expand All @@ -39,7 +42,8 @@ distribution_plots = function(x=NULL, plot_meta=NULL, genes=NULL, samples=NULL,

# Define if plotting meta data or genes
if(!is.null(plot_meta) & !is.null(genes)){
stop("Plots can be generated only for plot_meta OR genes at a time.")
plist = cluster_gene_plots(x=x, plot_meta=plot_meta, genes=genes, samples=samples, data_type=data_type,
plot_type=plot_type, color_pal=color_pal, ptsize=ptsize, ptalpha=ptalpha)
} else if(!is.null(plot_meta)){
plist = spot_plots(x=x, plot_meta=plot_meta, samples=samples,
plot_type=plot_type, color_pal=color_pal, ptsize=ptsize, ptalpha=ptalpha)
Expand All @@ -57,7 +61,7 @@ distribution_plots = function(x=NULL, plot_meta=NULL, genes=NULL, samples=NULL,
# Helpers ----------------------------------------------------------------------

##
# violin_plots_spot
# spot_plots
#

spot_plots = function(x=NULL, plot_meta=NULL, samples=NULL,
Expand Down Expand Up @@ -115,7 +119,7 @@ spot_plots = function(x=NULL, plot_meta=NULL, samples=NULL,


##
# violin_plots_gene
# gene_plots
#

gene_plots = function(x=NULL, genes=NULL, samples=NULL, data_type='tr',
Expand Down Expand Up @@ -153,7 +157,7 @@ gene_plots = function(x=NULL, genes=NULL, samples=NULL, data_type='tr',
gene_cols = color_parse(color_pal, n_cats=length(samples))

# Create plot
p_list[[gene]] =
#p_list[[gene]] =
if(plot_type == 'box'){
p_list[[gene]] = ggplot2::ggplot(df_tmp, ggplot2::aes(x=samplename, y=.data[['geneexpr']], color=samplename)) +
ggplot2::geom_boxplot(outlier.size=ptsize)
Expand Down Expand Up @@ -185,3 +189,94 @@ gene_plots = function(x=NULL, genes=NULL, samples=NULL, data_type='tr',
}


##
# cluster_gene_plots
#
# Builds one ggplot per gene showing expression per sample, with the values
# grouped (colored) by a single discrete metadata variable.
#
# @param x an STlist; the function reads `x@spatial_meta` and either
# `x@tr_counts` or `x@counts`
# @param plot_meta name of a column in `x@spatial_meta` used for grouping. If more
# than one name is given, only the first one is used
# @param genes vector of gene names (matched against row names of the expression data)
# @param samples names of the samples to include
# @param data_type 'tr' reads `x@tr_counts`; any other value reads `x@counts`
# @param plot_type 'box' or 'violin'; any other value falls back to 'box'
# @param color_pal palette specification passed to `color_parse`
# @param ptsize size of outlier points (box plots) or sina points (violin plots)
# @param ptalpha NOTE(review): accepted (the caller passes it) but currently not
# used anywhere in this function - confirm whether it should set point transparency
# @return a named list of ggplot objects, one per gene found in at least one sample
#

cluster_gene_plots = function(x=NULL, plot_meta=NULL, genes=NULL, samples=NULL, data_type='tr',
                              plot_type='violin', color_pal='roma', ptsize=0.5, ptalpha=0.5){

  if(length(plot_meta) != 1){
    cat(paste0('Only one metadata variable can be plotted at a time. Plotting ', plot_meta[1], '.'))
    plot_meta = plot_meta[1]
  }

  # Keep only samples where plot_meta exists and is discrete with at most 30 categories.
  # Samples are dropped by exact name comparison (not by regular expression), so that
  # sample names containing regex metacharacters (e.g., '.', '+', '(') are handled correctly
  for(i in samples){
    if(!plot_meta %in% colnames(x@spatial_meta[[i]])){
      warning(paste0('Variable ', plot_meta, ' is not present in sample ', i, '.'))
      samples = samples[samples != i]
    } else if(!plyr::is.discrete(x@spatial_meta[[i]][[plot_meta]]) ||
              length(unique(x@spatial_meta[[i]][[plot_meta]])) > 30){
      warning(paste0('Variable ', plot_meta, ' seems continuous or has more than 30 categories and will be skipped for sample ', i, '.'))
      samples = samples[samples != i]
    }
  }
  if(length(samples) < 1){
    stop('All samples skipped.')
  }

  # Check plot type
  if(!plot_type %in% c('box', 'violin')){
    cat("Only 'box' or 'violin' are valid choices. Defaulting to 'box'.")
    plot_type = 'box'
  }

  # Extract data slot
  if(data_type == 'tr'){
    expr_tmp = x@tr_counts
    p_title = 'Normalized expression - '
    ax_title = 'Normalized expression'
  } else {
    expr_tmp = x@counts
    p_title = 'Raw expression - '
    ax_title = 'Counts'
  }

  p_list = list()
  # Loop through list of genes
  for(gene in genes){
    # Collect one data frame per sample containing the gene, then bind them once
    df_list = list()
    for(i in samples){
      if(!(gene %in% rownames(expr_tmp[[i]]))){
        cat(paste0('Sample ', i, ' does not contain ', gene, '.\n'))
      } else{
        # Attach the grouping variable to each spot/cell via its 'libname'
        df_list[[i]] = dplyr::left_join(tibble::tibble(libname=colnames(expr_tmp[[i]]),
                                                       geneexpr=expr_tmp[[i]][gene, ],
                                                       samplename=i),
                                        x@spatial_meta[[i]][, c('libname', plot_meta)], by='libname')
      }
    }

    # Nothing to plot if the gene is absent from every sample. Skip it instead of
    # building a plot from an empty data frame
    if(length(df_list) < 1){
      warning(paste0('Gene ', gene, ' was not found in any sample and will be skipped.'))
      next
    }
    df_tmp = dplyr::bind_rows(df_list)

    # Define color palette (one color per category of plot_meta)
    gene_cols = color_parse(color_pal, n_cats=length(unique(df_tmp[[plot_meta]])))
    names(gene_cols) = unique(df_tmp[[plot_meta]])

    # Create plot
    p_list[[gene]] = ggplot2::ggplot(df_tmp, ggplot2::aes(x=samplename, y=.data[['geneexpr']], color=.data[[plot_meta]]))
    if(plot_type == 'box'){
      p_list[[gene]] = p_list[[gene]] +
        ggplot2::geom_boxplot(outlier.size=ptsize)
    } else if(plot_type == 'violin'){
      p_list[[gene]] = p_list[[gene]] +
        ggplot2::geom_violin(ggplot2::aes(fill=.data[[plot_meta]])) +
        ggforce::geom_sina(size=ptsize) +
        ggplot2::scale_fill_manual(values=gene_cols)
    }
    p_list[[gene]] = p_list[[gene]] +
      ggplot2::scale_color_manual(values=gene_cols) +
      ggplot2::ggtitle(paste0(p_title, gene, '\n', plot_meta)) +
      ggplot2::ylab(ax_title) +
      ggplot2::xlab(NULL) +
      ggplot2::theme(axis.text.x=ggplot2::element_text(angle=30, hjust=1),
                     panel.border=ggplot2::element_rect(fill=NA, color='black'))
  }

  return(p_list)
}

2 changes: 2 additions & 0 deletions R/filter_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ filter_data = function(x=NULL,
col_expr_reads = colSums(df_tmp[grepl(spot_pct_expr, rownames(df_tmp)), ])
col_expr_percent = col_expr_reads/col_total_reads
rm(col_expr_reads) # Clean env
# Force NaNs to zero, which result from zero as denominator (i.e., zero counts in the cell)
col_expr_percent[is.na(col_expr_percent)] = 0

# If no maximum counts set by user, then make it the max counts of each spot/cell
if(is.null(spot_maxreads)){
Expand Down
50 changes: 32 additions & 18 deletions R/plot_spatial_meta.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,29 @@ plot_spatial_meta = function(x, samples=NULL, ks='dtc', ws=NULL, deepSplit=NULL,
stop('No metadata column or clustering parameters were specified. Or specified parameters do not exist in metadata.')
}

# Set default color if NULL input
if(is.null(color_pal)){
color_pal = 'light'
}
# Define color palettes for each meta variable
master_col_pal = vector('list', length(plot_meta))
names(master_col_pal) = plot_meta
for(metacol in plot_meta){
cat_vec_tmp = c()
for(s in samples){
cat_vec_tmp = unique(c(cat_vec_tmp, unique(x@spatial_meta[[s]][[metacol]])))
}
color_pal_tmp = color_pal
if(is.numeric(x@spatial_meta[[s]][[metacol]])){
color_pal_tmp = 'sunset'
}
master_col_pal[[metacol]] = color_parse(color_pal_tmp, length(cat_vec_tmp))
names(master_col_pal[[metacol]]) = cat_vec_tmp

rm(cat_vec_tmp, color_pal_tmp) # Clean env
}

# Make list to store plots
plot_list = list()
for(s in samples){
# Extract metadata for specific sample
Expand All @@ -97,14 +120,6 @@ plot_spatial_meta = function(x, samples=NULL, ks='dtc', ws=NULL, deepSplit=NULL,
}

for(metacol in plot_meta){
# Set default color if NULL input
if(is.null(color_pal)){
color_pal = 'light'
if(is.numeric(x@spatial_meta[[s]][[metacol]])){
color_pal = 'sunset'
}
}

df_tmp2 = df_tmp %>%
dplyr::select(libname, ypos, xpos, meta:=!!metacol)

Expand All @@ -115,10 +130,10 @@ plot_spatial_meta = function(x, samples=NULL, ks='dtc', ws=NULL, deepSplit=NULL,
dplyr::mutate(meta=as.factor(meta))

# Create color palette.
meta_cols = color_parse(color_pal, n_cats=length(unique(df_tmp2[['meta']])))
names(meta_cols) = unique(df_tmp2[['meta']])
if(any(names(meta_cols) == 'No_Data')){
meta_cols[names(meta_cols) == 'No_Data'] = 'gray50'
# meta_cols = color_parse(color_pal, n_cats=length(unique(df_tmp2[['meta']])))
# names(meta_cols) = unique(df_tmp2[['meta']])
if(any(df_tmp2[['meta']] == 'No_Data') & any(!grepl('No_Data', names(master_col_pal[[metacol]])))){
master_col_pal[[metacol]] = append(master_col_pal[[metacol]], c(No_Data='gray50'))
}
}

Expand All @@ -144,7 +159,7 @@ plot_spatial_meta = function(x, samples=NULL, ks='dtc', ws=NULL, deepSplit=NULL,
ggplot2::geom_point(ggplot2::aes(x=xpos, y=ypos, color=meta), size=ptsize)
# Assign color palette to plot for categorical or numerical variable
if(is.factor(df_tmp2[['meta']])){
p = p + ggplot2::scale_color_manual(values=c(meta_cols))
p = p + ggplot2::scale_color_manual(values=master_col_pal[[metacol]])
} else{
if(!is.null(color_pal)){
# Get color palette and number of colors needed.
Expand All @@ -163,19 +178,18 @@ plot_spatial_meta = function(x, samples=NULL, ks='dtc', ws=NULL, deepSplit=NULL,

if(!is.numeric(df_tmp2[['meta']])){ # Test if it's not numeric and make legend spots/dots larger
p = p +
ggplot2::guides(color=guide_legend(override.aes=list(size=ptsize+1)))
ggplot2::guides(color=ggplot2::guide_legend(override.aes=list(size=ptsize+1)))
}
p = p +
labs(color=title_leg, title=title_p) + ggplot2::theme_void()
p = p + ggplot2::labs(color=title_leg, title=title_p) + ggplot2::theme_void()

if(visium){
p = p + ggplot2::scale_y_reverse() + ggplot2::coord_fixed(ratio=1)
} else{
p = p + ggplot2::coord_fixed(ratio=1)
}
#p = p + ggplot2::theme(legend.title=ggplot2::element_blank()) # MAY 09, 2023 PUT META DATA NAME ON LEGEND TITLE, NOT PLOT TITLE
p = p + ggplot2::theme(legend.title=element_text(size=txsize),
plot.title=element_text(size=txsize+2))
p = p + ggplot2::theme(legend.title=ggplot2::element_text(size=txsize),
plot.title=ggplot2::element_text(size=txsize+2))

plot_list[[paste0(s, '_', metacol)]] = p
}
Expand Down
3 changes: 1 addition & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ color_parse = function(color_pal=NULL, n_cats=NULL){
# Test if input is a Khroma name or RColorBrewer.
# If so, create palette.
if(color_pal[1] %in% khroma_cols){
p_palette = khroma::colour(color_pal[1], force=T)
cat_cols = as.vector(p_palette(n_cats))
cat_cols = as.vector(khroma::colour(color_pal[1], force=T)(n_cats))
}else if(color_pal[1] %in% rownames(RColorBrewer::brewer.pal.info)){
cat_cols = colorRampPalette(RColorBrewer::brewer.pal(n_cats, color_pal[1]))(n_cats)
}else{ # Test if user provided a vector of colors.
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# spatialGE

<img align="left" src="logo.png" height="100" width="90" />

An R package for the visualization and analysis of spatially-resolved transcriptomics data,
such as those generated with 10X Visium. The **spatialGE** package features a data object
(STlist: Spatial Transcriptomics List) to store data and results from multiple tissue sections,
Expand Down

0 comments on commit d58d553

Please sign in to comment.