From 8012b965630933450e0255072072fefd013e1819 Mon Sep 17 00:00:00 2001 From: oospina Date: Wed, 20 Mar 2024 08:28:17 -0400 Subject: [PATCH 01/11] distribution_plots groups by category --- R/distribution_plots.R | 119 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 107 insertions(+), 12 deletions(-) mode change 100755 => 100644 R/distribution_plots.R diff --git a/R/distribution_plots.R b/R/distribution_plots.R old mode 100755 new mode 100644 index 2533682..fe4db77 --- a/R/distribution_plots.R +++ b/R/distribution_plots.R @@ -1,23 +1,25 @@ ## #' @title per_unit_counts: Generates distribution plots of spot/cell meta data or gene expression -#' @description Generates violin plots, boxplots, or density plots of continuous -#' variables in the spatial meta data or of gene expression to make comparisons -#' across samples +#' @description Generates violin plots, boxplots, or density plots of variables in the +#' spatial meta data or of gene expression #' @details The function allows to visualize the distribution of spot/cell total #' counts, total genes, or expression of specific genes across all samples for -#' comparative purposes. +#' comparative purposes. It also allows grouping of gene expression values by +#' categorical variables (e.g., clusters). #' #' @param x an STlist #' @param plot_meta vector of variables in `x@spatial_meta` to plot distributions. #' If 'total_counts', the function plots the counts per spot/cell. If 'total_genes', -#' the function plots the number of genes per spot/cell are plotted. -#' @param genes vector of genes to plot expression distribution +#' the function plots the number of genes per spot/cell are plotted +#' @param genes vector of genes to plot expression distribution. If used in conjunction +#' with `plot_meta`, the expression values are grouped using that variable #' @param samples samples to include in the plot. 
Default (NULL) includes all samples #' @param data_type one of 'tr' or 'raw', to plot transformed or raw counts #' @param color_pal a string of a color palette from `khroma` or `RColorBrewer`, or a #' vector with colors #' @param plot_type one of "violin", "box", or "density" (violin plots, box plots, or -#' density plots respectively) +#' density plots respectively). If `plot_meta` and `gene` are used together, then +#' density plots are disabled #' @param ptsize the size of points in the plots #' @param ptalpha the transparency of points (violin/box plot) or curves (density plots) #' @@ -28,7 +30,8 @@ # distribution_plots = function(x=NULL, plot_meta=NULL, genes=NULL, samples=NULL, data_type='tr', color_pal='okabeito', plot_type='violin', ptsize=0.5, ptalpha=0.5){ - #require('ggplot2') + + require('magrittr') # Define samples to plot if NULL or numeric if(is.null(samples)){ @@ -39,7 +42,8 @@ distribution_plots = function(x=NULL, plot_meta=NULL, genes=NULL, samples=NULL, # Define if plotting meta data or genes if(!is.null(plot_meta) & !is.null(genes)){ - stop("Plots can be generated only for plot_meta OR genes at a time.") + plist = cluster_gene_plots(x=x, plot_meta=plot_meta, genes=genes, samples=samples, data_type=data_type, + plot_type=plot_type, color_pal=color_pal, ptsize=ptsize, ptalpha=ptalpha) } else if(!is.null(plot_meta)){ plist = spot_plots(x=x, plot_meta=plot_meta, samples=samples, plot_type=plot_type, color_pal=color_pal, ptsize=ptsize, ptalpha=ptalpha) @@ -57,7 +61,7 @@ distribution_plots = function(x=NULL, plot_meta=NULL, genes=NULL, samples=NULL, # Helpers ---------------------------------------------------------------------- ## -# violin_plots_spot +# spot_plots # spot_plots = function(x=NULL, plot_meta=NULL, samples=NULL, @@ -115,7 +119,7 @@ spot_plots = function(x=NULL, plot_meta=NULL, samples=NULL, ## -# violin_plots_gene +# gene_plots # gene_plots = function(x=NULL, genes=NULL, samples=NULL, data_type='tr', @@ -153,7 +157,7 @@ gene_plots = 
function(x=NULL, genes=NULL, samples=NULL, data_type='tr', gene_cols = color_parse(color_pal, n_cats=length(samples)) # Create plot - p_list[[gene]] = + #p_list[[gene]] = if(plot_type == 'box'){ p_list[[gene]] = ggplot2::ggplot(df_tmp, ggplot2::aes(x=samplename, y=.data[['geneexpr']], color=samplename)) + ggplot2::geom_boxplot(outlier.size=ptsize) @@ -185,3 +189,94 @@ gene_plots = function(x=NULL, genes=NULL, samples=NULL, data_type='tr', } +## +# cluster_gene_plots +# + +cluster_gene_plots = function(x=NULL, plot_meta=NULL, genes=NULL, samples=NULL, data_type='tr', + plot_type='violin', color_pal='roma', ptsize=0.5, ptalpha=0.5){ + + if(length(plot_meta) != 1){ + cat(paste0('Only one metadata variable can be plotted at a time. Plotting ', plot_meta[1], '.')) + plot_meta = plot_meta[1] + } + + # Check if plot_meta is available and is a discrete variable. Remove sample, otherwise + for(i in samples){ + if(!plot_meta %in% colnames(x@spatial_meta[[i]])){ + warning(paste0('Variable ', plot_meta, ' is not present in sample ', i, '.')) + samples = grep(paste0('^', i, '$'), samples, value=T, invert=T) + } else if(!plyr::is.discrete(x@spatial_meta[[i]][[plot_meta]]) | length(unique(x@spatial_meta[[i]][[plot_meta]])) > 30){ + warning(paste0('Variable ', plot_meta, ' seems continuous or has more than 30 categories and will be skipped for sample ', i, '.')) + samples = grep(paste0('^', i, '$'), samples, value=T, invert=T) + } + } + if(length(samples) < 1){ + stop('All samples skipped.') + } + + # Check plot type + if(!plot_type %in% c('box', 'violin')){ + cat("Only 'box' or 'violin' are valid choices. 
Defaulting to 'box'.") + plot_type = 'box' + } + + # Extract data slot + if(data_type == 'tr'){ + expr_tmp = x@tr_counts + p_title = 'Normalized expression - ' + ax_title = 'Normalized expression' + } else { + expr_tmp = x@counts + p_title = 'Raw expression - ' + ax_title = 'Counts' + } + + p_list = list() + # Loop through list of genes + for(gene in genes){ + # Subset samples if gene not in all samples + samples_tmp = samples + df_tmp = tibble::tibble() + for(i in samples){ + if(!(gene %in% rownames(expr_tmp[[i]]))){ + cat(paste0('Sample ', i, ' does not contain ', gene, '.\n')) + samples_tmp = grep(paste0('^', i, '$'), samples_tmp, value=T, invert=T) + } else{ + df_tmp = dplyr::bind_rows(df_tmp, + dplyr::left_join(tibble::tibble(libname=colnames(expr_tmp[[i]]), + geneexpr=expr_tmp[[i]][gene, ], + samplename=i), + x@spatial_meta[[i]][, c('libname', plot_meta)], by='libname')) + } + } + + # Define color palette + gene_cols = color_parse(color_pal, n_cats=length(unique(df_tmp[[plot_meta]]))) + names(gene_cols) = unique(df_tmp[[plot_meta]]) + + # Create plot + p_list[[gene]] = ggplot2::ggplot(df_tmp, ggplot2::aes(x=samplename, y=.data[['geneexpr']], color=.data[[plot_meta]])) + if(plot_type == 'box'){ + p_list[[gene]] = p_list[[gene]] + + ggplot2::geom_boxplot(outlier.size=ptsize) + } else if(plot_type == 'violin'){ + p_list[[gene]] = p_list[[gene]] + + ggplot2::geom_violin(ggplot2::aes(fill=.data[[plot_meta]])) + + ggforce::geom_sina(size=ptsize) + + ggplot2::scale_fill_manual(values=gene_cols) + } + p_list[[gene]] = p_list[[gene]] + + ggplot2::scale_color_manual(values=gene_cols) + + ggplot2::ggtitle(paste0(p_title, gene, '\n', plot_meta)) + + ggplot2::ylab(ax_title) + + ggplot2::xlab(NULL) + + ggplot2::theme(axis.text.x=ggplot2::element_text(angle=30, hjust=1), + panel.border=ggplot2::element_rect(fill=NA, color='black')#, + #legend.title=ggplot2::element_blank() + ) + } + + return(p_list) +} + From 7517ff73fd97feaa98fe07d870b208b8f87f5714 Mon Sep 17 00:00:00 2001 
From: oospina Date: Wed, 20 Mar 2024 15:14:33 -0400 Subject: [PATCH 02/11] fix shared color palette for spatial meta data --- R/STList.R | 2 +- R/plot_spatial_meta.R | 50 +++++++++++++++++++++++++++---------------- R/utils.R | 3 +-- 3 files changed, 34 insertions(+), 21 deletions(-) diff --git a/R/STList.R b/R/STList.R index 567c1ac..c88af59 100644 --- a/R/STList.R +++ b/R/STList.R @@ -811,7 +811,7 @@ process_lists = function(counts_df_list, coords_df_list){ # If no counts in the entire FOV, mark for removal if(sum(coords_df_list[[name_i]][['total_counts']]) < 1){ rm_fov = append(rm_fov, name_i) - warning(paste0('No counts present in FOV ', name_i, '. Removong from data set.\n')) + warning(paste0('No counts present in FOV ', name_i, '. Removing from data set.\n')) } } diff --git a/R/plot_spatial_meta.R b/R/plot_spatial_meta.R index 51fb940..0d1387f 100755 --- a/R/plot_spatial_meta.R +++ b/R/plot_spatial_meta.R @@ -81,6 +81,29 @@ plot_spatial_meta = function(x, samples=NULL, ks='dtc', ws=NULL, deepSplit=NULL, stop('No metadata column or clustering parameters were specified. 
Or specified parameters do not exist in metadata.') } + # Set default color if NULL input + if(is.null(color_pal)){ + color_pal = 'light' + } + # Define color palettes for each meta variable + master_col_pal = vector('list', length(plot_meta)) + names(master_col_pal) = plot_meta + for(metacol in plot_meta){ + cat_vec_tmp = c() + for(s in samples){ + cat_vec_tmp = unique(c(cat_vec_tmp, unique(x@spatial_meta[[s]][[metacol]]))) + } + color_pal_tmp = color_pal + if(is.numeric(x@spatial_meta[[s]][[metacol]])){ + color_pal_tmp = 'sunset' + } + master_col_pal[[metacol]] = color_parse(color_pal_tmp, length(cat_vec_tmp)) + names(master_col_pal[[metacol]]) = cat_vec_tmp + + rm(cat_vec_tmp, color_pal_tmp) # Clean env + } + + # Make list to store plots a plot_list = list() for(s in samples){ # Extract metadata for specific sample @@ -97,14 +120,6 @@ plot_spatial_meta = function(x, samples=NULL, ks='dtc', ws=NULL, deepSplit=NULL, } for(metacol in plot_meta){ - # Set default color if NULL input - if(is.null(color_pal)){ - color_pal = 'light' - if(is.numeric(x@spatial_meta[[s]][[metacol]])){ - color_pal = 'sunset' - } - } - df_tmp2 = df_tmp %>% dplyr::select(libname, ypos, xpos, meta:=!!metacol) @@ -115,10 +130,10 @@ plot_spatial_meta = function(x, samples=NULL, ks='dtc', ws=NULL, deepSplit=NULL, dplyr::mutate(meta=as.factor(meta)) # Create color palette. 
- meta_cols = color_parse(color_pal, n_cats=length(unique(df_tmp2[['meta']]))) - names(meta_cols) = unique(df_tmp2[['meta']]) - if(any(names(meta_cols) == 'No_Data')){ - meta_cols[names(meta_cols) == 'No_Data'] = 'gray50' + # meta_cols = color_parse(color_pal, n_cats=length(unique(df_tmp2[['meta']]))) + # names(meta_cols) = unique(df_tmp2[['meta']]) + if(any(df_tmp2[['meta']] == 'No_Data') & any(!grepl('No_Data', names(master_col_pal[[metacol]])))){ + master_col_pal[[metacol]] = append(master_col_pal[[metacol]], c(No_Data='gray50')) } } @@ -144,7 +159,7 @@ plot_spatial_meta = function(x, samples=NULL, ks='dtc', ws=NULL, deepSplit=NULL, ggplot2::geom_point(ggplot2::aes(x=xpos, y=ypos, color=meta), size=ptsize) # Assign color palette to plot for categorical or numerical variable if(is.factor(df_tmp2[['meta']])){ - p = p + ggplot2::scale_color_manual(values=c(meta_cols)) + p = p + ggplot2::scale_color_manual(values=master_col_pal[[metacol]]) } else{ if(!is.null(color_pal)){ # Get color palette and number of colors needed. 
@@ -163,10 +178,9 @@ plot_spatial_meta = function(x, samples=NULL, ks='dtc', ws=NULL, deepSplit=NULL, if(!is.numeric(df_tmp2[['meta']])){ # Test if it's not numeric and make legend spots/dots larger p = p + - ggplot2::guides(color=guide_legend(override.aes=list(size=ptsize+1))) + ggplot2::guides(color=ggplot2::guide_legend(override.aes=list(size=ptsize+1))) } - p = p + - labs(color=title_leg, title=title_p) + ggplot2::theme_void() + p = p + ggplot2::labs(color=title_leg, title=title_p) + ggplot2::theme_void() if(visium){ p = p + ggplot2::scale_y_reverse() + ggplot2::coord_fixed(ratio=1) @@ -174,8 +188,8 @@ plot_spatial_meta = function(x, samples=NULL, ks='dtc', ws=NULL, deepSplit=NULL, p = p + ggplot2::coord_fixed(ratio=1) } #p = p + ggplot2::theme(legend.title=ggplot2::element_blank()) # MAY 09, 2023 PUT META DATA NAME ON LEGEND TITLE, NOT PLOT TITLE - p = p + ggplot2::theme(legend.title=element_text(size=txsize), - plot.title=element_text(size=txsize+2)) + p = p + ggplot2::theme(legend.title=ggplot2::element_text(size=txsize), + plot.title=ggplot2::element_text(size=txsize+2)) plot_list[[paste0(s, '_', metacol)]] = p } diff --git a/R/utils.R b/R/utils.R index 340eede..1405b79 100755 --- a/R/utils.R +++ b/R/utils.R @@ -56,8 +56,7 @@ color_parse = function(color_pal=NULL, n_cats=NULL){ # Test if input is a Khroma name or RColorBrewer. # If so, create palette. if(color_pal[1] %in% khroma_cols){ - p_palette = khroma::colour(color_pal[1], force=T) - cat_cols = as.vector(p_palette(n_cats)) + cat_cols = as.vector(khroma::colour(color_pal[1], force=T)(n_cats)) }else if(color_pal[1] %in% rownames(RColorBrewer::brewer.pal.info)){ cat_cols = colorRampPalette(RColorBrewer::brewer.pal(n_cats, color_pal[1]))(n_cats) }else{ # Test if user provided a vector of colors. 
From 86906a7049dc9678e7eeed9241189d87ae19f99c Mon Sep 17 00:00:00 2001 From: oospina Date: Thu, 21 Mar 2024 09:45:46 -0400 Subject: [PATCH 03/11] logo in readme --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index ddb6b4c..8c9d531 100755 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ +

+ +

+ # spatialGE An R package for the visualization and analysis of spatially-resolved transcriptomics data, From 5dcd84f581fcd932b878f1bd92982474721d9d38 Mon Sep 17 00:00:00 2001 From: oospina Date: Thu, 21 Mar 2024 09:46:15 -0400 Subject: [PATCH 04/11] logo in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8c9d531..1a9ead8 100755 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@

- +

# spatialGE From b37aefc918653a8c75db52d7c8e267c36223677f Mon Sep 17 00:00:00 2001 From: oospina Date: Thu, 21 Mar 2024 09:49:31 -0400 Subject: [PATCH 05/11] logo in readme --- README.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/README.md b/README.md index 1a9ead8..97876f0 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,4 @@ -

- -

- + # spatialGE An R package for the visualization and analysis of spatially-resolved transcriptomics data, From e8850a8c0080c9d6dbe1b8a63601d3650f6f9bb0 Mon Sep 17 00:00:00 2001 From: oospina Date: Thu, 21 Mar 2024 09:52:10 -0400 Subject: [PATCH 06/11] logo in readme --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 97876f0..28952d1 100755 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ - + + # spatialGE An R package for the visualization and analysis of spatially-resolved transcriptomics data, From b64ebe4f1696742179ecb1d23b37f646c80806ff Mon Sep 17 00:00:00 2001 From: oospina Date: Thu, 21 Mar 2024 09:54:02 -0400 Subject: [PATCH 07/11] logo in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 28952d1..2f2f8b7 100755 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ - + # spatialGE From e47a553ace6a36b9430351db88a7fb771e2e18dc Mon Sep 17 00:00:00 2001 From: oospina Date: Thu, 21 Mar 2024 09:55:31 -0400 Subject: [PATCH 08/11] logo in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2f2f8b7..f22d521 100755 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ - + # spatialGE From d26ae4309ffc1bc5fc6cdb9b4e60eec5e17d79a9 Mon Sep 17 00:00:00 2001 From: oospina Date: Thu, 21 Mar 2024 09:56:46 -0400 Subject: [PATCH 09/11] logo in readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f22d521..6764d1b 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ - - # spatialGE + + An R package for the visualization and analysis of spatially-resolved transcriptomics data, such as those generated with 10X Visium. 
The **spatialGE** package features a data object (STlist: Spatial Transctiptomics List) to store data and results from multiple tissue sections, From 8a032e07dcfcbe89f31fe10c6f6f3b0dad0f6bad Mon Sep 17 00:00:00 2001 From: oospina Date: Thu, 21 Mar 2024 09:58:08 -0400 Subject: [PATCH 10/11] logo in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6764d1b..0eba5bd 100755 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # spatialGE - + An R package for the visualization and analysis of spatially-resolved transcriptomics data, such as those generated with 10X Visium. The **spatialGE** package features a data object From 40a6826ec10b57ef3b0821a3754f55d7cd815880 Mon Sep 17 00:00:00 2001 From: oospina Date: Fri, 22 Mar 2024 14:17:59 -0400 Subject: [PATCH 11/11] fix crash filter_data due to cells with zero total counts --- R/filter_data.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/filter_data.R b/R/filter_data.R index 92b2476..08e59a6 100755 --- a/R/filter_data.R +++ b/R/filter_data.R @@ -167,6 +167,8 @@ filter_data = function(x=NULL, col_expr_reads = colSums(df_tmp[grepl(spot_pct_expr, rownames(df_tmp)), ]) col_expr_percent = col_expr_reads/col_total_reads rm(col_expr_reads) # Clean env + # Force NaNs to zero, which result from zero as denominator (i.e., zero counts in the cell) + col_expr_percent[is.na(col_expr_percent)] = 0 # If no maximum counts set by user, then make it the max counts of each spot/cell if(is.null(spot_maxreads)){