From dd0b0df7345c490d93035e9815d2f9ee59de65e6 Mon Sep 17 00:00:00 2001 From: dtm2451 Date: Fri, 8 Dec 2023 12:26:48 -0500 Subject: [PATCH 1/2] initialize 'calcNeighborMetadataDiversity()' and 'dittoNeighborDiversityPlot()' --- DESCRIPTION | 1 + NAMESPACE | 2 + R/dittoNeighborDiv.R | 219 ++++++++++++++++++++++ R/utils-defaulting.R | 14 ++ man/calcNeighborMetadataDiversity.Rd | 58 ++++++ man/dittoNeighborDiversityPlot.Rd | 265 +++++++++++++++++++++++++++ 6 files changed, 559 insertions(+) create mode 100644 R/dittoNeighborDiv.R create mode 100644 man/calcNeighborMetadataDiversity.Rd create mode 100644 man/dittoNeighborDiversityPlot.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 0a3c7d3..35d3169 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,6 +40,7 @@ Imports: methods, Suggests: plotly, testthat, Seurat (>= 2.2), + SeuratObject, DESeq2, edgeR, ggplot.multistats, diff --git a/NAMESPACE b/NAMESPACE index de1903d..f01e9ab 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(Lighten) export(Simulate) export(addDimReduction) export(addPrcomp) +export(calcNeighborMetadataDiversity) export(demux.SNP.summary) export(demux.calls.summary) export(dittoBarPlot) @@ -15,6 +16,7 @@ export(dittoDimPlot) export(dittoDotPlot) export(dittoFreqPlot) export(dittoHeatmap) +export(dittoNeighborDiversityPlot) export(dittoPlot) export(dittoPlotVarsAcrossGroups) export(dittoRidgeJitter) diff --git a/R/dittoNeighborDiv.R b/R/dittoNeighborDiv.R new file mode 100644 index 0000000..7640bdd --- /dev/null +++ b/R/dittoNeighborDiv.R @@ -0,0 +1,219 @@ +#' Shows data overlayed on a tsne, pca, or similar type of plot +#' @import ggplot2 +#' +#' @param var String name of a "gene" or "metadata" (or "ident" for a Seurat \code{object}) to use for coloring the plots. +#' This is the data that will be displayed for each cell/sample. Discrete or continuous data both work. 
+#' +#' Alternatively, a string vector naming multiple genes or metadata, OR a vector of the same length as there are cells/samples in the \code{object} which provides per-cell data directly. +#' @param size Number which sets the size of data points. Default = 0.1 here to enable seeing more cells in dense regions. +#' @param opacity Number between 0 and 1, which defaults to 1 here, and can be lowered to make cells more transparent. +#' @param min Number which sets the value associated with the minimum color. Defaults to 0 here. +#' @param data.out Logical. When set to \code{TRUE}, changes the output, from the plot alone, to a list containing +#' the calculated neighborhood diversity metadata ("diversity") either as vector or data.frame depending on how many metadata were given to \code{var}, +#' the plot ("plot"), +#' a data.frame containing the underlying data for target cells ("Target_data"), +#' and a data.frame containing the underlying data for non-target cells ("Others_data"). +#' @inheritParams dittoDimPlot +#' +#' @return A ggplot or plotly object where neighborhood diversity of \code{var}-values among cells' 'nearby' nearest neighbors is overlayed, via color, onto a tSNE, PCA, UMAP, ..., plot of choice. +#' +#' Alternatively, if \code{data.out=TRUE}, a list containing four slots is output: +#' the calculated neighborhood diversity metadata ("diversity") either as vector or data.frame depending on how many metadata were given to \code{var}, +#' the plot (named 'plot'), +#' a data.table containing the underlying data for target cells (named 'Target_data'), +#' and a data.table containing the underlying data for non-target cells (named 'Others_data'). +#' +#' Alternatively, if \code{do.hover} is set to \code{TRUE}, the plot is converted from ggplot to plotly & +#' cell/sample information, determined by the \code{hover.data} input, is retrieved, added to the dataframe, and displayed upon hovering the cursor over the plot. 
+#' +#' @details +#' These plotters start by making use of \code{\link{calcNeighborMetadataDiversity}} +#' +#' +#' @seealso +#' \code{\link{calcNeighborMetadataDiversity}} for details on the neighborhood diversity calculation +#' \code{\link{dittoDimPlot}} and \code{\link{dittoDimHex}} for additional details about the other options as these are the plotters used after diversity calculations complete. +#' +#' @author Daniel Bunis +#' @export +#' @examples +#' example(importDittoBulk, echo = FALSE) +#' myRNA +#' +#' # Temporary Seurat code for calculating neighbors +#' dittoSeq:::.error_if_no_Seurat() +#' myRNA <- Seurat::as.Seurat(myRNA) +#' myRNA <- Seurat::FindNeighbors(myRNA, reduction = "pca", dims = 1:5, return.neighbor = TRUE) +#' +#' # (Using bigger size than the default for these examples because the example data has so few cells) +#' dittoNeighborDiversityPlot(myRNA, "groups", size = 1) +#' + +dittoNeighborDiversityPlot <- function( + object, + var, + neighbors = .default_neighbors(object), + distances, + quantile = 0.9, + reduction.use = .default_reduction(object), + size = 0.1, + opacity = 1, + dim.1 = 1, + dim.2 = 2, + cells.use = NULL, + shape.by = NULL, + split.by = NULL, + split.adjust = list(), + extra.vars = NULL, + multivar.split.dir = c("col", "row"), + show.others = TRUE, + split.show.all.others = TRUE, + split.nrow = NULL, + split.ncol = NULL, + color.panel = dittoColors(), + colors = seq_along(color.panel), + shape.panel = c(16,15,17,23,25,8), + min.color = "#F0E442", + max.color = "#0072B2", + min = 0, + max = NA, + order = c("unordered", "increasing", "decreasing", "randomize"), + main = "make", + sub = NULL, + xlab = "make", + ylab = "make", + rename.var.groups = NULL, + rename.shape.groups = NULL, + theme = theme_bw(), + show.axes.numbers = TRUE, + show.grid.lines = if (is.character(reduction.use)) { !grepl("umap|tsne", tolower(reduction.use)) } else {TRUE}, + do.hover = FALSE, + hover.data = var, + add.trajectory.lineages = NULL, + 
add.trajectory.curves = NULL, + trajectory.cluster.meta, + trajectory.arrow.size = 0.15, + do.contour = FALSE, + contour.color = "black", + contour.linetype = 1, + legend.show = TRUE, + legend.size = 5, + legend.title = "make", + legend.breaks = waiver(), + legend.breaks.labels = waiver(), + shape.legend.size = 5, + shape.legend.title = shape.by, + do.raster = FALSE, + raster.dpi = 300, + data.out = FALSE) { + + var_use <- c() + for (this_var in var) { + this_var_use <- paste0(var, "_diversity") + object[[this_var_use]] <- calcNeighborMetadataDiversity( + object, var, neighbors, distances, quantile + ) + var_use <- c(var_use, this_var_use) + } + div_out <- getMetas(object, names.only = FALSE)[,var_use] + + # Make dataframes and plot + p.df <- dittoDimPlot( + object, var_use, reduction.use, size, opacity, dim.1, dim.2, cells.use, + shape.by, split.by, split.adjust, extra.vars, multivar.split.dir, + show.others, split.show.all.others, split.nrow, split.ncol, + assay = NA, slot = NA, adjustment = NULL, swap.rownames = NULL, + color.panel, colors, shape.panel, min.color, max.color, min, max, order, + main, sub, xlab, ylab, rename.var.groups, rename.shape.groups, theme, + show.axes.numbers, show.grid.lines, + do.letter = FALSE, do.ellipse = FALSE, do.label = FALSE, + labels.size = 5, labels.highlight = TRUE, labels.repel = TRUE, + labels.split.by = split.by, labels.repel.adjust = list(), + do.hover, hover.data = c(var, paste0(var, "_diversity")), + hover.assay = NA, hover.slot = NA, hover.adjustment = NULL, + add.trajectory.lineages, add.trajectory.curves, trajectory.cluster.meta, + trajectory.arrow.size, do.contour, contour.color, contour.linetype, + legend.show, legend.size, legend.title, legend.breaks, + legend.breaks.labels, shape.legend.size, shape.legend.title, + do.raster, raster.dpi, data.out = TRUE) + p <- p.df$plot + Target_data <- p.df$Target_data + Others_data <- p.df$Others_data + + ### RETURN the PLOT ### + if (data.out) { + list( + diversity = div_out, + 
+ plot = p, + Target_data = Target_data, + Others_data = Others_data) + } else { + p + } +} + +#' Shows data overlayed on a tsne, pca, or similar type of plot +#' @import ggplot2 +#' +#' @param var String name of a "gene" or "metadata" (or "ident" for a Seurat \code{object}) to use for coloring the plots. +#' This is the data that will be displayed for each cell/sample. Discrete or continuous data both work. +#' +#' Alternatively, a string vector naming multiple genes or metadata, OR a vector of the same length as there are cells/samples in the \code{object} which provides per-cell data directly. +#' @param neighbors a single string giving either the name of a Neighbors slot of the (Seurat) \code{object}, +#' OR a matrix with cells in its rows and indexes of neighbors in its columns +#' @param distances not needed when \code{neighbors} is directed to a Neighbors slot of the (Seurat) \code{object}, +#' Otherwise, must be given a matrix with cells in its rows and distance measures to each neighbor in its columns +#' @return A named numeric vector of diversity counts, the same length as the number of cells in \code{object} which can be added to the object as cell metadata, and named by the cell names of the \code{object}. +#' +#' @details +#' If given a Seurat \code{object} and \code{neighbors} is given (default) a string value representing a Neighbors object slot name. +#' It then extracts the \code{neighbors}-matrix and \code{distances}-matrix from this object. +#' +#' Otherwise, it uses the \code{neighbors} and \code{distances} inputs for these purposes. +#' +#' To calculate neighbors' Diversity: +#' +#' First, the distance cutoff for neighbors deemed close-enough is determined based on the given \code{quantile} of \code{distances}-values. +#' +#' Then, the function extracts the given \code{var} metadata from the object. 
+#' +#' Finally, it loops through each cell (row) of the neighbors and distances matrices, +#' totaling the number of distinct var-values associated with the cell's neighbors that are within the threshold distance. +#' +#' Cell names are then added directly before the vector is output. +#' +#' @seealso +#' \code{\link{dittoDimPlot}} and \code{\link{dittoDimHex}} for additional details about the other options as these are the plotters used after diversity calculations complete. +#' +#' @author Daniel Bunis +#' @export +#' @examples +#' example(importDittoBulk, echo = FALSE) +calcNeighborMetadataDiversity <- function(object, var, neighbors = .default_neighbors(object), distances, quantile = 0.9) { + + if (is.character(neighbors)) { + .error_if_no_Seurat() + neighbor_object <- SeuratObject::Neighbors(object, neighbors) + neighbors <- neighbor_object@nn.idx + distances <- neighbor_object@nn.dist + } + if (!nrow(neighbors)==ncol(object)) { + stop("The number of cells in 'object' does not match the number of cells tracked in the given 'neighbors' data.") + } + if (!nrow(distances)==ncol(object)) { + stop("The number of cells in 'object' does not match the number of cells tracked in the given 'distances' data.") + + } + threshold <- quantile(distances, probs = quantile) # distance cutoff at the caller-supplied 'quantile' probability + + compar <- meta(var, object) + + OUT <- vapply( + seq_len(ncol(object)), + function(i) { + length(unique(compar[neighbors[i,distances[i,]<=threshold]])) + }, + double(1)) + names(OUT) <- .all_cells(object) + OUT +} diff --git a/R/utils-defaulting.R b/R/utils-defaulting.R index 9717258..370d141 100644 --- a/R/utils-defaulting.R +++ b/R/utils-defaulting.R @@ -116,3 +116,17 @@ use <- .preferred_or_first(opts, c("umap","tsne","pca")) use } + +.default_neighbors <- function(object) { + # Sets the default for calcNeighborMetadataDiversity and related plotters' 'neighbors' input + if (is(object, "Seurat")) { + opts <- Seurat::Neighbors(object) + if (is.null(opts)) { + stop("No Neighbors slots in 'object'. 
Add one, or provide 'neighbors' and 'distances' inputs directly.") + } + use <- .preferred_or_first(opts, c("weighted.nn")) + return(use) + } else { + stop("Auto-extraction of neighbors data cannot be performed for this 'object'-type. Please fill in the 'neighbors' and 'distances' inputs directly.") + } +} \ No newline at end of file diff --git a/man/calcNeighborMetadataDiversity.Rd b/man/calcNeighborMetadataDiversity.Rd new file mode 100644 index 0000000..f3d303d --- /dev/null +++ b/man/calcNeighborMetadataDiversity.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dittoNeighborDiv.R +\name{calcNeighborMetadataDiversity} +\alias{calcNeighborMetadataDiversity} +\title{Shows data overlayed on a tsne, pca, or similar type of plot} +\usage{ +calcNeighborMetadataDiversity( + object, + var, + neighbors = .default_neighbors(object), + distances, + quantile = 0.9 +) +} +\arguments{ +\item{var}{String name of a "gene" or "metadata" (or "ident" for a Seurat \code{object}) to use for coloring the plots. +This is the data that will be displayed for each cell/sample. Discrete or continuous data both work. + +Alternatively, a string vector naming multiple genes or metadata, OR a vector of the same length as there are cells/samples in the \code{object} which provides per-cell data directly.} + +\item{neighbors}{a single string giving either the name of a Neighbors slot of the (Seurat) \code{object}, +OR or matrix with cells in its rows and indexes of neighbors in its columns} + +\item{distances}{not needed when \code{neighbors} is directed to a Neighbors slot of the (Seurat) \code{object}, +Otherwise, must be given a matrix with cells in its rows and distance measures to each neighbor in its columns} +} +\value{ +A named numeric vector of diversity counts, the same length as the number of cells in \code{object} which can be added to the object as cell metadata, and named by the cell names of the \code{object}. 
+} +\description{ +Shows data overlayed on a tsne, pca, or similar type of plot +} +\details{ +If given a Seurat \code{object} and \code{neighbors} is given (default) a string value representing a Neighbors object slot name. +It then extracts the \code{neighbors}-matrix and \code{distances}-matrix from this object. + +Otherwise, it uses the \code{neighbors} and \code{distances} inputs for these purposes. + +To calculate neighbors' Diversity: + +First, the distance cutoff for neighbors deemed close-enough is determined based on the given \code{quantile} of \code{distance}-values. + +Then, function then extracts the given \code{var} metadata from the object. + +Finally, it loops through each cell (row) of the neighbors and distances matrices, +totaling the number of distinct var-values associated with the cell's neighbors that are within the threshold distance. + +Cell names are then added directly before the vector is output. +} +\examples{ +example(importDittoBulk, echo = FALSE) +} +\seealso{ +\code{\link{dittoDimPlot}} and \code{\link{dittoDimHex}} for additional details about the other options as these are the plotters used after diversity calculations complete. 
+} +\author{ +Daniel Bunis +} diff --git a/man/dittoNeighborDiversityPlot.Rd b/man/dittoNeighborDiversityPlot.Rd new file mode 100644 index 0000000..af66f11 --- /dev/null +++ b/man/dittoNeighborDiversityPlot.Rd @@ -0,0 +1,265 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dittoNeighborDiv.R +\name{dittoNeighborDiversityPlot} +\alias{dittoNeighborDiversityPlot} +\title{Shows data overlayed on a tsne, pca, or similar type of plot} +\usage{ +dittoNeighborDiversityPlot( + object, + var, + neighbors = .default_neighbors(object), + distances, + quantile = 0.9, + reduction.use = .default_reduction(object), + size = 0.1, + opacity = 1, + dim.1 = 1, + dim.2 = 2, + cells.use = NULL, + shape.by = NULL, + split.by = NULL, + split.adjust = list(), + extra.vars = NULL, + multivar.split.dir = c("col", "row"), + show.others = TRUE, + split.show.all.others = TRUE, + split.nrow = NULL, + split.ncol = NULL, + color.panel = dittoColors(), + colors = seq_along(color.panel), + shape.panel = c(16, 15, 17, 23, 25, 8), + min.color = "#F0E442", + max.color = "#0072B2", + min = 0, + max = NA, + order = c("unordered", "increasing", "decreasing", "randomize"), + main = "make", + sub = NULL, + xlab = "make", + ylab = "make", + rename.var.groups = NULL, + rename.shape.groups = NULL, + theme = theme_bw(), + show.axes.numbers = TRUE, + show.grid.lines = if (is.character(reduction.use)) { + !grepl("umap|tsne", + tolower(reduction.use)) + } else { + TRUE + }, + do.hover = FALSE, + hover.data = var, + add.trajectory.lineages = NULL, + add.trajectory.curves = NULL, + trajectory.cluster.meta, + trajectory.arrow.size = 0.15, + do.contour = FALSE, + contour.color = "black", + contour.linetype = 1, + legend.show = TRUE, + legend.size = 5, + legend.title = "make", + legend.breaks = waiver(), + legend.breaks.labels = waiver(), + shape.legend.size = 5, + shape.legend.title = shape.by, + do.raster = FALSE, + raster.dpi = 300, + data.out = FALSE +) +} +\arguments{ 
+\item{object}{A Seurat, SingleCellExperiment, or SummarizedExperiment object.} + +\item{var}{String name of a "gene" or "metadata" (or "ident" for a Seurat \code{object}) to use for coloring the plots. +This is the data that will be displayed for each cell/sample. Discrete or continuous data both work. + +Alternatively, a string vector naming multiple genes or metadata, OR a vector of the same length as there are cells/samples in the \code{object} which provides per-cell data directly.} + +\item{reduction.use}{String, such as "pca", "tsne", "umap", or "PCA", etc, which is the name of a dimensionality reduction slot within the object, and which sets what dimensionality reduction space within the object to use. + +Default = the first dimensionality reduction slot inside the object with "umap", "tsne", or "pca" within its name, (priority: UMAP > t-SNE > PCA) or the first dimensionality reduction slot if none of those exist. + +Alternatively, a matrix (or data.frame) containing the dimensionality reduction embeddings themselves. +The matrix should have as many rows as there are cells/samples in the \code{object}. +Note that \code{dim.1} and \code{dim.2} will still be used to select which columns to pull from, and column names will serve as the default \code{xlab} & \code{ylab}.} + +\item{size}{Number which sets the size of data points. Default = 0.1 here to enable seeing more cells in dense regions.} + +\item{opacity}{Number between 0 and 1, which defaults to 0.8 here, and can be increased or lowered to make cells less or more transparent, respectively.} + +\item{dim.1}{The component number to use on the x-axis. Default = 1} + +\item{dim.2}{The component number to use on the y-axis. Default = 2} + +\item{cells.use}{String vector of cells'/samples' names OR an integer vector specifying the indices of cells/samples which should be included. 
+ +Alternatively, a Logical vector, the same length as the number of cells in the object, which sets which cells to include.} + +\item{shape.by}{Variable for setting the shape of cells/samples in the plot. Note: must be discrete. Can be the name of a gene or meta-data. Alternatively, can be "ident" for clusters of a Seurat object. Alternatively, can be a numeric of length equal to the total number of cells/samples in object. + +Note: shapes can be harder to see, and to process mentally, than colors. +Even as a color blind person myself writing this code, I recommend use of colors for variables with many discrete values.} + +\item{split.by}{1 or 2 strings naming discrete metadata to use for splitting the cells/samples into multiple plots with ggplot faceting. + +When 2 metadatas are named, c(row,col), the first is used as rows and the second is used for columns of the resulting grid. + +When 1 metadata is named, shape control can be achieved with \code{split.nrow} and \code{split.ncol}} + +\item{split.adjust}{A named list which allows extra parameters to be pushed through to the faceting function call. +List elements should be valid inputs to the faceting functions, e.g. `list(scales = "free")`. + +For options, when giving 1 metadata to \code{split.by}, see \code{\link[ggplot2]{facet_wrap}}, +OR when giving 2 metadatas to \code{split.by}, see \code{\link[ggplot2]{facet_grid}}.} + +\item{extra.vars}{String vector providing names of any extra metadata to be stashed in the dataframe supplied to \code{ggplot(data)}. + +Useful for making custom splitting/faceting or other additional alterations \emph{after} dittoSeq plot generation.} + +\item{multivar.split.dir}{"row" or "col", sets the direction of faceting used for 'var' values when \code{var} is given multiple genes or metadata, and when \code{split.by} is used to provide additional data to facet by.} + +\item{show.others}{Logical. Whether other cells should be shown in the background in light gray. 
Default = TRUE.} + +\item{split.show.all.others}{Logical which sets whether gray "others" cells of facets should include all cells of other facets (\code{TRUE}) versus just cells left out by \code{cell.use} (\code{FALSE}).} + +\item{split.nrow, split.ncol}{Integers which set the dimensions of faceting/splitting when a single metadata is given to \code{split.by}.} + +\item{color.panel}{String vector which sets the colors to draw from. \code{dittoColors()} by default, see \code{\link{dittoColors}} for contents.} + +\item{colors}{Integer vector, the indexes / order, of colors from color.panel to actually use. + +Useful for quickly swapping the colors of nearby clusters.} + +\item{shape.panel}{Vector of integers corresponding to ggplot shapes which sets what shapes to use. +When discrete groupings are supplied by \code{shape.by}, this sets the panel of shapes. +When nothing is supplied to \code{shape.by}, only the first value is used. +Default is a set of 6, \code{c(16,15,17,23,25,8)}, the first being a simple, solid, circle. + +Note: Unfortunately, shapes can be hard to see when points are on top of each other & they are more slowly processed by the brain. +For these reasons, even as a color blind person myself writing this code, I recommend use of colors for variables with many discrete values.} + +\item{min.color}{color for lowest values of \code{var}/\code{min}. Default = yellow} + +\item{max.color}{color for highest values of \code{var}/\code{max}. Default = blue} + +\item{min}{Number which sets the value associated with the minimum color. Defaults to 0 here.} + +\item{max}{Number which sets the value associated with the maximum color.} + +\item{order}{String. If the data should be plotted based on the order of the color data, sets whether to plot (from back to front) in "increasing", "decreasing", "randomize" order. +If left as "unordered", plot order is simply based on the order of cells within the \code{object}.} + +\item{main}{String, sets the plot title. 
+Default title is automatically generated if not given a specific value. To remove, set to \code{NULL}.} + +\item{sub}{String, sets the plot subtitle} + +\item{xlab, ylab}{Strings which set the labels for the axes. +Default labels are generated if you do not give this a specific value. +To remove, set to \code{NULL}.} + +\item{rename.var.groups}{String vector which sets new names for the identities of \code{var} groups.} + +\item{rename.shape.groups}{String vector which sets new names for the identities of \code{shape.by} groups.} + +\item{theme}{A ggplot theme which will be applied before dittoSeq adjustments. +Default = \code{theme_bw()}. +See \url{https://ggplot2.tidyverse.org/reference/ggtheme.html} for other options and ideas.} + +\item{show.axes.numbers}{Logical which controls whether the axes values should be displayed.} + +\item{show.grid.lines}{Logical which sets whether gridlines of the plot should be shown. +They are removed when set to FALSE. +Default = FALSE for umap and tsne \code{reduction.use}, TRUE otherwise.} + +\item{do.hover}{Logical which controls whether the output will be converted to a plotly object so that data about individual points will be displayed when you hover your cursor over them. +\code{hover.data} argument is used to determine what data to use.} + +\item{hover.data}{String vector of gene and metadata names, example: \code{c("meta1","gene1","meta2")} which determines what data to show on hover when \code{do.hover} is set to \code{TRUE}.} + +\item{add.trajectory.lineages}{List of vectors representing trajectory paths, each from start-cluster to end-cluster, where vector contents are the names of clusters provided in the \code{trajectory.cluster.meta} input. 
+ +If the \code{\link[slingshot]{slingshot}} package was used for trajectory analysis, +you can provide \code{add.trajectory.lineages = slingLineages('object')}.} + +\item{add.trajectory.curves}{List of matrices, each representing coordinates for a trajectory path, from start to end, where matrix columns represent x (\code{dim.1}) and y (\code{dim.2}) coordinates of the paths. + +Alternatively, a list of lists(/princurve objects) can be provided. +Thus, if the \code{\link[slingshot]{slingshot}} package was used for trajectory analysis, +you can provide \code{add.trajectory.curves = slingCurves('object')}} + +\item{trajectory.cluster.meta}{String name of metadata containing the clusters that were used for generating trajectories. Required when plotting trajectories using the \code{add.trajectory.lineages} method. Names of clusters inside the metadata should be the same as the contents of \code{add.trajectory.lineages} vectors.} + +\item{trajectory.arrow.size}{Number representing the size of trajectory arrows, in inches. Default = 0.15.} + +\item{do.contour}{Logical. Whether density-based contours should be displayed.} + +\item{contour.color}{String that sets the color(s) of the \code{do.contour} contours.} + +\item{contour.linetype}{String or numeric which sets the type of line used for \code{do.contour} contours. +Defaults to "solid", but see \code{\link[ggplot2]{linetype}} for other options.} + +\item{legend.show}{Logical. Whether the legend should be displayed. Default = \code{TRUE}.} + +\item{legend.size}{Number representing the size at which color legend shapes should be plotted (for discrete variable plotting) in the color legend. +Default = 5. *Enlarging the colors legend is incredibly helpful for making colors more distinguishable by color blind individuals.} + +\item{legend.title}{String which sets the title for the color legend. 
Default = \code{NULL} normally, but \code{var} when a shape legend will also be shown.} + +\item{legend.breaks}{Numeric vector which sets the discrete values to show in the color-scale legend for continuous data.} + +\item{legend.breaks.labels}{String vector, with same length as \code{legend.breaks}, which renames what's displayed next to the tick marks of the color-scale.} + +\item{shape.legend.size}{Number representing the size at which shapes should be plotted in the shape legend.} + +\item{shape.legend.title}{String which sets the title of the shapes legend. Default is \code{shape.by}} + +\item{do.raster}{Logical. When set to \code{TRUE}, rasterizes the internal plot area. Useful for editing in external programs (e.g. Illustrator).} + +\item{raster.dpi}{Number indicating dpi to use for rasterization. Default = 300.} + +\item{data.out}{Logical. When set to \code{TRUE}, changes the output, from the plot alone, to a list containing +the calculated neighborhood diversity metadata ("diversity") either as vector or data.frame depending on how many metadata were given to \code{var}, +the plot ("p"), +a data.frame containing the underlying data for target cells ("Target_data"), +and a data.frame containing the underlying data for non-target cells ("Others_data").} +} +\value{ +A ggplot or plotly object where neighborhood diversity of \code{var}-values among cells' 'nearby' nearest neighbors is overlayed, via color, onto a tSNE, PCA, UMAP, ..., plot of choice. + +Alternatively, if \code{data.out=TRUE}, a list containing four slots is output: +the calculated neighborhood diversity metadata ("diversity") either as vector or data.frame depending on how many metadata were given to \code{var}, +the plot (named 'p'), +a data.table containing the underlying data for target cells (named 'Target_data'), +and a data.table containing the underlying data for non-target cells (named 'Others_data'). 
+ +Alternatively, if \code{do.hover} is set to \code{TRUE}, the plot is coverted from ggplot to plotly & +cell/sample information, determined by the \code{hover.data} input, is retrieved, added to the dataframe, and displayed upon hovering the cursor over the plot. +} +\description{ +Shows data overlayed on a tsne, pca, or similar type of plot +} +\details{ +These plotters start by making use of \code{\link{calcNeighborMetadataDiversity}} +} +\examples{ +example(importDittoBulk, echo = FALSE) +myRNA + +# Temporary Seurat code for calculating neighbors +dittoSeq:::.error_if_no_Seurat() +myRNA <- Seurat::as.Seurat(myRNA) +myRNA <- Seurat::FindNeighbors(myRNA, reduction = "pca", dims = 1:5, return.neighbor = TRUE) + +# (Using bigger size than the default for these examples because the example data has so few cells) +dittoNeighborDiversityPlot(myRNA, "groups", size = 1) + +} +\seealso{ +\code{\link{calcNeighborMetadataDiversity}} for details on the neighborhood diversity calculation +\code{\link{dittoDimPlot}} and \code{\link{dittoDimHex}} for additional details about the other options as these are the plotters used after diversity calculations complete. 
+} +\author{ +Daniel Bunis +} From 72fd8286255db4a86b738cfe29646b69b59b6af6 Mon Sep 17 00:00:00 2001 From: dtm2451 Date: Fri, 8 Dec 2023 16:51:28 -0500 Subject: [PATCH 2/2] initialize 'dittoNeighborDiversityHex()' --- NAMESPACE | 1 + R/dittoNeighborDiv.R | 129 ++++++++++++++++-- man/calcNeighborMetadataDiversity.Rd | 4 +- ...rsityPlot.Rd => dittoNeighborDiversity.Rd} | 105 +++++++++++++- 4 files changed, 217 insertions(+), 22 deletions(-) rename man/{dittoNeighborDiversityPlot.Rd => dittoNeighborDiversity.Rd} (77%) diff --git a/NAMESPACE b/NAMESPACE index f01e9ab..cd5a90c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,7 @@ export(dittoDimPlot) export(dittoDotPlot) export(dittoFreqPlot) export(dittoHeatmap) +export(dittoNeighborDiversityHex) export(dittoNeighborDiversityPlot) export(dittoPlot) export(dittoPlotVarsAcrossGroups) diff --git a/R/dittoNeighborDiv.R b/R/dittoNeighborDiv.R index 7640bdd..f501dab 100644 --- a/R/dittoNeighborDiv.R +++ b/R/dittoNeighborDiv.R @@ -1,4 +1,5 @@ -#' Shows data overlayed on a tsne, pca, or similar type of plot +#' Shows Neighbor Diversity data, per a given metadata, overlaid on a umap, tsne, pca, or similar +#' @name dittoNeighborDiversity #' @import ggplot2 #' #' @param var String name of a "gene" or "metadata" (or "ident" for a Seurat \code{object}) to use for coloring the plots. @@ -14,8 +15,9 @@ #' a data.frame containing the underlying data for target cells ("Target_data"), #' and a data.frame containing the underlying data for non-target cells ("Others_data"). #' @inheritParams dittoDimPlot +#' @inheritParams dittoHex #' -#' @return A ggplot or plotly object where neighborhood diversity of \code{var}-values among cells' 'nearby' nearest neighbors is overlayed, via color, onto a tSNE, PCA, UMAP, ..., plot of choice. +#' @return A ggplot or plotly object where neighborhood diversity of \code{var}-values among cells' 'nearby' nearest neighbors is overlaid, via color, onto a tSNE, PCA, UMAP, ..., plot of choice. 
#' #' Alternatively, if \code{data.out=TRUE}, a list containing four slots is output: #' the calculated neighborhood diversity metadata ("diversity") either as vector or data.frame depending on how many metadata were given to \code{var}, @@ -27,15 +29,14 @@ #' cell/sample information, determined by the \code{hover.data} input, is retrieved, added to the dataframe, and displayed upon hovering the cursor over the plot. #' #' @details -#' These plotters start by making use of \code{\link{calcNeighborMetadataDiversity}} -#' +#' These plotters start by making use of \code{\link{calcNeighborMetadataDiversity}}, then +#' passes all inputs through to \code{\link{dittoDimPlot}} or \code{\link{dittoDimHex}} plotters #' #' @seealso #' \code{\link{calcNeighborMetadataDiversity}} for details on the neighborhood diversity calculation #' \code{\link{dittoDimPlot}} and \code{\link{dittoDimHex}} for additional details about the other options as these are the plotters used after diversity calculations complete. 
#' #' @author Daniel Bunis -#' @export #' @examples #' example(importDittoBulk, echo = FALSE) #' myRNA @@ -47,8 +48,12 @@ #' #' # (Using bigger size than the default for these examples because the example data has so few cells) #' dittoNeighborDiversityPlot(myRNA, "groups", size = 1) -#' +#' dittoNeighborDiversityHex(myRNA, "groups") +#' +NULL +#' @describeIn dittoNeighborDiversity Shows Neighbor Diversity data, per a given metadata, overlaid per cell on a umap, tsne, pca, or similar +#' @export dittoNeighborDiversityPlot <- function( object, var, @@ -75,7 +80,7 @@ dittoNeighborDiversityPlot <- function( shape.panel = c(16,15,17,23,25,8), min.color = "#F0E442", max.color = "#0072B2", - min = 0, + min = NA, max = NA, order = c("unordered", "increasing", "decreasing", "randomize"), main = "make", @@ -88,7 +93,7 @@ dittoNeighborDiversityPlot <- function( show.axes.numbers = TRUE, show.grid.lines = if (is.character(reduction.use)) { !grepl("umap|tsne", tolower(reduction.use)) } else {TRUE}, do.hover = FALSE, - hover.data = var, + hover.data = c(var, paste0(var, "_diversity")), add.trajectory.lineages = NULL, add.trajectory.curves = NULL, trajectory.cluster.meta, @@ -109,9 +114,9 @@ dittoNeighborDiversityPlot <- function( var_use <- c() for (this_var in var) { - this_var_use <- paste0(var, "_diversity") + this_var_use <- paste0(this_var, "_diversity") object[[this_var_use]] <- calcNeighborMetadataDiversity( - object, var, neighbors, distances, quantile + object, this_var, neighbors, distances, quantile ) var_use <- c(var_use, this_var_use) } @@ -129,7 +134,7 @@ dittoNeighborDiversityPlot <- function( do.letter = FALSE, do.ellipse = FALSE, do.label = FALSE, labels.size = 5, labels.highlight = TRUE, labels.repel = TRUE, labels.split.by = split.by, labels.repel.adjust = list(), - do.hover, hover.data = c(var, paste0(var, "_diversity")), + do.hover, hover.data, hover.assay = NA, hover.slot = NA, hover.adjustment = NULL, add.trajectory.lineages, add.trajectory.curves, 
trajectory.cluster.meta, trajectory.arrow.size, do.contour, contour.color, contour.linetype, @@ -152,8 +157,106 @@ dittoNeighborDiversityPlot <- function( } } -#' Shows data overlayed on a tsne, pca, or similar type of plot -#' @import ggplot2 +#' @describeIn dittoNeighborDiversity Shows Neighbor Diversity data, per a given metadata, summarized and overlaid per hexagonally-shaped region on a umap, tsne, pca, or similar plot +#' @export +dittoNeighborDiversityHex <- function( + object, + var, + neighbors = .default_neighbors(object), + distances, + quantile = 0.9, + bins = 30, + color.method = NULL, + reduction.use = .default_reduction(object), + dim.1 = 1, + dim.2 = 2, + cells.use = NULL, + color.panel = dittoColors(), + colors = seq_along(color.panel), + split.by = NULL, + extra.vars = NULL, + multivar.split.dir = c("col", "row"), + split.nrow = NULL, + split.ncol = NULL, + split.adjust = list(), + assay.extra = assay, + slot.extra = slot, + adjustment.extra = adjustment, + show.axes.numbers = TRUE, + show.grid.lines = !grepl("umap|tsne", tolower(reduction.use)), + main = "make", + sub = NULL, + xlab = "make", + ylab = "make", + theme = theme_bw(), + do.contour = FALSE, + contour.color = "black", + contour.linetype = 1, + min.density = NA, + max.density = NA, + min.color = "#F0E442", + max.color = "#0072B2", + min.opacity = 0.2, + max.opacity = 1, + min = NA, + max = NA, + do.ellipse = FALSE, + add.trajectory.lineages = NULL, + add.trajectory.curves = NULL, + trajectory.cluster.meta, + trajectory.arrow.size = 0.15, + data.out = FALSE, + legend.show = TRUE, + legend.color.title = "make", + legend.color.breaks = waiver(), + legend.color.breaks.labels = waiver(), + legend.density.title = if (isBulk(object)) "Samples" else "Cells", + legend.density.breaks = waiver(), + legend.density.breaks.labels = waiver() +) { + var_use <- c() + for (this_var in var) { + this_var_use <- paste0(this_var, "_diversity") + object[[this_var_use]] <- calcNeighborMetadataDiversity( + 
object, this_var, neighbors, distances, quantile + ) + var_use <- c(var_use, this_var_use) + } + div_out <- getMetas(object, names.only = FALSE)[,var_use] + + # Make dataframes and plot, forwarding the caller's *.extra settings (NOTE(review): their signature defaults name 'assay'/'slot'/'adjustment', which are not arguments of this function -- confirm intended defaults) + p.df <- dittoDimHex( + object, var_use, bins, color.method, reduction.use, dim.1, dim.2, + cells.use, color.panel, colors, split.by, extra.vars, + multivar.split.dir, split.nrow, split.ncol, split.adjust, + assay = NA, slot = NA, adjustment = NULL, swap.rownames = NULL, + assay.extra = assay.extra, slot.extra = slot.extra, adjustment.extra = adjustment.extra, + show.axes.numbers, show.grid.lines, main, sub, xlab, ylab, theme, + do.contour, contour.color, contour.linetype, min.density, max.density, + min.color, max.color, min.opacity, max.opacity, min, max, + rename.color.groups = NULL, do.ellipse, do.label = FALSE, + labels.size = 5, labels.highlight = TRUE, labels.repel = TRUE, + labels.split.by = split.by, labels.repel.adjust = list(), + add.trajectory.lineages, add.trajectory.curves, trajectory.cluster.meta, + trajectory.arrow.size, data.out = TRUE, legend.show, legend.color.title, + legend.color.breaks, legend.color.breaks.labels, legend.density.title, + legend.density.breaks, legend.density.breaks.labels + ) + p <- p.df$plot + data <- p.df$data + + ### RETURN the PLOT ### + if (data.out) { + list( + diversity = div_out, + plot = p, + data = data) + } else { + p + } +} + +#' Calculates Neighbor Diversity of a given metadata per each cell #' #' @param var String name of a "gene" or "metadata" (or "ident" for a Seurat \code{object}) to use for coloring the plots. #' This is the data that will be displayed for each cell/sample. Discrete or continuous data both work. 
diff --git a/man/calcNeighborMetadataDiversity.Rd b/man/calcNeighborMetadataDiversity.Rd index f3d303d..1432acf 100644 --- a/man/calcNeighborMetadataDiversity.Rd +++ b/man/calcNeighborMetadataDiversity.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/dittoNeighborDiv.R \name{calcNeighborMetadataDiversity} \alias{calcNeighborMetadataDiversity} -\title{Shows data overlayed on a tsne, pca, or similar type of plot} +\title{Calculates Neighbor Diversity of a given metadata per each cell} \usage{ calcNeighborMetadataDiversity( object, @@ -28,7 +28,7 @@ Otherwise, must be given a matrix with cells in its rows and distance measures t A named numeric vector of diversity counts, the same length as the number of cells in \code{object} which can be added to the object as cell metadata, and named by the cell names of the \code{object}. } \description{ -Shows data overlayed on a tsne, pca, or similar type of plot +Calculates Neighbor Diversity of a given metadata per each cell } \details{ If given a Seurat \code{object} and \code{neighbors} is given (default) a string value representing a Neighbors object slot name. 
diff --git a/man/dittoNeighborDiversityPlot.Rd b/man/dittoNeighborDiversity.Rd similarity index 77% rename from man/dittoNeighborDiversityPlot.Rd rename to man/dittoNeighborDiversity.Rd index af66f11..a17377d 100644 --- a/man/dittoNeighborDiversityPlot.Rd +++ b/man/dittoNeighborDiversity.Rd @@ -1,8 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dittoNeighborDiv.R -\name{dittoNeighborDiversityPlot} +\name{dittoNeighborDiversity} +\alias{dittoNeighborDiversity} \alias{dittoNeighborDiversityPlot} -\title{Shows data overlayed on a tsne, pca, or similar type of plot} +\alias{dittoNeighborDiversityHex} +\title{Shows Neighbor Diversity data, per a given metadata, overlaid on a umap, tsne, pca, or similar} \usage{ dittoNeighborDiversityPlot( object, @@ -30,7 +32,7 @@ dittoNeighborDiversityPlot( shape.panel = c(16, 15, 17, 23, 25, 8), min.color = "#F0E442", max.color = "#0072B2", - min = 0, + min = NA, max = NA, order = c("unordered", "increasing", "decreasing", "randomize"), main = "make", @@ -48,7 +50,7 @@ dittoNeighborDiversityPlot( TRUE }, do.hover = FALSE, - hover.data = var, + hover.data = c(var, paste0(var, "_diversity")), add.trajectory.lineages = NULL, add.trajectory.curves = NULL, trajectory.cluster.meta, @@ -67,6 +69,62 @@ dittoNeighborDiversityPlot( raster.dpi = 300, data.out = FALSE ) + +dittoNeighborDiversityHex( + object, + var, + neighbors = .default_neighbors(object), + distances, + quantile = 0.9, + bins = 30, + color.method = NULL, + reduction.use = .default_reduction(object), + dim.1 = 1, + dim.2 = 2, + cells.use = NULL, + color.panel = dittoColors(), + colors = seq_along(color.panel), + split.by = NULL, + extra.vars = NULL, + multivar.split.dir = c("col", "row"), + split.nrow = NULL, + split.ncol = NULL, + split.adjust = list(), + assay.extra = assay, + slot.extra = slot, + adjustment.extra = adjustment, + show.axes.numbers = TRUE, + show.grid.lines = !grepl("umap|tsne", tolower(reduction.use)), + main = "make", + 
sub = NULL, + xlab = "make", + ylab = "make", + theme = theme_bw(), + do.contour = FALSE, + contour.color = "black", + contour.linetype = 1, + min.density = NA, + max.density = NA, + min.color = "#F0E442", + max.color = "#0072B2", + min.opacity = 0.2, + max.opacity = 1, + min = NA, + max = NA, + do.ellipse = FALSE, + add.trajectory.lineages = NULL, + add.trajectory.curves = NULL, + trajectory.cluster.meta, + trajectory.arrow.size = 0.15, + data.out = FALSE, + legend.show = TRUE, + legend.color.title = "make", + legend.color.breaks = waiver(), + legend.color.breaks.labels = waiver(), + legend.density.title = if (isBulk(object)) "Samples" else "Cells", + legend.density.breaks = waiver(), + legend.density.breaks.labels = waiver() +) } \arguments{ \item{object}{A Seurat, SingleCellExperiment, or SummarizedExperiment object.} @@ -224,9 +282,33 @@ the calculated neighborhood diversity metadata ("diversity") either as vector or the plot ("p"), a data.frame containing the underlying data for target cells ("Target_data"), and a data.frame containing the underlying data for non-target cells ("Others_data").} + +\item{bins}{Numeric or numeric vector giving the number of haxagonal bins in the x and y directions. Set to 30 by default.} + +\item{color.method}{Works differently depending on whether the color.var is continous versus discrete: + +\strong{Continuous}: String signifying a function for how target data should be summarized for each bin. +Can be any function that summarizes a numeric vector input with a single numeric output value. +Default is \code{median}. Other useful options are \code{sum}, \code{mean}, \code{sd}, or \code{mad}. + +\strong{Discrete}: A string signifying whether the color should (default) be simply based on the "max" grouping of the bin, +or based on the "max.prop"ortion of cells/samples belonging to any grouping.} + +\item{min.density, max.density}{Number which sets the min/max values used for the density scale. 
+Used no matter whether density is represented through opacity or color.} + +\item{min.opacity, max.opacity}{Scalar between [0,1] which sets the minimum or maximum opacity used for the density legend (when color is used for \code{color.var} data and density is shown via opacity).} + +\item{do.ellipse}{Logical. Whether the groups should be surrounded by median-centered ellipses.} + +\item{legend.density.title, legend.color.title}{Strings which set the title for the legends.} + +\item{legend.density.breaks, legend.color.breaks}{Numeric vector which sets the discrete values to label in the density and color.var legends.} + +\item{legend.density.breaks.labels, legend.color.breaks.labels}{String vector, with same length as \code{legend.*.breaks}, which sets the labels for the tick marks or hex icons of the associated legend.} } \value{ -A ggplot or plotly object where neighborhood diversity of \code{var}-values among cells' 'nearby' nearest neighbors is overlayed, via color, onto a tSNE, PCA, UMAP, ..., plot of choice. +A ggplot or plotly object where neighborhood diversity of \code{var}-values among cells' 'nearby' nearest neighbors is overlaid, via color, onto a tSNE, PCA, UMAP, ..., plot of choice. Alternatively, if \code{data.out=TRUE}, a list containing four slots is output: the calculated neighborhood diversity metadata ("diversity") either as vector or data.frame depending on how many metadata were given to \code{var}, @@ -238,11 +320,19 @@ Alternatively, if \code{do.hover} is set to \code{TRUE}, the plot is coverted fr cell/sample information, determined by the \code{hover.data} input, is retrieved, added to the dataframe, and displayed upon hovering the cursor over the plot. 
} \description{ -Shows data overlayed on a tsne, pca, or similar type of plot +Shows Neighbor Diversity data, per a given metadata, overlaid on a umap, tsne, pca, or similar } \details{ -These plotters start by making use of \code{\link{calcNeighborMetadataDiversity}} +These plotters start by making use of \code{\link{calcNeighborMetadataDiversity}}, then +pass all inputs through to \code{\link{dittoDimPlot}} or \code{\link{dittoDimHex}} plotters. } +\section{Functions}{ +\itemize{ +\item \code{dittoNeighborDiversityPlot()}: Shows Neighbor Diversity data, per a given metadata, overlaid per cell on a umap, tsne, pca, or similar + +\item \code{dittoNeighborDiversityHex()}: Shows Neighbor Diversity data, per a given metadata, summarized and overlaid per hexagonally-shaped region on a umap, tsne, pca, or similar plot + +}} \examples{ example(importDittoBulk, echo = FALSE) myRNA @@ -254,6 +344,7 @@ myRNA <- Seurat::FindNeighbors(myRNA, reduction = "pca", dims = 1:5, return.neig # (Using bigger size than the default for these examples because the example data has so few cells) dittoNeighborDiversityPlot(myRNA, "groups", size = 1) +dittoNeighborDiversityHex(myRNA, "groups") } \seealso{