Merge pull request #162 from jleechung/feat-aft

Updates to RunBanksy
satijalab · Apr 5, 2024 · 8d46d6c · 8d46d6c
2 parents d9594f6 + 278b572
commit 8d46d6c
Show file tree

Hide file tree

Showing 14 changed files with 857 additions and 828 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: SeuratWrappers
 Title: Community-Provided Methods and Extensions for the Seurat Object
-Version: 0.3.4
-Date: 2024-01-23
+Version: 0.3.5
+Date: 2024-04-04
 Authors@R: c(
   person(given = 'Andrew', family = 'Butler', email = '[email protected]', role = 'aut', comment = c(ORCID = '0000-0003-3608-0463')),
   person(given = "Saket", family = "Choudhary", email = "[email protected]", role = "ctb", comment = c(ORCID = "0000-0001-5202-7633")),

diff --git a/R/banksy.R b/R/banksy.R
@@ -9,18 +9,31 @@ NULL
 #' @param lambda (numeric) Spatial weight parameter
 #' @param assay (character) Assay in Seurat object to use
 #' @param slot (character) Slot in Seurat assay to use
+#' @param use_agf (boolean) Whether to use the AGF
 #' @param dimx (character) Column name of spatial x dimension (must be in metadata)
 #' @param dimy (character) Column name of spatial y dimension (must be in metadata)
+#' @param dimz (character) Column name of spatial z dimension (must be in metadata)
+#' @param ndim (integer) Number of spatial dimensions to extract
 #' @param features (character) Features to compute. Can be 'all', 'variable' or
 #'   a vector of feature names
+#' @param group (character) Column name of a grouping variable (must be in metadata)
+#' @param split.scale (boolean) Whether to separate scaling by group
 #' @param k_geom (numeric) kNN parameter - number of neighbors to use
 #' @param n (numeric) kNN_rn parameter - exponent of radius
 #' @param sigma (numeric) rNN parameter - standard deviation of Gaussian kernel
 #' @param alpha (numeric) rNN parameter - determines radius used
 #' @param k_spatial (numeric) rNN parameter - number of neighbors to use
-#' @param spatial_mode (character) spatial mode to use (kNN_r, kNN_rn, kNN_rank,
-#'   kNN_unif, rNN_gauss)
+#' @param spatial_mode (character) Kernel for neighborhood computation
+#' \itemize{
+#'  \item{kNN_median: k-nearest neighbors with median-scaled Gaussian kernel}
+#'  \item{kNN_r: k-nearest neighbors with $1/r$ kernel}
+#'  \item{kNN_rn: k-nearest neighbors with $1/r^n$ kernel}
+#'  \item{kNN_rank: k-nearest neighbors with rank Gaussian kernel}
+#'  \item{kNN_unif: k-nearest neighbors wth uniform kernel}
+#'  \item{rNN_gauss: radial nearest neighbors with Gaussian kernel}
+#' }
 #' @param assay_name (character) Name for Banksy assay in Seurat object
+#' @param M (numeric) Advanced usage. Highest azimuthal harmonic
 #' @param verbose (boolean) Print messages
 #'
 #' @return A Seurat object with new assay holding a Banksy matrix
@@ -33,11 +46,13 @@ NULL
 #' Algorithm that Unifies Cell Type Clustering and Tissue Domain Segmentation
 #'
 #' @export
-RunBanksy <- function(object, lambda, assay='RNA', slot='data',
-                      dimx=NULL, dimy=NULL, features='variable',
-                      k_geom=10, n=2, sigma=1.5,
-                      alpha=0.05, k_spatial=10, spatial_mode='kNN_r',
-                      assay_name='BANKSY', verbose=TRUE) {
+RunBanksy <- function(object, lambda, assay='RNA', slot='data', use_agf=FALSE,
+                      dimx=NULL, dimy=NULL, dimz=NULL, ndim=2,
+                      features='variable',
+                      group=NULL, split.scale=TRUE,
+                      k_geom=15, n=2, sigma=1.5,
+                      alpha=0.05, k_spatial=10, spatial_mode='kNN_median',
+                      assay_name='BANKSY', M=NULL, verbose=TRUE) {
     # Check packages
     SeuratWrappers:::CheckPackage(package = 'data.table', repository = 'CRAN')
     SeuratWrappers:::CheckPackage(package = 'Matrix', repository = 'CRAN')
@@ -50,20 +65,50 @@ RunBanksy <- function(object, lambda, assay='RNA', slot='data',
     data_own <- get_data(object, assay, slot, features, verbose)
 
     # Get locs
-    locs <- get_locs(object, dimx, dimy, data_own, verbose)
+    locs <- get_locs(object, dimx, dimy, dimz, ndim, data_own, group, verbose)
+    if (!is.null(group)) {
+        object <- AddMetaData(
+            object, metadata = locs,
+            col.name = paste0('staggered_', colnames(locs)))
+    }
 
     # Compute neighbor matrix
-    if (verbose) message('Computing neighbor matrix')
-    data_nbr <- Banksy:::compute.banksyMatrices(
-        gcm = data_own, locs = locs, sigma = sigma, alpha = alpha,
-        kspatial = k_spatial, k_geom = k_geom, n = n, spatialMode = spatial_mode,
-        verbose = verbose)
+    knn_list <- lapply(k_geom, function(kg) {
+      Banksy:::computeNeighbors(locs,
+                                spatial_mode = spatial_mode, k_geom = kg, n = n,
+                                sigma=sigma, alpha=alpha, k_spatial=k_spatial,
+                                verbose=verbose)
+    })
 
     # Create Banksy matrix
+    M <- seq(0, max(Banksy:::getM(use_agf, M)))
+    # Compute harmonics
+    center <- rep(TRUE, length(M))
+    # Only center higher harmonics
+    center[1] <- FALSE
+    har <- Map(function(knn_df, M, center) {
+      x <- Banksy:::computeHarmonics(data_own, knn_df, M, center, verbose)
+      rownames(x) <- paste0(rownames(x), '.m', M)
+      x
+    }, knn_list, M, center)
+
+    # Scale by lambdas
+    lambdas <- Banksy:::getLambdas(lambda, n_harmonics = length(har))
+
+    # Merge with own expression
     if (verbose) message('Creating Banksy matrix')
-    data_banksy <- Matrix::Matrix(
-        rbind(sqrt(1 - lambda) * data_own, sqrt(lambda) * data_nbr),
-        sparse = TRUE)
+    data_banksy <- c(list(data_own), har)
+    if (verbose) message('Scaling BANKSY matrix. Do not call ScaleData on assay ', assay_name)
+    data_scaled <- lapply(data_banksy, fast_scaler,
+                          object, group, split.scale, verbose)
+
+    # Multiple by lambdas
+    data_banksy <- Map(function(lam, mat) lam * mat, lambdas, data_banksy)
+    data_scaled <- Map(function(lam, mat) lam * mat, lambdas, data_scaled)
+
+    # Rbind
+    data_banksy <- do.call(rbind, data_banksy)
+    data_scaled <- do.call(rbind, data_scaled)
 
     # Create an assay object
     if (grepl(pattern = 'counts', x = slot)) {
@@ -76,6 +121,8 @@ RunBanksy <- function(object, lambda, assay='RNA', slot='data',
     if (verbose) message('Setting default assay to ', assay_name)
     object[[assay_name]] <- banksy_assay
     DefaultAssay(object) <- assay_name
+    object <- SetAssayData(object, slot = 'scale.data', new.data = data_scaled,
+                           assay = assay_name)
 
     # Log commands
     object <- Seurat::LogSeuratCommand(object = object)
@@ -89,9 +136,9 @@ get_data <- function(object, assay, slot, features, verbose) {
     if (verbose) message('Fetching data from slot ', slot,' from assay ', assay)
     data_own <- Seurat::GetAssayData(object = object, assay = assay, slot = slot)
     # Feature subset
-    if (features != 'all') {
+    if (features[1] != 'all') {
         if (verbose) message('Subsetting by features')
-        if (features == 'variable') {
+        if (features[1] == 'variable') {
             feat <- Seurat::VariableFeatures(object)
             if (length(feat) == 0) {
                 warning('No variable features found. Running Seurat::FindVariableFeatures')
@@ -109,33 +156,65 @@ get_data <- function(object, assay, slot, features, verbose) {
 }
 
 # Get locations from Seurat object
-get_locs <- function(object, dimx, dimy, data_own, verbose) {
+get_locs <- function(object, dimx, dimy, dimz, ndim, data_own, group, verbose) {
+
     if (!is.null(dimx) & !is.null(dimy)) {
-        # Convert locations data to data table
-        locations <- data.frame(
+        # Extract locations from metadata
+        locs <- data.frame(
             sdimx = unlist(object[[dimx]]),
             sdimy = unlist(object[[dimy]])
         )
-        rownames(locations) <- colnames(object)
-        locs <- data.table::data.table(locations, keep.rownames = TRUE)
-        data.table::setnames(locs, 'rn', 'cell_ID')
+        rownames(locs) <- colnames(object)
+
+        # Add z-dim if present
+        if (!is.null(dimz)) locs$sdimz = object[[dimz]]
 
         # Check locations
         obj_samples <- colnames(data_own)
-        locs_samples <- locs[['cell_ID']]
+        locs_samples <- rownames(locs)
         if (any(is.na(match(obj_samples, locs_samples)))) {
             na_id <- which(is.na(match(obj_samples, locs_samples)))
-            warning('No centroids found for samples: ', paste(obj_samples[na_id], collapse = ', '), '. Dropping samples.')
+            warning('No centroids found for samples: ',
+                    paste(obj_samples[na_id], collapse = ', '), '. Dropping samples.')
             data_own <- data_own[, -na_id, drop = FALSE]
         }
         locs <- locs[match(obj_samples, locs_samples),,drop=FALSE]
+
     } else {
-        locations <- Seurat::GetTissueCoordinates(object)[,1:2]
-        locs <- data.table::data.table(locations, keep.rownames = TRUE)
+        # Extract locations with Seurat accessor
+        locs <- Seurat::GetTissueCoordinates(object)[,seq_len(ndim)]
     }
 
-    colnames(locs) <- c('cell_ID', 'sdimx', 'sdimy')
+    dim_names <- paste0('sdim', c('x','y','z'))
+    colnames(locs) <- dim_names[seq_len(ncol(locs))]
+
+    if (!is.null(group)) {
+        # Stagger locations by group
+        if (verbose) message('Staggering locations by ', group)
+        locs[,1] = locs[,1] + abs(min(locs[,1]))
+        max_x = max(locs[,1]) * 2
+        n_groups = length(unique(unlist(object[[group]])))
+        shift = seq(from = 0, length.out = n_groups, by = max_x)
+        locs[,1] = locs[,1] + rep(shift, table(object[[group]]))
+    }
 
     return(locs)
 }
 
+# Scaling
+fast_scaler = function(data, object, group, split.scale, verbose) {
+    # Split scaling by group
+    if (!is.null(group) & split.scale) {
+        groups = unlist(object[[group]])
+        ugroups = unique(groups)
+        for (curr_group in ugroups) {
+            if (verbose) message('Scaling group: ', curr_group)
+            curr_group_id <- which(curr_group == groups)
+            data[, curr_group_id] <- Seurat:::FastRowScale(
+              data[, curr_group_id])
+        }
+    } else {
+        data <- Seurat::FastRowScale(data)
+    }
+    data
+}