diff --git a/DESCRIPTION b/DESCRIPTION index 734ac67..038db12 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: scSpatialSIM -Title: A point pattern simulator for spatial cellular data -Version: 0.1.3.1 +Title: A Point Pattern Simulator for Spatial Cellular Data +Version: 0.1.3.3 Authors@R: c( person(given = "Alex", @@ -31,14 +31,14 @@ Authors@R: person(given = "Brooke", family = "Fridley", email = "brooke.fridley.moffitt.org", - role = c("aut"), + role = c("cph"), comment = c(ORCID = "")), person(given = "Fridley", family = "Lab", email = "fridley.lab@moffitt.org", role = c("cre")) ) -Description: scSpatialSIM is an easy-to-use wrapper package for spatstat that allows users to simulate cellular point pattern data. The simulation of this data comes in a couple different functions whether since cell clustering (univariate) or multiple cell clustering/colocalization (bivariate) is wanting to be explored. The main audience for this package is those wanting to validate or test hypotheses or methods on controllable abundance/clustering of cells within different spatial single-cell resolution data. +Description: Single cell resolution data has been valuable in learning about tissue microenvironments and interactions between cells or spots. This package allows for the simulation of this level of data, be it single cell or ‘spots’, in both a univariate (single metric or cell type) and bivariate (2 or more metrics or cell types) ways. As more technologies come to marker, more methods will be developed to derive spatial metrics from the data which will require a way to benchmark methods against each other. Additionally, as the field currently stands, there is not a gold standard method to be compared against. We set out to develop an R package that will allow users to simulate point patterns that can be biologically informed from different tissue domains, holes, and varying degrees of clustering/colocalization. The data can be exported as spatial files and a summary file (like 'HALO'). . Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 @@ -66,4 +66,4 @@ Suggests: VignetteBuilder: knitr License: MIT + file LICENSE Config/testthat/edition: 3 -URL: https://github.com/FridleyLab/mIFsim +URL: https://github.com/FridleyLab/scSpatialSIM diff --git a/R/calculate_density.R b/R/calculate_density.R index 0326306..930329a 100644 --- a/R/calculate_density.R +++ b/R/calculate_density.R @@ -42,7 +42,7 @@ CalculateDensity = function(sim_object, steps = NULL, which = "all", step_size = names(step_adj) = step_adj #run grids - out = parallel::mclapply(step_adj, function(step){ + for(step in step_adj){ #if not working with the cell kernels if(step != "Cells"){ message(step) @@ -57,22 +57,21 @@ CalculateDensity = function(sim_object, steps = NULL, which = "all", step_size = if(length(s@`Simulated Kernels`) == 0 ){ #let user know that these haven't been used for any simulation step yet message(paste0("\t", step, " has not yet been simulated")) - NA + next } #make new cl@Parameters = s@Parameters cl@`Simulated Kernels` = s@`Simulated Kernels` cl@`Density Grids` = lapply(which, function(w){ - cat(w, "\n") cbind(grid, prob = CalculateGrid(grid, s@`Simulated Kernels`[[w]], cores = cores)) }) #assign slot back to bid data if(step == "Tissue"){ - sim_object@Tissue <<- cl + sim_object@Tissue = cl } else { - sim_object@Holes <<- cl + sim_object@Holes = cl } } @@ -82,19 +81,25 @@ CalculateDensity = function(sim_object, steps = NULL, which = "all", step_size = #make new cell class object cl = methods::new("Cell") s = methods::slot(sim_object, step)[[cell]] + + if(length(s@`Simulated Kernels`) == 0 ){ + #let user know that these haven't been used for any simulation step yet + message(paste0("\t", step, " has not yet been simulated")) + next + } #fill in with existing parameters cl@Parameters = s@Parameters cl@`Simulated Kernels` = s@`Simulated Kernels` cl@`Density Grids` = lapply(which, function(w){ - cat(w, "\n") cbind(grid, prob = CalculateGrid(grid, s@`Simulated Kernels`[[w]], cores = cores)) }) - sim_object@Cells[[cell]] <<- cl + sim_object@Cells[[cell]] = cl } } - }) + } + return(sim_object) } diff --git a/R/cell_distributions.R b/R/cell_distributions.R index 6d479ed..9aba81c 100644 --- a/R/cell_distributions.R +++ b/R/cell_distributions.R @@ -47,7 +47,7 @@ GenerateDistributions = function(spatial_data, if(dat_class == "list"){ spat_list = pbmcapply::pbmclapply(spatial_data, function(spat){ make_dis(spat, positive_mean, negative_mean, positive_sd, negative_sd) - }) + }, mc.cores = 1) return(spat_list) } else { #if data frame, perform once diff --git a/R/create_cells.R b/R/create_cells.R index 070a2ed..38b5373 100644 --- a/R/create_cells.R +++ b/R/create_cells.R @@ -20,6 +20,7 @@ #' @param random whether or not to randomly generate kernels for cells 2 or more, uf TRUE, shift is not used #' @param overwrite boolean whether to overwrite existing cell kernels and assignments if present #' @param use_window boolean whether to use the simulation window to set x and y limits +#' @param no_kernel boolean whether to create kernels or to randomly assign points positive based on `probs` #' #' @return Returns the original \code{scSpatialSIM} object with additional generated data added to each cell object. #' @@ -36,9 +37,13 @@ GenerateCellPositivity = function(sim_object, k = NA, Force = FALSE, density_heatmap = FALSE, step_size = 1, cores = 1, shift = 0, random = FALSE, overwrite = FALSE, - use_window = FALSE){ + use_window = FALSE, no_kernel = FALSE){ if(!methods::is(sim_object, "SpatSimObj")) stop("`sim_object` must be of class 'SpatSimObj'") if(any(is.null(c(k, xmin, xmax, ymin, ymax, sdmin, sdmax)))) stop("Cannot have `NULL` parameters") + if(no_kernel){ + message("random cell assignments without kernels") + density_heatmap = FALSE + } if(!is.empty(sim_object@Cells[[1]], "Simulated Kernels") & overwrite == FALSE) stop("Already have cell kernels and `overwrite == FALSE`") @@ -92,14 +97,16 @@ GenerateCellPositivity = function(sim_object, k = NA, #make sure that the shift is in bounds if((shift < 0 | shift > 1) & ncells > 1) stop("supply an appropriate shift") #dummy variable to prevent console printing - dmb = lapply(seq(ncells), function(cell){ + #need to convert to for loop + for (cell in seq(ncells)){ + cl = methods::new("Cell") #subset the params to the specific cells parameters params = params_overall params$probs = params$probs[cell,] %>% as.numeric() #if no parameters are input then use the initialized params = mapply(replace_na, sim_object@Cells[[cell]]@Parameters, params, SIMPLIFY = FALSE) #add updated parameters to the object cell - sim_object@Cells[[cell]]@Parameters <<- params + cl@Parameters = params #get the window size win_limits = c(sim_object@Window$xrange, sim_object@Window$yrange) #check whether the parameters would simulate outside window @@ -113,16 +120,16 @@ GenerateCellPositivity = function(sim_object, k = NA, message("x and y range inside window boundary") } #produce kernel parameter list for k clusters in each simulated pattern - if(cell == 1 | random == TRUE){ - sim_object@Cells[[cell]]@`Simulated Kernels` <<- lapply(seq(sim_object@Sims), function(hld){ + if((cell == 1 | random) & !no_kernel){ + cl@`Simulated Kernels` = lapply(seq(sim_object@Sims), function(hld){ do.call(gaussian_kernel, utils::head(params, -1)) }) - } else { + } else if(!no_kernel){ #shift kernel from initial if wanted otherwise random make new ones? if(shift == 0){ - sim_object@Cells[[cell]]@`Simulated Kernels` <<- sim_object@Cells[[1]]@`Simulated Kernels` + cl@`Simulated Kernels` = sim_object@Cells[[1]]@`Simulated Kernels` } else { - sim_object@Cells[[cell]]@`Simulated Kernels` <<- lapply(seq(sim_object@Sims), function(hld){ + cl@`Simulated Kernels` = lapply(seq(sim_object@Sims), function(hld){ kern = sim_object@Cells[[1]]@`Simulated Kernels`[[hld]] #if there are less than 3 centers, just return the kernel #will invert below @@ -142,48 +149,56 @@ GenerateCellPositivity = function(sim_object, k = NA, if(density_heatmap){ if(cell == 1 | shift != 0 | random == TRUE){ message(paste0("Computing density heatmap for Cell ", cell)) - sim_object@Cells[[cell]]@`Density Grids` <<- pbmcapply::pbmclapply(sim_object@Cells[[cell]]@`Simulated Kernels`, function(gauss_tab){ + cl@`Density Grids` = pbmcapply::pbmclapply(cl@`Simulated Kernels`, function(gauss_tab){ cbind(grid, prob = CalculateGrid(grid, gauss_tab, cores = cores)) - }) + }, mc.cores = 1) } else { message(paste0("Copying density heatmap for Cell ", cell)) - sim_object@Cells[[cell]]@`Density Grids` <<- pbmcapply::pbmclapply(sim_object@Cells[[1]]@`Density Grids`, function(grid_tab){ + cl@`Density Grids` = pbmcapply::pbmclapply(sim_object@Cells[[1]]@`Density Grids`, function(grid_tab){ return(grid_tab) - }) + }, mc.cores = 1) } } if(is.empty(sim_object, "Spatial Files")){ - sim_object@`Spatial Files` <<- lapply(sim_object@Patterns, data.frame) + sim_object@`Spatial Files` = lapply(sim_object@Patterns, data.frame) } message(paste0("Computing probability for Cell ", cell)) - sim_object@`Spatial Files` <<- pbmcapply::pbmclapply(seq(sim_object@`Spatial Files`), function(spat_num){ - vec = CalculateGrid(sim_object@`Spatial Files`[[spat_num]], - sim_object@Cells[[cell]]@`Simulated Kernels`[[spat_num]], cores = cores) - #if the cell is other than the first, adjust it based on first cell and correlation - # if(cell != 1){ - # if(correlation == 0){ - # vec = stats::runif(length(vec), min = 0, max = 1) - # } else if(correlation < 0){ - # vec = vec * correlation + 1 - # } else { - # vec = vec * correlation - # } - # } - #make table with probabilities and positive/negative - df = data.frame(col1 = scale_probs(vec * 0.9, params$probs)) - df$col2 = ifelse(stats::rbinom(nrow(df), size = 1, prob = df$col1) == 1, "Positive", "Negative") + sim_object@`Spatial Files` = pbmcapply::pbmclapply(seq(sim_object@`Spatial Files`), function(spat_num){ + #if no_kernel if FALSE, use kernel + if(!no_kernel){ + vec = CalculateGrid(sim_object@`Spatial Files`[[spat_num]], + cl@`Simulated Kernels`[[spat_num]], cores = cores) + #if the cell is other than the first, adjust it based on first cell and correlation + # if(cell != 1){ + # if(correlation == 0){ + # vec = stats::runif(length(vec), min = 0, max = 1) + # } else if(correlation < 0){ + # vec = vec * correlation + 1 + # } else { + # vec = vec * correlation + # } + # } + #make table with probabilities and positive/negative + df = data.frame(col1 = scale_probs(vec * 0.9, params$probs)) + df$col2 = ifelse(stats::rbinom(nrow(df), size = 1, prob = df$col1) == 1, "Positive", "Negative") + } else { + df = data.frame(col1 = rep(params$probs[2], nrow(sim_object@`Spatial Files`[[spat_num]]))) + df$col2 = ifelse(stats::rbinom(nrow(df), size = 1, prob = df$col1) == 1, "Positive", "Negative") + } names(df) = c(paste("Cell", cell, "Probability"), paste("Cell", cell, "Assignment")) return(cbind(sim_object@`Spatial Files`[[spat_num]], df)) - }) - }) + }, mc.cores = 1) + + sim_object@Cells[[cell]] = cl + } return(sim_object) } diff --git a/R/create_holes.R b/R/create_holes.R index 0fd039e..ef9f309 100644 --- a/R/create_holes.R +++ b/R/create_holes.R @@ -125,7 +125,7 @@ GenerateHoles = function(sim_object, xmin = NA, xmax = NA, ymin = NA, ymax = NA, sim_object@Holes@`Density Grids` = pbmcapply::pbmclapply(sim_object@Holes@`Simulated Kernels`, function(gauss_tab){ cbind(grid, prob = CalculateGrid(grid, gauss_tab, cores = cores)) - }) + }, mc.cores = 1) } if(is.empty(sim_object, "Spatial Files")){ @@ -147,7 +147,7 @@ GenerateHoles = function(sim_object, xmin = NA, xmax = NA, ymin = NA, ymax = NA, # "Drop", "Keep") df$`Hole Assignment` = ifelse(stats::rbinom(nrow(df), size = 1, prob = df$`Hole Probability Scaled`) == 1, "Drop", "Keep") return(df) - }) + }, mc.cores = 1) return(sim_object) } diff --git a/R/create_tissue.R b/R/create_tissue.R index 00f968b..5d609ab 100644 --- a/R/create_tissue.R +++ b/R/create_tissue.R @@ -134,7 +134,7 @@ GenerateTissue = function(sim_object, k = NA, sim_object@Tissue@`Density Grids` = pbmcapply::pbmclapply(sim_object@Tissue@`Simulated Kernels`, function(gauss_tab){ cbind(grid, prob = CalculateGrid(grid, gauss_tab, cores = cores)) - }) + }, mc.cores = 1) } if(is.empty(sim_object, "Spatial Files")){ @@ -148,7 +148,7 @@ GenerateTissue = function(sim_object, k = NA, sim_object@Tissue@`Simulated Kernels`[[spat_num]], cores = cores) * 0.9) df$`Tissue Assignment` = ifelse(stats::rbinom(nrow(df), size = 1, prob = df$`Tissue Probability`) == 1, "Tissue 1", "Tissue 2") return(df) - }) + }, mc.cores = 1) return(sim_object) } diff --git a/R/get_spatial_list.R b/R/get_spatial_list.R index 8223406..22642b4 100644 --- a/R/get_spatial_list.R +++ b/R/get_spatial_list.R @@ -41,7 +41,7 @@ CreateSpatialList = function(sim_object, single_df = FALSE, multihit_action = " unlist() #return data return(tmp_dat) - }) + }, mc.cores = 1) #give each spatial data frame a psuedo sample name names(spatial_files) = paste("Spatial Data", seq_along(spatial_files)) diff --git a/R/plotting.R b/R/plotting.R index f158aba..85ca846 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -107,7 +107,7 @@ PlotSimulation = function(sim_object, nrow = 1, ncol = 1, which = 1, what = "tis } } else if(what == "whole core"){ if(length(which) == 1){ - if(length(sim_object@Cells[[1]]@`Simulated Kernels`) == 0){ + if(length(sim_object@`Spatial Files`[[1]]) == 0){ stop("Need to simulate cells") } df = sim_object@`Spatial Files`[[which]] @@ -146,7 +146,7 @@ PlotSimulation = function(sim_object, nrow = 1, ncol = 1, which = 1, what = "tis stop("For whole core, only a single spatial file can be plotted at once") } } else { - cat("please be patient while method is being updated") + stop("Please be patient while method is being updated") } } diff --git a/R/summary.R b/R/summary.R index f2ab5ef..be0c2e4 100644 --- a/R/summary.R +++ b/R/summary.R @@ -4,6 +4,7 @@ #' @param ... nothing else to pass to summary if object is a SpatSimObj #' #' @method summary SpatSimObj +#' @returns summary of the SpatSimObj to the terminal #' #' @export summary.SpatSimObj <- function(object, ...){ @@ -30,7 +31,7 @@ summary.SpatSimObj <- function(object, ...){ #' `which` pattern to plot, and `what` which currently only works with "Processes" but may be updated in the future #' #' @method plot SpatSimObj -#' +#' @returns basic x-y ggplot object #' #' @export plot.SpatSimObj <- function(x, ...){ # diff --git a/R/utils-helpers.R b/R/utils-helpers.R index 4b03655..154d397 100644 --- a/R/utils-helpers.R +++ b/R/utils-helpers.R @@ -87,7 +87,6 @@ replace_na <- function(x, y) { generate_sum_vector <- function(num_vals, min_val, max_val, sum_val) { vals <- numeric(num_vals) - #cat("running\n") cn = 1 while (TRUE) { vals <- stats::runif(num_vals, min_val, max_val) @@ -98,7 +97,6 @@ generate_sum_vector <- function(num_vals, min_val, max_val, sum_val) { #for example, trying to randomly generate 3 numbers between 0.1 and 0.35, that add up to 0.3 #and unles the numbers are identical it'll never achieve it if(cn == 10000){ - #cat("tick\n") min_val = min_val - (min_val * 0.1) cn = 1 } @@ -112,7 +110,6 @@ generate_sum_vector <- function(num_vals, min_val, max_val, sum_val) { make_dis = function(spat, positive_mean, negative_mean, positive_sd, negative_sd){ cells = grep("Cell", colnames(spat), value = TRUE) dat = spat %>% dplyr::arrange(get(cells)) - #ugh for loop for(cell_n in seq(cells)){ cell = cells[cell_n] counts = data.frame(table(spat[[cell]])) @@ -160,7 +157,6 @@ gaussian_kernel_shift = function(kern, shift, win_limits){ repl = ifelse(nrow(dirichlet_intersect) < nrow(kern), TRUE, FALSE) if(repl){ warning("kernel has more rows than the Dirishlet Intersections") - print(kern) } dirichlet_intersect = dirichlet_intersect[sample(seq(nrow(dirichlet_intersect)), nrow(kern), replace = ifelse(nrow(dirichlet_intersect) < nrow(kern), TRUE, FALSE)),] diff --git a/docs/articles/a01_Introduction.html b/docs/articles/a01_Introduction.html index 632956b..27c081e 100644 --- a/docs/articles/a01_Introduction.html +++ b/docs/articles/a01_Introduction.html @@ -26,7 +26,7 @@ scSpatialSIM - 0.1.2.0 + 0.1.3.2