Skip to content

Commit

Permalink
Merge pull request #15 from julia-wrobel/alex-dev
Browse files Browse the repository at this point in the history
Update for Manuscript Reviewers
  • Loading branch information
ACSoupir authored Oct 4, 2024
2 parents 34a9416 + a34d9e8 commit 260764c
Show file tree
Hide file tree
Showing 367 changed files with 3,497 additions and 439 deletions.
8 changes: 8 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
^.*\.Rproj$
^\.Rproj\.user$
^docs$
^data-raw$
^pkgdown$
^LICENSE\.md$
^_pkgdown\.yml$
^\.github$
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
inst/doc
.Rbuildignore
*DS_Store
*.Rproj.user/
.Rproj.user/shared/notebooks/paths
Expand Down
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mxfda
Title: A Functional Data Analysis Package for Spatial Single Cell Data
Version: 0.2.1-1
Version: 0.2.2
Date: 2024-05-06
Authors@R:
c(
Expand All @@ -21,7 +21,7 @@ URL: https://github.com/julia-wrobel/mxfda/, http://juliawrobel.com/mxfda/
BugReports: https://github.com/julia-wrobel/mxfda/issues/
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
Suggests:
knitr,
rmarkdown,
Expand All @@ -32,7 +32,9 @@ Suggests:
spatialTIME,
tibble,
broom,
refund.shiny
refund.shiny,
Seurat,
SeuratObject
Imports:
magrittr,
rlang,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ export(Lcross)
export(Lest)
export(add_summary_function)
export(bivariate)
export(can_permute)
export(entropy)
export(extract_c)
export(extract_entropy)
export(extract_fpca_object)
export(extract_fpca_scores)
export(extract_model)
Expand Down
10 changes: 9 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# mxfda (development version)

# mxfda v0.2.2

- added vignette for working with spatial transcriptomic data as provided in a seurat object from a clear cell renal cell carcinoma CosMx SMI study. Data can be downloaded [here](https://doi.org/10.5281/zenodo.12730227).
- from original v0.1.0 submitted to [CRAN](https://cran.r-project.org/web/packages/mxfda/index.html).
- implemented Vu et al bivariate entropy measure.
- ther bug fixes. See [Commit History](https://github.com/julia-wrobel/mxfda/commits/main/) for complete list of commits associated with the main branch of mxFDA.
- added ability to calculate an empirical estimate for complete spatial randomness (CSR) when extracting summary functions

# mxfda v0.1.0

* starting package out
- starting package out
2 changes: 2 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#' \item{sample_key}{column in both `Metadata` and `Spatial` that is a 1:1 with the samples (unique per sample)}
#' \item{univariate_summaries}{univariate summary slot with nearest neighbor G calculared}
#' \item{bivariate_summaries}{empty slot available for bivariate summaries}
#' \item{multiivariate_summaries}{empty slot available for multivariate summaries}
#' \item{functional_pca}{empty slot for functional PCA data of summaries}
#' \item{functional_cox}{empty slot for functional models}
#' }
Expand Down Expand Up @@ -66,6 +67,7 @@
#' \item{sample_key}{column in both `Metadata` and `Spatial` that is a 1:1 with the samples (unique per sample)}
#' \item{univariate_summaries}{univariate summary slot with nearest neighbor G calculared}
#' \item{bivariate_summaries}{empty slot available for bivariate summaries}
#' \item{multiivariate_summaries}{empty slot available for multivariate summaries}
#' \item{functional_pca}{empty slot for functional PCA data of summaries}
#' \item{functional_cox}{empty slot for functional models}
#' }
Expand Down
77 changes: 52 additions & 25 deletions R/extract_bivariate.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
#' @param r_vec Numeric vector of radii over which to evaluate spatial summary functions. Must begin at 0.
#' @param func Spatial summary function to calculate. Options are c(Kcross, Lcross, Gcross) which denote Ripley's K, Besag's L, and nearest neighbor G function, respectively, or entropy from Vu et al, 2023.
#' @param edge_correction Character string that denotes the edge correction method for spatial summary function. For Kcross and Lcross choose one of c("border", "isotropic", "Ripley", "translate", "none"). For Gcross choose one of c("rs", "km", "han")
#' @param breaks an integer for the number of breaks used for entropy
#' @param empirical_CSR logical to indicate whether to use the permutations to identify the sample-specific complete spatial randomness (CSR) estimation.
#' @param permutations integer for the number of permtuations to use if empirical_CSR is `TRUE` and exact CSR not calculable
#'
#' @details `r lifecycle::badge('stable')`
#'
Expand All @@ -22,61 +23,87 @@
#' \item{fundiff}{sumfun - csr, positive values indicate clustering and negative values repulsion}
#'
#' @author Julia Wrobel \email{`r juliawrobel_email`}
#' @author Alex Soupir \email{`r alexsoupir_email`}
#'
#' @references Xiao, L., Ruppert, D., Zipunnikov, V., and Crainiceanu, C. (2016).
#' @references
#'
#' Xiao, L., Ruppert, D., Zipunnikov, V., and Crainiceanu, C. (2016).
#' Fast covariance estimation for high-dimensional functional data.
#' \emph{Statistics and Computing}, 26, 409-421.
#' DOI: 10.1007/s11222-014-9485-x.
#' @references Vu, T., Seal, S., Ghosh, T., Ahmadian, M., Wrobel, J., & Ghosh, D. (2023).
#'
#' Vu, T., Seal, S., Ghosh, T., Ahmadian, M., Wrobel, J., & Ghosh, D. (2023).
#' FunSpace: A functional and spatial analytic approach to cell imaging data using entropy measures.
#' \emph{PLOS Computational Biology}, 19(9), e1011490.
#'
#' Creed, J. H., Wilson, C. M., Soupir, A. C., Colin-Leitzinger, C. M., Kimmel, G. J.,
#' Ospina, O. E., Chakiryan, N. H., Markowitz, J., Peres, L. C., Coghill, A., & Fridley, B. L. (2021).
#' spatialTIME and iTIME: R package and Shiny application for visualization and analysis of
#' immunofluorescence data. \emph{Bioinformatics} (Oxford, England), 37(23), 4584–4586.
#' https://doi.org/10.1093/bioinformatics/btab757
#'
#' @importFrom spatstat.geom ppp convexhull.xy
#' @import dplyr
#'
#' @export
bivariate = function(mximg,
markvar,
mark1,
mark2,
r_vec,
func = c(Kcross, Lcross, Gcross, entropy),
edge_correction,
breaks = NULL){
markvar,
mark1,
mark2,
r_vec,
func = c(Kcross, Lcross, Gcross, entropy),
edge_correction,
empirical_CSR = FALSE,
permutations = 1000){
#### note to switch edge correction based on choice of func, this should be automated

# subset data to only cells if interest
#mximg = filter(mximg, get(markvar) %in% c(mark1, mark2))
# mximg = filter(mximg, markvar == mark1)
n = mximg %>% count(get(markvar)) %>% pull(n)
#some of the samples will have other cell types, as well as missing cell type of interest
n = mximg %>% count(get(markvar)) %>% pull(n, name = 1)
if(any(n < 2)) return(NA)
if(length(n) < 2) return(NA)
if(sum(names(n) %in% c(mark1, mark2)) < 2) return(NA) #make sure the marks wanted are in n vector

# create a convex hull as the observation window
w = convexhull.xy(mximg[["x"]], mximg[["y"]])

# create ppp object
pp_obj = ppp(mximg[["x"]], mximg[["y"]], window = w,
marks = as.factor(mximg[[markvar]]), checkdup = FALSE)
if(!is.null(breaks)){
r_vec = exp(seq(log(0.05*max(r_vec)), log(max(r_vec)), length.out = breaks))
}

# estimate L using spatstat
sumfun = func(pp_obj,
mark1,
mark2,
r = r_vec,
correction = edge_correction)
if(is.null(breaks)){
df = data.frame(sumfun) %>%
select(r, sumfun = all_of(edge_correction), csr = theo) %>%
mutate(fundiff = sumfun - csr) %>%
select(r, sumfun, csr, fundiff)
return(df)
} else {
return(data.frame(sumfun))

#if the user wants permutations to estimate CSR then will have to run them
if(empirical_CSR == TRUE){
if(identical(Kcross, func)){
#message("Using Exact Complete Spatial Randomness for Ripley's K")
sumfun[[2]] = func(pp_obj, #exact CSR is across all points
r = r_vec,
correction = edge_correction)[[3]]
} else {
sumfun[[2]] = sapply(1:permutations, function(x){
pp_obj_tmp = pp_obj
pp_obj_tmp$marks = sample(pp_obj_tmp$marks)
func(pp_obj_tmp, #sample ppp to length of the correct marks
mark1,
mark2,
r = r_vec,
correction = edge_correction)[[3]]
}) %>%
rowMeans(na.rm = TRUE) #take average of all permutations
}

}

df = data.frame(sumfun) %>%
select(r, sumfun = all_of(edge_correction), csr = theo) %>%
mutate(fundiff = sumfun - csr) %>%
select(r, sumfun, csr, fundiff)



}
Expand Down
119 changes: 101 additions & 18 deletions R/extract_entropy.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,69 @@
#' extract_entropy
#'
#' The extract_entropy() is used to compute spatial entropy at each distance
#' interval for all cell types of interest. The goal is to capture the diversity
#' in cellular composition, such as similar proportions across cell types or
#' dominance of a single type, at a specific distance range. Additionally,
#' spatial patterns, including clustered, independent, or regular, among cell
#' types can also be acquired. In this example, we will look at the spatial
#' heterogeneity across T cells, macrophages, and others. To focus on the local
#' cell-to-cell interactions, we set the default maximum of the distance range
#' (i.e., rmax) to be 400 microns. The default number of distance
#' breaks/intervals is set to 50. Then, a sequence of distance breaks is
#' generated by linearly decreasing from rmax to 0 on a log scale. At each
#' distance range, partial spatial entropy and residual entropy are
#' calculated as in Vu et al. (2023), Altieri et al. (2018). These spatial
#' entropy functions can then be used as input functions for FPCA.
#'
#' @param mxFDAobject object of class `mxFDA`
#' @param markvar The name of the variable that denotes cell type(s) of interest. Character.
#' @param marks Character vector that denotes cell types of interest.
#' @param n_break Total number of distance ranges/intervals of interest made from 0 to `rmax` for calculating entropy
#' @param rmax Max distance between pairs of cells
#'
#' @return object of class `mxFDA` with a dataframe in the `multivariate_summaries` slot
#' @export
#'
extract_entropy = function(mxFDAobject,
markvar,
marks,
n_break = 50,
rmax = 400){
#make sure input object is of right class
if(!inherits(mxFDAobject, "mxFDA"))
stop("Object must be of class `mxFDA`.")
#need spatial data to calculate spatial summary functions
if(nrow(mxFDAobject@Spatial) == 0)
stop("No summary function to be calculated - missing spatial")
#check to make sure that there are more than 1 level in the markvar column
if(!all(marks %in% unique(mxFDAobject@Spatial[[markvar]])))
stop("One or marks not in markvar column of Spatial data")
#check to make sure that there are more than 1 level in the markvar column
if(length(unique(mxFDAobject@Spatial[[markvar]]))<2)
stop("Calculating entropy requires at least 2 cell types")
#get only those columns and marks needed
df_nest = mxFDAobject@Spatial %>%
dplyr::filter(!!dplyr::sym(markvar) %in% marks) %>%
dplyr::select(dplyr::all_of(mxFDAobject@sample_key), x, y, dplyr::all_of(markvar)) %>%
tidyr::nest(data = c(x, y, dplyr::all_of(markvar)))
r_vec = exp(seq(log(0.05*max(rmax)), log(max(rmax)), length.out = n_break))

ndat = df_nest %>% dplyr::mutate(sumfuns = purrr::map(df_nest$data, entropy,
r_vec = r_vec,
markvar = markvar,
.progress = TRUE)) %>%
select(-data) %>%
unnest(sumfuns)

mxFDAobject@multivariate_summaries$entropy = ndat
return(mxFDAobject)
}

#' Entropy
#'
#' @param X object of class `ppp` from spatstat with 2 marks
#' @param i ignored
#' @param j ignored
#' @param df data frame with x and y columns, along with a column for point marks
#' @param r_vec vector of length wanted for breaks (will be rescaled) with max value at max for measuring entropy
#' @param correction ignored
#' @param markvar The name of the variable that denotes cell type(s) of interest. Character.
#'
#' @details `r lifecycle::badge('experimental')`
#'
Expand All @@ -21,30 +80,55 @@
#' \emph{Environmental and ecological statistics}, 25, 95-110.
#'
#' @export
entropy = function(X,
i,
j,
entropy = function(df,
r_vec,
correction
#n_break = 50,
#rmax = 400
){
markvar){
#if not enough points return NAs
if(nrow(df) < 3)
return(data.frame(r = r_vec, spatial_entropy = NA, residual_entropy = NA))
win = spatstat.geom::convexhull.xy(df$x, df$y)
X = spatstat.geom::ppp(df$x, df$y, window = win, marks = df[[markvar]])
breaks = length(r_vec)
#nnumber of cells for marks
cells = df %>%
dplyr::group_by(!!dplyr::sym(markvar)) %>%
dplyr::summarise(counts = dplyr::n()) %>%
dplyr::mutate(!!markvar := paste0(get(markvar), " cells")) %>%
tidyr::spread(key = markvar, value = 'counts')

# Compute spatial entropy
spa_entropy = SimDesign::quiet(
SpatEntropy::altieri(X, distbreak = r_vec[-breaks],
verbose = FALSE, plotout = FALSE)
)
spa_entropy <- tryCatch({
# Try to calculate spatial entropy using SpatEntropy::altieri
SimDesign::quiet(
SpatEntropy::altieri(X, distbreak = r_vec[-breaks], verbose = FALSE, plotout = FALSE)
) %>% suppressWarnings()
}, error = function(e) {
# In case of error, return a fallback data frame
data.frame(
r = r_vec,
spatial_entropy = NA,
residual_entropy = NA
)
})

if(inherits(spa_entropy, "data.frame")){
return(spa_entropy)
}

if (length(spa_entropy$SPI.terms) < breaks){
breaks = length(spa_entropy$SPI.terms)
r = unique(c(spa_entropy$distance.breaks[,1],spa_entropy$distance.breaks[,2]))[-c(1)]
r[breaks] = max(r_vec)
r_vec = r
}
}
r_vec = round(r_vec, 2)

df = data.frame(r = r_vec, spatial_entropy = spa_entropy$SPI.terms,
residual_entropy = spa_entropy$RES.terms)
residual_entropy = spa_entropy$RES.terms) %>%
dplyr::full_join(cells %>%
dplyr::slice(rep(1:n(), each = length(r_vec))) %>% # Repeat the row
dplyr::mutate(r = r_vec),
by = dplyr::join_by(r))

df

Expand All @@ -54,4 +138,3 @@ entropy = function(X,




3 changes: 3 additions & 0 deletions R/extract_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ extract_model = function(mxFDAobject, metric, type, model_name){
if(grepl("[U|u]", metric[1]) & grepl("[K|k]", metric[2])) fit = mxFDAobject@functional_cox$Kest[[model_name]]
if(grepl("[U|u]", metric[1]) & grepl("[G|g]", metric[2])) fit = mxFDAobject@functional_cox$Gest[[model_name]]
if(grepl("[U|u]", metric[1]) & grepl("[L|l]", metric[2])) fit = mxFDAobject@functional_cox$Lest[[model_name]]
if(grepl("[M|m]", metric[1]) & grepl("[E|e]", metric[2])) fit = mxFDAobject@`functional_cox`$entropy[[model_name]]
} else if(type == 'mcox'){
#get mixed effects
if(grepl("[B|b]", metric[1]) & grepl("[K|k]", metric[2])) fit = mxFDAobject@functional_mcox$Kcross[[model_name]]
Expand All @@ -56,6 +57,7 @@ extract_model = function(mxFDAobject, metric, type, model_name){
if(grepl("[U|u]", metric[1]) & grepl("[K|k]", metric[2])) fit = mxFDAobject@functional_mcox$Kest[[model_name]]
if(grepl("[U|u]", metric[1]) & grepl("[G|g]", metric[2])) fit = mxFDAobject@functional_mcox$Gest[[model_name]]
if(grepl("[U|u]", metric[1]) & grepl("[L|l]", metric[2])) fit = mxFDAobject@functional_mcox$Lest[[model_name]]
if(grepl("[M|m]", metric[1]) & grepl("[E|e]", metric[2])) fit = mxFDAobject@functional_mcox$entropy[[model_name]]
} else if(type == 'sofr'){
#get scalar on functional regression
if(grepl("[B|b]", metric[1]) & grepl("[K|k]", metric[2])) fit = mxFDAobject@scalar_on_functional$Kcross[[model_name]]
Expand All @@ -64,6 +66,7 @@ extract_model = function(mxFDAobject, metric, type, model_name){
if(grepl("[U|u]", metric[1]) & grepl("[K|k]", metric[2])) fit = mxFDAobject@scalar_on_functional$Kest[[model_name]]
if(grepl("[U|u]", metric[1]) & grepl("[G|g]", metric[2])) fit = mxFDAobject@scalar_on_functional$Gest[[model_name]]
if(grepl("[U|u]", metric[1]) & grepl("[L|l]", metric[2])) fit = mxFDAobject@scalar_on_functional$Lest[[model_name]]
if(grepl("[M|m]", metric[1]) & grepl("[E|e]", metric[2])) fit = mxFDAobject@scalar_on_functional$entropy[[model_name]]
}

if(is.null(fit))
Expand Down
Loading

0 comments on commit 260764c

Please sign in to comment.