-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscRNA_scATAC_correlation_analysis.R
78 lines (59 loc) · 2.42 KB
/
scRNA_scATAC_correlation_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
library(Seurat)
library(ggplot2)
library(ggfortify)
library(jcolors)
require(gridExtra)
library(grid)
library(gam)
library(princurve)
library(parallel)
library(tidyverse)
library(MyEllipsefit)
library(sctransform)
library(openxlsx)
library(doParallel)
library(tidytext)
library(ggrepel)
library(dtwclust)
library(geomtextpath)
source('./util_funcs.R')
num.cores <- detectCores(all.tests = FALSE, logical = TRUE)
## Splines
sc.rna.spline.fits <- readRDS('../Input/toxo_cdc/rds/sc_rna_spline_fits_all_genes.rds')
sc.atac.spline.fits <- readRDS('../Input/toxo_cdc/rds/sc_atac_spline_fits_all_genes.rds')
marker.genes <- readRDS('../Input/toxo_cdc/rds/Intra_markers_sig.rds')
marker.genes.phase <- marker.genes %>% transmute(GeneID = gene, phase = cluster) %>% distinct()
## Turn the data into wide format (time by gene) and center & scale each gene
sc.rna.dtw.wide <- sc.rna.spline.fits %>%
pivot_wider(names_from = 'GeneID', values_from = 'y') %>%
mutate_at(vars(matches("TGME")), scale) %>%
as.data.frame()
sc.atac.dtw.wide <- sc.atac.spline.fits %>%
pivot_wider(names_from = 'GeneID', values_from = 'y') %>%
mutate_at(vars(matches("TGME")), scale) %>%
as.data.frame()
sc.rna.mu.scale <- sc.rna.dtw.wide %>%
pivot_longer(-x, names_to = 'GeneID', values_to = 'expr')
sc.atac.mu.scale <- sc.atac.dtw.wide %>%
pivot_longer(-x, names_to = 'GeneID', values_to = 'expr')
## Filter to include markers only
sc.rna.mu.scale <- sc.rna.mu.scale %>% dplyr::filter(GeneID %in% marker.genes$gene)
sc.atac.mu.scale <- sc.atac.mu.scale %>% dplyr::filter(GeneID %in% marker.genes$gene)
genes <- unique(sc.atac.mu.scale$GeneID)
cc.dat <- mclapply(1:length(genes), function(i){
atac.g <- sc.atac.mu.scale %>% dplyr::filter(GeneID == genes[i]) %>%
transmute(t = x, y = expr) %>% arrange(t)
rna.g <- sc.rna.mu.scale %>% dplyr::filter(GeneID == genes[i]) %>%
transmute(t = x, y = expr) %>% arrange(t)
tmp <- ccf(c(atac.g$y), c(rna.g$y), plot = F)
L <- list(Lag = tmp$lag[which.max(tmp$acf)], cc = max(tmp$acf))
return(L)
}, mc.cores = num.cores)
cc.dat <- data.frame(GeneID = genes, Lags = unlist(lapply(cc.dat, `[[`, 1)), ccs = unlist(lapply(cc.dat, `[[`, 2)))
saveRDS(cc.dat, '../Input/toxo_cdc/rds/sc_rna_sc_atac_cross_cor_lag.rds')
ggplot(cc.dat, aes(x = ccs)) +
geom_histogram(aes(y = ..density..),
colour = 1, fill = "white") +
geom_density(lwd = 1.2,
linetype = 2,
colour = 2)