-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathIMC_clusters.R
187 lines (137 loc) · 6.88 KB
/
IMC_clusters.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
library(Seurat)
library(ggplot2)
library(ggfortify)
library(jcolors)
require(gridExtra)
library(grid)
library(gam)
library(princurve)
library(parallel)
library(tidyverse)
library(MyEllipsefit)
library(sctransform)
library(openxlsx)
library(doParallel)
library(tidytext)
library(ggrepel)
library(dtwclust)
library(geomtextpath)
source('./util_funcs.R')
## IDs
prod.desc <- read.xlsx('../Input/toxo_genomics/genes/ProductDescription_GT1.xlsx')
TGGT1_ME49 <- read.xlsx('../Input/toxo_genomics/Orthologs/TGGT1_ME49 Orthologs.xlsx')
## DEGs
#DEG.sig <- readRDS('../Input/toxo_cdc/rds_ME49_59/Intra_DEGs_up_and_down_sig.rds') ## both up & down
DEG.sig <- readRDS( '../Input/toxo_cdc/rds_ME49_59/Intra_markers_sig.rds')
## New
IMCs <- read.xlsx("../Input/Toxo_genomics/genes/IMC gene list 090122.xlsx")
IMCs <- IMCs %>% dplyr::select(Gene.ID, new.name)
IMCs <- left_join(IMCs, TGGT1_ME49, by = c("Gene.ID" = "TGGT1"))
IMCs$TGME49 <- gsub('_', '-', IMCs$TGME49)
colnames(IMCs) <- gsub("new.name", "ProductDescription", colnames(IMCs))
## scRNA and scATAC
rna_sub <- readRDS('../Input/toxo_cdc/rds_ME49_59/S.O_intra_lables_pt.rds')
atac_sub <- readRDS('../Input/toxo_cdc/rds_ME49_59/S.O_intra_atac_lables_pt.rds')
## Splines
sc.rna.spline.fits <- readRDS('../Input/toxo_cdc/rds_ME49_59/sc_rna_spline_fits_all_genes.rds')
sc.atac.spline.fits <- readRDS('../Input/toxo_cdc/rds_ME49_59/sc_atac_spline_fits_all_genes.rds')
## Turn the data into wide format (time by gene) and center & scale each gene
sc.rna.dtw.wide <- sc.rna.spline.fits %>%
pivot_wider(names_from = 'GeneID', values_from = 'y') %>%
mutate_at(vars(matches("TGME")), ~scale(., center = T, scale = T)) %>%
as.data.frame()
na.ind <- which(apply(sc.rna.dtw.wide, 2, function(x) any(is.na(x))))
if(length(na.ind)){
sc.rna.dtw.wide <- sc.rna.dtw.wide[,-na.ind]
}
sc.atac.dtw.wide <- sc.atac.spline.fits %>%
pivot_wider(names_from = 'GeneID', values_from = 'y') %>%
mutate_at(vars(matches("TGME")), ~scale(., center = T, scale = T)) %>%
as.data.frame()
na.ind <- which(apply(sc.atac.dtw.wide, 2, function(x) any(is.na(x))))
if(length(na.ind)){
sc.atac.dtw.wide <- sc.atac.dtw.wide[,-na.ind]
}
sc.rna.mu.scale <- sc.rna.dtw.wide %>%
pivot_longer(-x, names_to = 'GeneID', values_to = 'expr')
sc.atac.mu.scale <- sc.atac.dtw.wide %>%
pivot_longer(-x, names_to = 'GeneID', values_to = 'expr')
sig.IMCs <- IMCs[IMCs$TGME49 %in% c(DEG.sig$gene),]
saveRDS(sig.IMCs, '../Input/toxo_cdc/rds_ME49_59/sig_IMCs.rds')
## Clustering IMCs
k <- 5
sc.rna.IMCs <- sc.rna.dtw.wide[,colnames(sc.rna.dtw.wide) %in% sig.IMCs$TGME49 ]
sc.atac.IMCs <- sc.atac.dtw.wide[,colnames(sc.atac.dtw.wide) %in% sig.IMCs$TGME49 ]
sc.rna.IMCs.markers.hc_dtw <- dtwClustCurves(sc.rna.IMCs, nclust = k)
sc.atac.IMCs.markers.hc_dtw <- dtwClustCurves(sc.atac.IMCs, nclust = k)
plot(sc.rna.IMCs.markers.hc_dtw, type = 'sc')
plot(sc.atac.IMCs.markers.hc_dtw, type = 'sc')
## GGplot cluster graphs
sc.rna.long.IMCs <- inner_join(sc.rna.mu.scale, IMCs, by = c('GeneID' = 'TGME49'))
sc.rna.long.IMCs <- sc.rna.long.IMCs %>%
transmute(time = x, GeneID = GeneID, normExpr = expr, Name = ProductDescription) %>% distinct()
sc.atac.long.IMCs <- inner_join(sc.atac.mu.scale, IMCs, by = c('GeneID' = 'TGME49'))
sc.atac.long.IMCs <- sc.atac.long.IMCs %>%
transmute(time = x, GeneID = GeneID, normExpr = expr, Name = ProductDescription) %>% distinct()
sc.rna.clust.info <- data.frame(GeneID = colnames(sc.rna.IMCs), cluster = cutree(sc.rna.IMCs.markers.hc_dtw, k = k))
sc.atac.clust.info <- data.frame(GeneID = colnames(sc.atac.IMCs), cluster = cutree(sc.atac.IMCs.markers.hc_dtw, k = k))
sc.rna.long.clust <- inner_join(sc.rna.long.IMCs, sc.rna.clust.info, by = 'GeneID')
sc.atac.long.clust <- inner_join(sc.atac.long.IMCs, sc.atac.clust.info, by = 'GeneID')
sc.rna.sc.atac.joint <- inner_join(sc.rna.long.clust, sc.atac.long.clust,
by = c("time", "GeneID", "Name"))
colnames(sc.rna.sc.atac.joint) <- c("time", "GeneID", "scRNA", "Name", "cluster.RNA", "scATAC", "cluster.ATAC")
sc.rna.sc.atac.joint.long <- sc.rna.sc.atac.joint %>%
pivot_longer(-c('time', "GeneID", "Name", "cluster.RNA", "cluster.ATAC"),
names_to = 'data', values_to = 'normExpr')
sc.rna.sc.atac.joint.long$label <- NA
sc.rna.sc.atac.joint.long$label[which(sc.rna.sc.atac.joint.long$time == 3)] <-
sc.rna.sc.atac.joint.long$Name[which(sc.rna.sc.atac.joint.long$time == 3)]
sc.rna.sc.atac.joint.long$cluster.RNA <- paste('C', sc.rna.sc.atac.joint.long$cluster.RNA)
#saveRDS(sc.rna.sc.atac.joint.long, '../Input/toxo_cdc/rds_ME49_59/IMC_sc_rna_sc_atac_joint_dtw_clust.rds')
saveRDS(sc.rna.sc.atac.joint.long, '../Input/toxo_cdc/rds_ME49_59/IMC_sc_rna_sc_atac_joint_dtw_clust_new_update.rds')
sc.rna.sc.atac.joint.long <- readRDS('../Input/toxo_cdc/rds_ME49_59/IMC_sc_rna_sc_atac_joint_dtw_clust_new_update.rds')
sc.rna.sc.atac.joint.long$data <- factor(sc.rna.sc.atac.joint.long$data, levels = c("scRNA", "scATAC"))
plot_rna_atac_trends <- function(sc.rna.sc.atac.joint.long.sub){
p <- ggplot(sc.rna.sc.atac.joint.long.sub, aes(x= time,y=normExpr)) +
geom_path(aes(color = Name,),alpha = 0.8, size = 0.8)+
theme_bw(base_size = 14) +
theme(legend.position = "right") +
ylab('normExpr') + xlab('Time') +
theme(axis.text.x = element_text(angle = 0, hjust = 1, size = 12, face="bold")) +
theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 12, face="bold")) +
theme(strip.background = element_rect(colour="black", fill="white",
size=0.5, linetype="solid")) +
theme(strip.text = element_text(size = 14, face="bold", angle = 0)) +
coord_cartesian(xlim = c(0,6.5)) +
geom_text_repel(aes(label = label), size = 2.5, fontface = "bold",
box.padding = unit(0.6, "lines"),
max.overlaps = 300,
#segment.angle = 180,
nudge_x = 0.25,
nudge_y = 0.25,
hjust=0.25,
#nudge_x=0.25,
segment.size = 0.1,
na.rm = TRUE)+
facet_grid(cluster.RNA~data, scales = 'free', space = 'free') +
#ggtitle(titles[i]) +
theme(
plot.title = element_text(size=14, face = "bold.italic", color = 'red'),
axis.title.x = element_text(size=14, face="bold", hjust = 1),
axis.title.y = element_text(size=14, face="bold")
) +
theme(#legend.position = c(0.15, 0.85),
legend.position = 'none',
legend.title = element_text(colour="black", size=12,
face="bold"),
legend.text = element_text(colour="black", size=12,
face="bold"))
return(p)
}
p1 <- plot_rna_atac_trends(sc.rna.sc.atac.joint.long)
plot(p1)
ggsave(filename="../Output/toxo_cdc/ME49_59/figures_paper/IMC_dtw_5_clusters.pdf",
plot=p1,
width = 10, height = 8,
units = "in"
)