-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrajectory_analysis.R
180 lines (133 loc) · 5.8 KB
/
trajectory_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# Trajectory analysis with Monocle3 with a Seurat object
library(Seurat)
library(monocle3)
library(tidyverse)
library("biomaRt")
library(SeuratDisk)
library(dplyr)
library(patchwork)
seurat_obj <-readRDS(file = "seurat_obj.rds")
DimPlot(seurat_obj, reduction = "umap",
label = TRUE, group.by = "seurat_clusters",
label.size = 6, pt.size =2.5)
# Subsetting to some clusters only
subsetted <- subset(x = seurat_obj,
subset = seurat_clusters %in% c('1','7','0','10','11') )
DimPlot(subsetted, reduction = "umap",
label = TRUE, group.by = "seurat_clusters",
label.size = 6, pt.size =2.5)+
labs(title = "Subsetted cells")
FeaturePlot(subsetted, features = "X_score")
# Transforming from Seurat to cell data set
cds <- SeuratWrappers::as.cell_data_set(subsetted)
cds <- estimate_size_factors(cds)
# For plot_cells to work with gene names
rowData(cds)$gene_name <- rownames(cds)
rowData(cds)$gene_short_name <- rowData(cds)$gene_name
cds <- cluster_cells(cds,resolution = 1/200) ## Step 4: Cluster the cells
# Show the clusters calculated by monocle3
plot_cells(cds, color_cells_by = "cluster", show_trajectory_graph = FALSE)
# Show partitions calculated by monocle3
plot_cells(cds, color_cells_by = "partition", show_trajectory_graph = FALSE)
cds <- learn_graph(cds) ## Step 5: Learn a graph
cds <- order_cells(cds) ## Step 6: Order cells
# Plotting the trajectory graph white circles are leaves dark circles are forks
plot_cells(cds,
show_trajectory_graph = T,
color_cells_by = "seurat_clusters",
label_cell_groups = FALSE,
group_label_size = 6,
label_groups_by_cluster = F,
cell_size = 0.8,
graph_label_size = 3)
# Plotting calculated pseudotime
plot_cells(cds,
color_cells_by = "pseudotime",
label_cell_groups=FALSE,
label_leaves=FALSE,
label_branch_points=FALSE,
graph_label_size=1.5,
cell_size = 0.7)+
labs(title = "resolution = 1/200")
# Saving pseudotime as metadata
cds$monocle3_pseudotime <- pseudotime(cds)
# Boxplots of pseudotime for each cluster
data.pseudo1 <- as.data.frame(colData(cds))
ggplot(data.pseudo1, aes(monocle3_pseudotime, seurat_clusters,
fill = seurat_clusters)) + geom_boxplot()
ggplot(data.pseudo1, aes(monocle3_pseudotime,
reorder(seurat_clusters, monocle3_pseudotime),
fill = seurat_clusters)) + geom_boxplot()
+labs(y='resolution = 1/200')
ggsave("pseudotime_by_cluster.png", dpi = 600)
# Regression analysis
gene_fits <- fit_models(cds, model_formula_str = "~Group")
fit_coefs <- coefficient_table(gene_fits)
Group_terms <- fit_coefs %>% filter(term == "Group_WT")
Group_terms %>% filter (q_value < 0.05) %>%
dplyr::select(gene_id, term, q_value, estimate)
genes <- c("Rpl39",
"Tmsb4x",
"Dnaja1")
cds_subset <- cds[rowData(cds)$gene_short_name %in% genes,]
plot_genes_violin(cds_subset, group_cells_by="seurat_clusters", ncol=2) +
theme(axis.text.x=element_text(angle=45, hjust=1))
# Should we add batch effect to the model too?
group_batch_models <- fit_models(cds_subset,
model_formula_str = "~Group + batch",
expression_family="negbinomial")
group_models <- fit_models(cds_subset,
model_formula_str = "~Group",
expression_family="negbinomial")
compare_models(group_batch_models, group_models) %>% select(gene_short_name, q_value)
# Differential gene expression throughout the trajectory
cds_pr_test_res <- graph_test(cds, neighbor_graph="principal_graph", cores=8)
cds_pr_test_res_ok <- arrange(cds_pr_test_res, q_value)%>% filter(status == "OK")
cds_pr_test_res_ok%>% head()
cds_pr_test_res2 <- arrange(cds_pr_test_res, q_value)%>% filter(status == "OK")%>% top_n(-12)
top12 <- row.names( cds_pr_test_res2)[1:12]
# Plotting top 12 trajectory dependent genes
plot_cells(cds, genes=top12,
show_trajectory_graph=FALSE,
label_cell_groups=FALSE,
label_leaves=FALSE,
cell_size = 0.4)
RidgePlot(subsetted, features = top12[1:4],
sort = T,ncol = 2)
# Finding modules of co-regulated genes
pr_deg_ids <- row.names(subset(cds_pr_test_res_ok, q_value < 0.005))
cds <- preprocess_cds(cds) # necessary for gene modules
gene_modules <- find_gene_modules(cds[pr_deg_ids,], resolution=c(10^seq(-6,-1)))
table(gene_modules$module)
cell_groups <- data.frame(cell = row.names(colData(cds)),
cell_group = colData(cds)$seurat_clusters)
agg_mat <- aggregate_gene_expression(cds,
gene_group_df = gene_modules,
cell_group_df = cell_groups)
dim(agg_mat)
row.names(agg_mat) <- paste0("Module ", row.names(agg_mat))
pheatmap::pheatmap(agg_mat,
scale="column",
treeheight_row = 0,
treeheight_col = 0,
clustering_method="ward.D2")
gm <- gene_modules[which(gene_modules$module %in% c(1, 1)),]
plot_cells(cds,
genes=gm$id[1:15],
label_cell_groups=FALSE,
show_trajectory_graph=FALSE,
label_branch_points = FALSE,
label_roots = FALSE,
label_leaves = FALSE,
trajectory_graph_color = "grey60")
cds_subset <- cds[rowData(cds)$gene_short_name %in% top12,]
cds_subset <- order_cells(cds_subset)
plot_genes_in_pseudotime(cds_subset,
color_cells_by="seurat_clusters",
min_expr=0.5)
plot_genes_violin(cds_subset, group_cells_by="seurat_clusters", ncol=4)
plot_cells(cds, genes='Hspa1b',
show_trajectory_graph=FALSE,
label_cell_groups=FALSE,
label_leaves=FALSE,
cell_size = 0.4)