-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimulation_ancombc.R
128 lines (122 loc) · 5.27 KB
/
simulation_ancombc.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
## This file use ancombc in the simulation data
library(tidyverse)
library(ANCOMBC)
library(TreeSummarizedExperiment)
## load data
source("functionsV2.r")
## simulation 2
filenames = list.files("/Users/linxy29/Documents/Data/DCATS/simulation/current", full.names = TRUE)
for (file in filenames[1]) {
load(file)
for (i in 1:length(oper)) {
if (is.na(oper[[i]])[1]) next
print(i)
count_info = rbind(oper[[i]]$seurat_count$cond1, oper[[i]]$seurat_count$cond2)
#count_info = rbind(oper[[i]]$seurat_count$cond1, oper[[i]]$seurat_count$cond2) %>% mutate(condition = str_replace(condition, "Condition ", "cond"))
assay_data = count_info %>%
#select(-condition) %>%
as.matrix() %>% t()
col_data = data.frame(condition = rownames(count_info) %>% str_remove("s[1-9]"))
t9start = Sys.time()
ancombcRes = getANCOMBC(assay_data, col_data)
oper[[i]]$time = c(oper[[i]]$time, Sys.time() - t9start)
## ancombc test with bias correction
dcats_bc_knn = dcats_bc(assay_data %>% t(), oper[[i]]$knn_matrix)
dcats_bc_true = dcats_bc(assay_data %>% t(), oper[[i]]$true_matrix)
dcats_bc_svm = dcats_bc(assay_data %>% t(), oper[[i]]$svm_matrix)
ancombc_knn = getANCOMBC(dcats_bc_knn %>% t(), col_data)
ancombc_true = getANCOMBC(dcats_bc_true %>% t(), col_data)
ancombc_svm = getANCOMBC(dcats_bc_svm %>% t(), col_data)
## merge results
ancombcRes = ancombcRes %>%
dplyr::rename(cluster = taxon) %>%
dplyr::rename(ancombc_pvals = q_conditioncond2) %>%
mutate(ancombc_knn_pvals = ancombc_knn$q_conditioncond2,
ancombc_true_pvals = ancombc_true$q_conditioncond2,
ancombc_svm_pvals = ancombc_svm$q_conditioncond2)
oper[[i]]$clusterDF = oper[[i]]$clusterDF %>%
merge(ancombcRes)
}
save(oper, file = file)
}
## simulation 3
file = filenames[2]
load(file)
for (i in 1:length(oper)) {
if (is.na(oper[[i]])[1]) next
print(i)
count_info = oper[[i]]$seurat_count
#count_info = rbind(oper[[i]]$seurat_count$cond1, oper[[i]]$seurat_count$cond2) %>% mutate(condition = str_replace(condition, "Condition ", "cond"))
assay_data = count_info %>%
#select(-condition) %>%
as.matrix() %>% t()
col_data = design_mat
t9start = Sys.time()
ancombc_nbc = getANCOMBC(assay_data, col_data)
oper[[i]]$time = c(oper[[i]]$time, Sys.time() - t9start)
## ancombc test with bias correction
dcats_bc_knn = dcats_bc(assay_data %>% t(), oper[[i]]$knn_matrix)
dcats_bc_true = dcats_bc(assay_data %>% t(), oper[[i]]$true_matrix)
dcats_bc_svm = dcats_bc(assay_data %>% t(), oper[[i]]$svm_matrix)
ancombc_knn = getANCOMBC(dcats_bc_knn %>% t(), col_data)
ancombc_true = getANCOMBC(dcats_bc_true %>% t(), col_data)
ancombc_svm = getANCOMBC(dcats_bc_svm %>% t(), col_data)
## merge results
ancombcRes = ancombc_nbc %>%
dplyr::rename(cluster = taxon) %>%
dplyr::rename(ancombc_pvals = q_conditioncond2) %>%
dplyr::select(cluster, ancombc_pvals) %>%
mutate(ancombc_knn_pvals = ancombc_knn$q_conditioncond2,
ancombc_true_pvals = ancombc_true$q_conditioncond2,
ancombc_svm_pvals = ancombc_svm$q_conditioncond2)
oper[[i]]$conditionDF = oper[[i]]$conditionDF %>%
merge(ancombcRes)
ancombcRes = ancombc_nbc %>%
dplyr::rename(cluster = taxon) %>%
dplyr::rename(ancombc_pvals = q_gendermale) %>%
dplyr::select(cluster, ancombc_pvals) %>%
mutate(ancombc_knn_pvals = ancombc_knn$q_gendermale,
ancombc_true_pvals = ancombc_true$q_gendermale,
ancombc_svm_pvals = ancombc_svm$q_gendermale)
oper[[i]]$genderDF = oper[[i]]$genderDF %>%
merge(ancombcRes)
}
save(oper, file = file)
## simulation 4
file = filenames[1]
load(file)
clusterRes = data.frame()
resL$clusterRes = resL$clusterRes %>% mutate(sim = rep(1:30, each = 8))
for (i in 1:30) {
print(i)
#if (is.na(oper[[i]])[1]) next
count_info = resL$seurat_countDF %>% filter(sim == i) %>%
dplyr::select(-sim)
#count_info = rbind(oper[[i]]$seurat_count$cond1, oper[[i]]$seurat_count$cond2) %>% mutate(condition = str_replace(condition, "Condition ", "cond"))
assay_data = count_info %>%
#select(-condition) %>%
as.matrix() %>% t()
col_data = data.frame(condition = c(rep("cond1",3), rep("cond2", 3)))
ancombcRes = getANCOMBC(assay_data, col_data)
## ancombc test with bias correction
dcats_bc_knn = dcats_bc(assay_data %>% t(), resL$knn_matrixL[[i]])
dcats_bc_true = dcats_bc(assay_data %>% t(), resL$true_matrixL[[i]])
dcats_bc_svm = dcats_bc(assay_data %>% t(), resL$svm_matrixL[[i]])
ancombc_knn = getANCOMBC(dcats_bc_knn %>% t(), col_data)
ancombc_true = getANCOMBC(dcats_bc_true %>% t(), col_data)
ancombc_svm = getANCOMBC(dcats_bc_svm %>% t(), col_data)
## merge results
ancombcRes = ancombcRes %>%
dplyr::rename(cluster = taxon) %>%
dplyr::rename(ancombc_pvals = q_conditioncond2) %>%
mutate(ancombc_knn_pvals = ancombc_knn$q_conditioncond2,
ancombc_true_pvals = ancombc_true$q_conditioncond2,
ancombc_svm_pvals = ancombc_svm$q_conditioncond2)
clusterDF = resL$clusterRes %>%
filter(sim == i) %>%
select(-sim) %>%
merge(ancombcRes)
clusterRes = rbind(clusterRes, clusterDF)
}
resL$clusterRes = clusterRes
save(resL, file = "/Users/linxy29/Documents/Data/DCATS/simulation/current/one_increase_wANCOMBC.RData")