-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy paths6_1_signature_based_deconvolution_make_mixed_data.Rmd
104 lines (80 loc) · 2.37 KB
/
s6_1_signature_based_deconvolution_make_mixed_data.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
---
title: "R Notebook"
output: html_notebook
---
# Repeat of partial figure GSE11058
Here we assume that the data has already been generated (see the other file).
You need to install the CellMix package to use DSA.
## Tricky installation
Note: the maximum R version supported by CellMix is 3.6
(a Docker image for this is available at https://hub.docker.com/r/aladyevae/cellmix).
The original package is not available for later versions of R,
but we found a fork of the repository that works.
```{r}
# Before: copy the CellMix folder we provided in our repo to a .libPaths() folder.
# Install the CellMix fork only when the package is not available yet.
# requireNamespace() checks availability without attaching anything, so the
# library() calls below are the single place where packages are attached and
# they fail loudly if a package is still missing.
if (!requireNamespace("CellMix", quietly = TRUE)) {
  devtools::install_github("denklewer/cellmix")
}
library("CellMix")
library(digest)
library(matrixStats)
library(dplyr)
```
# Make Mixed Dataset
```{r}
# Install GEOquery from Bioconductor only when it is not available yet.
# requireNamespace() is used as a pure availability check; the package is
# attached later with library(GEOquery).
if (!requireNamespace("GEOquery", quietly = TRUE)) {
  BiocManager::install("GEOquery")
}
# Optional packages, left disabled:
# BiocManager::install('Metrics')
# BiocManager::install('uwot')
# BiocManager::install('tidyverse')
```
```{r}
# Attach the packages used by the chunks below, in this order.
for (pkg_name in c("CellMix", "digest", "GEOquery")) {
  library(pkg_name, character.only = TRUE)
}
```
```{r}
# Notebook-wide settings.
# Dataset identifier.
DS_NAME <- "GSE11058"
# Marker count setting (not referenced elsewhere in this notebook).
n_markers <- 20
# Output directory for every saved .rds file; the trailing slash is required
# because file names are appended with paste0().
dir_to_save <- "../data/large/signature_based/GSE11058_benchmark/"
```
```{r}
# Load the GSE11058 dataset via CellMix. The object must be named GSE11058.em
# because every later chunk reads it under that name (the previous name,
# GSE19830.em, left GSE11058.em undefined in a fresh session).
GSE11058.em <- ExpressionMix("GSE11058", verbose = TRUE)
```
```{r}
# Pull the three components out of the ExpressionMix object.
props <- coef(GSE11058.em)
ex <- exprs(GSE11058.em)
bas <- basis(GSE11058.em)

# Keep only rows whose log2 median absolute deviation across columns exceeds 8.
# drop = FALSE keeps the results as matrices even if a single row survives.
ex <- ex[log2(apply(ex, 1, mad)) > 8, , drop = FALSE]
bas <- bas[rownames(ex), , drop = FALSE]
cat(dim(ex), "\n")
data_full <- ex

# Replace spaces and dashes with underscores in the names. rownames(props) is
# normalised the same way as colnames(bas), matching the multiple-runs chunk,
# so the names stay aligned between the saved objects.
colnames(bas) <- gsub(" |-", "_", colnames(bas))
rownames(props) <- gsub(" |-", "_", rownames(props))
```
## Save it to use in another R version
```{r}
# Persist the single-run objects as .rds files inside dir_to_save.
# saveRDS() does not create missing directories, so make sure the target exists.
dir.create(dir_to_save, recursive = TRUE, showWarnings = FALSE)
# dir_to_save already ends with "/", so file names are appended without a
# leading slash (the data_full path previously contained "//").
saveRDS(GSE11058.em, paste0(dir_to_save, "single_run_em.rds"))
saveRDS(bas, paste0(dir_to_save, "single_run_bas.rds"))
saveRDS(props, paste0(dir_to_save, "single_run_props.rds"))
saveRDS(data_full, paste0(dir_to_save, "single_run_data_full.rds"))
```
## Multiple runs
```{r}
# Build five replicate datasets from GSE11058. Each replicate keeps a random
# 80% of the expression-matrix rows (seeded with the replicate index), then
# keeps rows whose log2 median absolute deviation exceeds 8, and saves the
# results as "<rep_n>_run_*.rds" inside dir_to_save.
DS_NAME <- "GSE11058"
# saveRDS() does not create missing directories, so make sure the target exists.
dir.create(dir_to_save, recursive = TRUE, showWarnings = FALSE)

for (rep_n in seq_len(5)) {
  set.seed(rep_n)
  # NOTE(review): the dataset is reloaded on every iteration; it is kept after
  # set.seed() in case ExpressionMix() consumes random numbers -- confirm
  # before hoisting it out of the loop.
  GSE11058.em <- ExpressionMix("GSE11058", verbose = TRUE)
  props <- coef(GSE11058.em)
  ex <- exprs(GSE11058.em)
  bas <- basis(GSE11058.em)

  # Random 80% row subset; floor() makes the truncation of the fractional
  # sample size explicit.
  n_keep <- floor(nrow(ex) * 0.8)
  ex <- ex[sample(nrow(ex), n_keep), , drop = FALSE]
  # Variability filter; drop = FALSE keeps matrices even for a single row.
  ex <- ex[log2(apply(ex, 1, mad)) > 8, , drop = FALSE]
  bas <- bas[rownames(ex), , drop = FALSE]
  data_full <- ex

  # Replace spaces and dashes with underscores in the names.
  colnames(bas) <- gsub(" |-", "_", colnames(bas))
  rownames(props) <- gsub(" |-", "_", rownames(props))

  saveRDS(GSE11058.em, paste0(dir_to_save, rep_n, "_run_em.rds"))
  saveRDS(bas, paste0(dir_to_save, rep_n, "_run_bas.rds"))
  saveRDS(props, paste0(dir_to_save, rep_n, "_run_props.rds"))
  saveRDS(data_full, paste0(dir_to_save, rep_n, "_run_data_full.rds"))
}
```