-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpredict-cumulative.Rmd
101 lines (82 loc) · 2.66 KB
/
predict-cumulative.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
```{r init}
# Source the project's function library without echoing its code into the
# knitted output. TRUE/FALSE are spelled out because T and F are ordinary
# variables that sourced code could reassign.
source("lib/function_library.R", echo = FALSE)
# Load other R source files in the lib directory.
load_all_code("lib")
# Load a bunch of required packages, installing missing ones as needed.
load_all_packages(auto_install = TRUE)
# Set some standard settings: `data_dir` holds the .RData files read and
# written by later chunks; `dir` is the base path for visuals/ and tex/ output.
conf <- list(data_dir = "data", dir = ".")
```
```{r predict-setup}
# Setup multinode processing if available.
# Just use multicore for now.
conf$cluster <- ck37r::parallelize(allow_multinode = FALSE)#, type = "doSNOW")
# NOTE(review): conf$method is stored (and later saved with conf) but is not
# passed to any call visible in this file -- confirm where it is consumed.
#conf$method = "method.AUC"
conf$method <- "method.NNLS"
conf$outer_cv_folds <- 20
#conf$outer_cv_folds = 5
conf$cv_folds <- 10
#conf$cv_folds = 8
# If the cluster object is null it means we're setup for multicore (or nuthin).
# The NA test is restricted to length-one atomic values: calling is.na() on a
# cluster object with several elements returns a vector, and `||` rejects
# conditions longer than one (an error since R 4.3).
if (is.null(conf$cluster) ||
    (is.atomic(conf$cluster) && length(conf$cluster) == 1L &&
       is.na(conf$cluster))) {
  # Use non-CV if we're running this in RStudio on a laptop.
  # The leading FALSE currently disables this branch, so CV is always used.
  if (FALSE && .Platform$GUI == "RStudio") {
    cat("Using normal SuperLearner.\n")
    fn <- "sl_fn"
  } else {
    cat("Using CV.SuperLearner.\n")
    fn <- "cv_sl_fn"
  }
  # Manual toggle: TRUE selects the multicore wrapper, FALSE the plain
  # sequential SuperLearner.
  if (TRUE) {
    # Multicore version. Uses the ck37r namespace like every other call in
    # this file; `fn` picks which generated function to keep.
    conf$sl_fn <- ck37r::gen_superlearner(
      parallel = "multicore",
      outer_cv_folds = conf$outer_cv_folds
    )[[fn]]
  } else {
    # Sequential version.
    conf$sl_fn <- SuperLearner
  }
  # `fn` was only needed to select the generated function.
  rm(fn)
} else {
  # Multinode version.
  conf$sl_fn <- ck37r::gen_superlearner(
    parallel = "snow",
    cluster = conf$cluster,
    outer_cv_folds = conf$outer_cv_folds
  )$cv_sl_fn
}
```
```{r predict}
# Load the prepared data; the code below reads `dataset$X` and
# `dataset$outcomes$progression` from it.
load(paste0(conf$data_dir, "/create-dataset.RData"))
# isTRUE() keeps the condition a single TRUE/FALSE even when the backend name
# is NULL (comparison would yield logical(0) and make if() fail); in that case
# the multicore seed is used.
if (isTRUE(foreach::getDoParName() == "doSNOW")) {
  # Set multinode-compatible seed.
  parallel::clusterSetRNGStream(conf$cluster, iseed = 1)
} else {
  # Set multicore-compatible seed.
  set.seed(1, "L'Ecuyer-CMRG")
}
# Predictors and the outcome being modeled.
X <- dataset$X
Y <- dataset$outcomes$progression
# Build the sequence of learner libraries to fit, then report how many there
# are (auto-printed in the knitted output).
libs <- cumulative_library_seq(dim(X), type = "regression", glm = FALSE)
length(libs)
#libs = libs[4:5]
#libs = libs[c(1, 2, 3, 4, 5, 7)]
# Fit each library with the SuperLearner function chosen in predict-setup.
results <- fit_model_libs(Y, X, libs,
                          family = gaussian(), sl_fn = conf$sl_fn,
                          cv_folds = conf$cv_folds, cluster = conf$cluster)
# Persist everything the results chunk needs so it can be run on its own.
save(results, libs, conf, dataset,
     file = paste0(conf$data_dir, "/predict_results.RData"))
```
Process results.
```{r predict-results}
# Reload the objects saved by the predict chunk (results, libs, conf, dataset).
load(paste0(conf$data_dir, "/predict_results.RData"))
# Create plots and generate tables for all results.
# TODO: update this function to support RMSE rather than AUC as the metric.
# Also display R^2 in this analysis.
# Disabled until the TODO above is addressed; flip to TRUE to run.
if (FALSE) {
  # The outcome column is `progression`, matching the Y used for fitting; the
  # previous spelling `progresson` evaluated to NULL.
  plot_all_results(results, dataset$outcomes$progression, "PD", libs,
                   type = "regression",
                   plot_dir = paste0(conf$dir, "/visuals/"),
                   tex_dir = paste0(conf$dir, "/tex/"))
}
```
```{r cleanup}
# Release the parallel backend stored in conf$cluster by the predict-setup
# chunk. NOTE(review): conf$cluster may be NULL/NA in the multicore case --
# presumably stop_cluster() tolerates that; confirm.
ck37r::stop_cluster(conf$cluster)
```