# E4_clf_semi_v2.py
"""
E4 - classification with reduced number of measures -- semi-synthetic streams
"""
import numpy as np
from sklearn import clone
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from tqdm import tqdm
from sklearn.metrics import balanced_accuracy_score
import utils
# Seed NumPy's global RNG so the chunk shuffling below is reproducible.
np.random.seed(1233)

# Classifiers evaluated on every (original dataset, drift type) pair.
# Each one is cloned before fitting, so these prototypes are never trained.
base_clfs = [
    GaussianNB(),
    KNeighborsClassifier(),
    SVC(random_state=11313),
    DecisionTreeClassifier(random_state=11313),
    MLPClassifier(random_state=11313),
]

# Experiment dimensions.
origial_datasets = 6
n_splits = 2
n_repeats = 5
n_drift_types = 2
n_folds = n_splits * n_repeats  # RepeatedStratifiedKFold yields this many splits

# Balanced accuracy per (original dataset, drift type, fold, classifier).
clf_res = np.zeros((origial_datasets, n_drift_types, n_folds, len(base_clfs)))

res = np.load('results/combined_semi.npy')
# NOTE(review): the original comment describes the axes as
# "features+label, drifts, reps, chunks", but the loops below index axis 1
# with the original-dataset id and axis 2 with the drift-type id -- confirm
# the actual layout of the stored array.
print(res.shape)

# Copy the list before appending: the original appended -1 directly to
# utils.selected2_indexes, mutating the shared module-level list (and growing
# it by one more -1 on every re-run within the same interpreter session).
indexes = list(utils.selected2_indexes) + [-1]  # -1 keeps the label row
labels = utils.selected2_measure_names
# NOTE(review): `indexes` and `labels` are never applied to `res` below, so
# the classifiers are trained on every row stored in the file. If
# combined_semi.npy is not already reduced, a `res = res[indexes]` step is
# presumably missing here -- confirm before relying on the "reduced" results.

# The context manager guarantees the progress bar is closed, even on error.
with tqdm(total=origial_datasets * n_drift_types * n_folds * len(base_clfs)) as pbar:
    for d_id in range(n_drift_types):
        for o_id in range(origial_datasets):
            # Put samples on the first axis, then shuffle them. Fancy
            # indexing returns a copy, so the in-place clean-up of X below
            # never writes back into `res`.
            res_temp = res[:, o_id, d_id].swapaxes(0, 1)
            p = np.random.permutation(res_temp.shape[0])
            res_temp = res_temp[p]

            # Last column is the label, everything before it is a feature.
            X = res_temp[:, :-1]
            y = res_temp[:, -1]

            # Replace NaN and +/-inf with 1 so the estimators accept the
            # data (same effect as the two separate isnan / isinf masks).
            X[~np.isfinite(X)] = 1

            rskf = RepeatedStratifiedKFold(
                n_splits=n_splits, n_repeats=n_repeats, random_state=3242
            )
            for fold, (train, test) in enumerate(rskf.split(X, y)):
                for base_id, base_c in enumerate(base_clfs):
                    clf = clone(base_c)
                    pred = clf.fit(X[train], y[train]).predict(X[test])
                    acc = balanced_accuracy_score(y[test], pred)
                    clf_res[o_id, d_id, fold, base_id] = acc
                    pbar.update(1)

            # Mean balanced accuracy over folds, one value per classifier.
            print(np.mean(clf_res[o_id, d_id], axis=0))

np.save('results/semi_clf_reduced.npy', clf_res)