"""This module serves as an entry point for the pipeline where one can either
manually introduce or use tools such as Guild AI to test different configurations.
"""
from runs import run
from classes import drug, tuning

import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')  # headless backend so figures can be saved without a display
import matplotlib.pyplot as plt
from sklearn.metrics import (r2_score, mean_absolute_error, mean_squared_error,
                             median_absolute_error)
from sklearn.svm import SVR
from sklearn.feature_selection import (SelectKBest, SelectPercentile, f_regression,
                                        mutual_info_regression, SelectFromModel,
                                        VarianceThreshold)
# Hyper-parameter search spaces, keyed later by estimator class name.
rf = {  # RandomForestRegressor
    'n_estimators': [10, 50, 100, 150],
    'max_depth': [2, 3, 4, 5, 6, 7, 8, None],
    'max_features': ['auto', 'sqrt', 'log2'],  # note: 'auto' was removed for regressors in scikit-learn >= 1.3
}
svr = {  # SVR
    'C': [1, 10, 50, 100, 250, 500, 750, 1000],
    'kernel': ['rbf', 'linear'],
    'gamma': ['scale']
}
en = {  # ElasticNet
    'alpha': [0.5, 1, 1.5, 2, 5],
    'l1_ratio': [0, 0.25, 0.5, 0.75, 1],
    'max_iter': [5000]
}
knn = {  # KNeighborsRegressor
    'n_neighbors': range(2, 30, 1),
    'weights': ['uniform', 'distance'],
    'algorithm': ['ball_tree', 'kd_tree'],
    'p': [1, 2, 3, 4],
    'n_jobs': [-1]
}
dt = {  # DecisionTreeRegressor
    'criterion': ['mse', 'mae'],  # renamed to 'squared_error'/'absolute_error' in scikit-learn >= 1.0
    'max_depth': range(2, 40, 2),
    'max_features': ['auto', 'sqrt', 'log2']
}
# Search space for each supported estimator, selected via the `model` setting below.
ts = {'KNeighborsRegressor': knn, 'SVR': svr, 'DecisionTreeRegressor': dt,
      'RandomForestRegressor': rf, 'ElasticNet': en}
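# A further grid could be registered the same way. Hypothetical sketch (assumes
# run()/tuning accept any scikit-learn regressor registered in ts by class name):
#   gb = {'n_estimators': [100, 200], 'learning_rate': [0.05, 0.1], 'max_depth': [2, 3]}
#   ts['GradientBoostingRegressor'] = gb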
feda = False                     # forwarded to run()
model = 'DecisionTreeRegressor'  # estimator to tune; must be a key of ts
threshold = 0.01                 # forwarded to run() as p
cutoff = 4                       # forwarded to run() as t
test = None                      # forwarded to run()
ajive = 0                        # kept for configuration sweeps; not used below
gdsc = False
ctrp = False
ccle = False
fs = 'f_regression'              # feature-selection method
norm = ''                        # forwarded to run() as norm
n = 0.199108                     # forwarded to run() as n
data = {'gdsc': gdsc, 'ctrp': ctrp, 'ccle': ccle}  # dataset selection flags
# Renamed from `tuning` so the imported tuning class is not shadowed.
tuner = tuning(ts[model], iterations=35, cv=3, scoring='r2', jobs=-1)
drugs = 1
metric = 'AUC_EC50'              # response metric; not used below
# Make sure at least one dataset was selected.
if any(data.values()):
    r1, drugs = run(data, fs, feda, model, p=threshold, t=cutoff, tuning=tuner,
                    drugs=drugs, test=test, n=n, norm=norm)
    # Per-drug results returned by run(), one row per drug.
    scores = pd.DataFrame.from_dict(drugs, orient='index')
    mean = scores['r2_score'].mean()
    std = scores['r2_score'].std()
    print(f'r_2_mean: {mean}')
    print(f'r_2_std: {std}')
    # Attach the tuned hyper-parameters of each per-drug model to its scores.
    result = {k: v.model.get_params() for k, v in r1.items()}
    scores = scores.join(pd.DataFrame.from_dict(result, orient='index'))
    scores.to_csv('scores.csv')
    # scores.boxplot(figsize=(12, 9))
    # plt.tight_layout()
    # plt.savefig('boxplot.png')
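# Sketch for inspecting the saved results afterwards (assumes the run above wrote
# scores.csv with an 'r2_score' column, as done by scores.to_csv):
#   import pandas as pd
#   scores = pd.read_csv('scores.csv', index_col=0)
#   print(scores.sort_values('r2_score', ascending=False).head(10))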