-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild_deeplearning.py
84 lines (70 loc) · 2.37 KB
/
build_deeplearning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from joblib import Parallel, delayed
from keras import Sequential
from keras.layers import Dense
from pandas import DataFrame, read_csv
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import RepeatedStratifiedKFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import set_random_seed

from common import cleanup, initialize, logger, measured, paths, refresh
# Runs at import time, before any function below is defined or called.
# NOTE(review): presumably performs shared project setup from `common`;
# confirm what it does there before moving or removing this call.
initialize()
def import_features(project):
    """Load the modelling columns of a project's feature table.

    The CSV located by ``paths("features", project)`` is read with every
    column kept except the ones named in ``excluded`` below.

    Args:
        project: Project identifier forwarded to ``paths``.

    Returns:
        A pandas ``DataFrame`` holding the remaining columns in file order.
    """
    excluded = frozenset(
        (
            "project",
            "pull_number",
            "open",
            "closed",
            "merged",
            "pr_changed_files",
            "pr_lifetime",
            "contributor_contribution_period",
            "review_participants",
            "review_responses_interval",
            "project_pulls",
            "project_contributors",
        )
    )
    # A callable ``usecols`` is applied to the header names while parsing, so
    # the file no longer needs a separate ``nrows=0`` pass just to list its
    # columns before the real read.
    return read_csv(
        paths("features", project),
        usecols=lambda column: column not in excluded,
    )
def create_model():
    """Build and compile the feed-forward binary classifier.

    Returns:
        A compiled ``Sequential`` model made of an 11-unit ReLU layer that
        takes 11 inputs, a 6-unit ReLU layer, and a single sigmoid output
        unit, compiled with the Adam optimizer and binary cross-entropy loss.
    """
    network = Sequential()
    # The first layer fixes the expected number of input features at 11.
    network.add(Dense(11, activation="relu", input_dim=11))
    network.add(Dense(6, activation="relu"))
    network.add(Dense(1, activation="sigmoid"))
    network.compile(loss="binary_crossentropy", optimizer="Adam")
    return network
def build_deeplearning(project):
    """Cross-validate the deep learning model on one project's features.

    The table returned by ``import_features(project)`` is split into a label
    (its first column) and predictors (all remaining columns). A pipeline of
    ``StandardScaler`` followed by ``KerasClassifier(create_model)`` is then
    scored with ROC AUC under ``RepeatedStratifiedKFold(n_splits=10)``.

    Args:
        project: Project identifier; used for loading features, in the log
            message, and echoed back in the result.

    Returns:
        A dict ``{"project": project, "auc": <mean ROC AUC over all folds>}``.
    """
    log = logger(__file__)
    log.info(f"{project}: Building deep learning model")
    # Fixed seed, set before any model is built or any split is drawn.
    set_random_seed(1)
    features = import_features(project).values
    # Column 0 is used as the label; every other column is a predictor.
    X = features[:, 1:].astype(float)
    y = features[:, 0].astype(float)
    # Scaling lives inside the pipeline so that each fold fits the scaler on
    # its own training split only. Fitting it on all rows up front would let
    # the mean/variance of held-out rows leak into training.
    estimator = make_pipeline(StandardScaler(), KerasClassifier(create_model))
    results = cross_val_score(
        estimator,
        X,
        y,
        scoring="roc_auc",
        cv=RepeatedStratifiedKFold(n_splits=10),
    )
    return {"project": project, "auc": results.mean()}
def export_scores(scores):
    """Write the collected scores to the ``paths("deeplearning")`` CSV file.

    Args:
        scores: Records accepted by the ``DataFrame`` constructor; ``main``
            passes the results gathered from ``build_deeplearning``.
    """
    table = DataFrame(scores)
    destination = paths("deeplearning")
    # The positional index is left out of the file.
    table.to_csv(destination, index=False)
def main():
    """Build a model for every measured project and export the scores.

    When ``cleanup("deeplearning", refresh())`` is falsy, nothing is built
    and a skip message is printed instead. Otherwise one
    ``build_deeplearning`` job per project from ``measured()`` is run through
    a joblib ``Parallel`` pool (``n_jobs=-1``) and the results are handed to
    ``export_scores``.
    """
    if not cleanup("deeplearning", refresh()):
        print("Skip building deep learning models")
        return

    with Parallel(n_jobs=-1) as pool:
        jobs = (delayed(build_deeplearning)(name) for name in measured())
        scores = pool(jobs)
        export_scores(scores)
# Script entry point: run the build and turn Ctrl-C into a clean exit with
# status 1 instead of a traceback.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("Stop building deep learning models")
        # The bare ``exit`` name is injected by the ``site`` module for
        # interactive use and may be missing (e.g. under ``python -S``).
        # Raising SystemExit directly gives the same exit status without
        # depending on it.
        raise SystemExit(1)