import numpy as np
import json
import pandas as pd
from cm_plotter import pretty_plot_confusion_matrix
import uuid
from tqdm import tqdm
import os


class Evaluation(object):
    """Runs a model over an evaluation dataloader and computes the configured metrics."""

    def __init__(self, eval_dataloader, config):
        self.dataloader = eval_dataloader
        self.metrics = config['evaluation']['metrics']
        self.device = config['device']
        self.labels_to_int = config['labels_to_int']

    def eval_model(self, model, finished_training=False):
        print("Starting evaluation ...")
        model.eval()
        labels = []
        predicted_labels = []
        eval_losses = []
        original_texts = []
        processed_texts = []
        for idx, batch in enumerate(self.dataloader):
            if finished_training:
                # Keep the raw and preprocessed texts so they can be dumped
                # alongside predictions at the end of training.
                original_texts += batch[2]
                processed_texts += batch[3]
            batch = (batch[0].to(self.device), batch[1].to(self.device))
            labels_batch, tokens_batch = batch
            predicted_labels_batch, eval_loss_batch = model(tokens_batch, labels_batch)
            eval_losses += [eval_loss_batch.cpu().detach()]
            predicted_labels += [predicted_labels_batch.cpu().detach()]
            labels += [labels_batch.cpu().detach()]

        cm = self.get_confusion_matrix(labels, predicted_labels)
        metrics = {}
        assets = []
        images_fns = []
        if 'per_class_precision' in self.metrics:
            metrics.update(self.get_per_class_precision(cm))
        if 'per_class_recall' in self.metrics:
            metrics.update(self.get_per_class_recall(cm))
        if 'per_class_f1' in self.metrics:
            metrics.update(self.get_per_class_f1(cm))
        if 'micro_average_accuracy' in self.metrics:
            metrics.update(self.get_micro_average_accuracy(cm))
        if 'macro_average_precision' in self.metrics:
            metrics.update(self.get_macro_precision(cm))
        if 'macro_average_recall' in self.metrics:
            metrics.update(self.get_macro_recall(cm))
        if 'macro_average_f1' in self.metrics:
            metrics.update(self.get_macro_f1(cm))
        if 'eval_loss' in self.metrics:
            metrics.update({'eval_loss': np.mean(np.array(eval_losses))})
        if 'in_out' in self.metrics and finished_training:
            assets += self.get_text_y_yhat(predicted_labels, labels, original_texts, processed_texts)
        if 'cm' in self.metrics and finished_training:
            images_fns += self.save_and_get_cm_image(cm)

        model.train()
        return metrics, assets, images_fns

    def get_confusion_matrix(self, labels, predicted_labels):
        # Rows are true labels, columns are predicted labels.
        labels_np = np.concatenate([label.numpy() for label in labels], axis=0)
        output_labels_np = np.concatenate([predicted_label.numpy() for predicted_label in predicted_labels], axis=0)
        n_classes = len(self.labels_to_int)
        confusion_matrix = np.zeros((n_classes, n_classes))
        for row_label in self.labels_to_int.values():
            for column_label in self.labels_to_int.values():
                confusion_matrix[row_label, column_label] = np.count_nonzero(
                    output_labels_np[labels_np == row_label] == column_label)
        return confusion_matrix

    def get_per_class_precision(self, confusion_matrix):
        per_class_precision = np.diag(confusion_matrix) / np.sum(confusion_matrix, axis=0)
        result = {}
        for label, int_label in self.labels_to_int.items():
            result[label + '_precision'] = per_class_precision[int_label]
        return result

    def get_per_class_recall(self, confusion_matrix):
        per_class_recall = np.diag(confusion_matrix) / np.sum(confusion_matrix, axis=1)
        result = {}
        for label, int_label in self.labels_to_int.items():
            result[label + '_recall'] = per_class_recall[int_label]
        return result

    def get_per_class_f1(self, confusion_matrix):
        per_class_precision = np.diag(confusion_matrix) / np.sum(confusion_matrix, axis=0)
        per_class_recall = np.diag(confusion_matrix) / np.sum(confusion_matrix, axis=1)
        result = {}
        for label in self.labels_to_int.keys():
            int_label = self.labels_to_int[label]
            result['f1_' + label] = (2 * per_class_precision[int_label] * per_class_recall[int_label]) / (
                per_class_precision[int_label] + per_class_recall[int_label])
        return result

    # In multi-class (single-label) classification this also equals the
    # micro-averaged precision, recall, and F1.
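    # A tiny worked example of the note above (illustrative numbers, not from the
    # original code): with labels_to_int = {'a': 0, 'b': 1, 'c': 2} and
    #   confusion_matrix = [[2, 1, 0],
    #                       [0, 3, 0],
    #                       [1, 0, 3]]
    # the diagonal sums to 8 out of 10 examples, so micro accuracy is 0.8. Since
    # each example carries exactly one true and one predicted label, total
    # predictions and total true labels both equal 10, so micro precision,
    # recall, and F1 are all 0.8 as well.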
    def get_micro_average_accuracy(self, confusion_matrix):
        return {'micro_average_accuracy': np.sum(np.diag(confusion_matrix)) / np.sum(confusion_matrix)}

    def get_macro_precision(self, confusion_matrix):
        return {'macro_average_precision': np.mean(np.diag(confusion_matrix) / np.sum(confusion_matrix, axis=0))}

    def get_macro_recall(self, confusion_matrix):
        return {'macro_average_recall': np.mean(np.diag(confusion_matrix) / np.sum(confusion_matrix, axis=1))}

    def get_macro_f1(self, confusion_matrix):
        # Harmonic mean of the macro-averaged precision and recall.
        p = np.mean(np.diag(confusion_matrix) / np.sum(confusion_matrix, axis=0))
        r = np.mean(np.diag(confusion_matrix) / np.sum(confusion_matrix, axis=1))
        return {'macro_average_f1': (2 * p * r) / (p + r)}

    def get_text_y_yhat(self, predicted_labels, labels, original_texts, processed_texts):
        labels_np = np.concatenate([label.numpy() for label in labels], axis=0)
        output_labels_np = np.concatenate([predicted_label.numpy() for predicted_label in predicted_labels], axis=0)
        asset = []
        # Assumes labels_to_int maps labels to consecutive indices in insertion order.
        for idx in range(len(original_texts)):
            asset.append({'original_text': original_texts[idx],
                          'processed_texts': processed_texts[idx],
                          'y': list(self.labels_to_int.keys())[labels_np[idx]],
                          'yhat': list(self.labels_to_int.keys())[output_labels_np[idx]]})
        return [json.dumps(asset, ensure_ascii=False).encode('utf8')]

    def save_and_get_cm_image(self, cm):
        # Assumes the 'dump' directory already exists.
        fname = os.path.join('dump', 'cm' + uuid.uuid4().hex + '.png')
        df_cm = pd.DataFrame(cm, list(self.labels_to_int.keys()), list(self.labels_to_int.keys()))
        pretty_plot_confusion_matrix(fname, df_cm, pred_val_axis='x')
        return [fname]
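

# A minimal, hedged usage sketch (not part of the original module): it shows the
# shape Evaluation expects from the config and from each dataloader batch
# (labels, tokens[, original_texts, processed_texts]). The dummy model and the
# single hand-built batch below are illustrative assumptions, not project code.
if __name__ == '__main__':
    import torch

    class _DummyModel:
        """Stands in for a torch model that returns (predicted_labels, loss)."""

        def eval(self):
            pass

        def train(self):
            pass

        def __call__(self, tokens, labels):
            # Echo the gold labels as predictions and return a zero loss.
            return labels.clone(), torch.tensor(0.0)

    config = {
        'device': 'cpu',
        'labels_to_int': {'neg': 0, 'pos': 1},
        'evaluation': {'metrics': ['per_class_precision', 'per_class_recall',
                                   'macro_average_f1', 'eval_loss']},
    }
    # One batch of (labels, tokens); the text fields are only read when
    # finished_training=True.
    dummy_batch = (torch.tensor([0, 1, 0]), torch.zeros(3, 4, dtype=torch.long))
    evaluation = Evaluation([dummy_batch], config)
    metrics, assets, images_fns = evaluation.eval_model(_DummyModel())
    print(metrics)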