narrativeqa_eval_utils.py
import copy

import rouge  # this constructor signature matches the py-rouge package
# from rouge_score import rouge
# ROUGE-L scorer configured for NarrativeQA-style answer evaluation:
# texts are truncated to 100 words, Porter stemming is applied, and
# alpha=0.5 weights precision and recall equally in the F-score.
rouge_l_evaluator = rouge.Rouge(
    metrics=["rouge-l"],
    max_n=4,
    limit_length=True,
    length_limit=100,
    length_limit_type="words",
    apply_avg=True,
    apply_best=True,
    alpha=0.5,
    weight_factor=1.2,
    stemming=True,
)

def rouge_l(p, g):
    """Score a prediction p against reference(s) g with ROUGE-L."""
    return rouge_l_evaluator.get_scores(p, g)

def metric_max_over_ground_truths_r(metric_fn, prediction, ground_truths):
    """Score prediction against each ground truth and keep the best score.

    For dict-valued metrics such as rouge_l, the max is taken separately
    over the f/p/r fields; for scalar metrics, the plain max is returned.
    """
    scores_for_ground_truths = []
    for ground_truth in ground_truths:
        score = metric_fn(prediction, [ground_truth])
        scores_for_ground_truths.append(score)
    if isinstance(score, dict) and "rouge-l" in score:
        # Take the element-wise max of F1, precision, and recall across
        # all ground truths, rounded to two decimal places.
        max_score = copy.deepcopy(score)
        max_score["rouge-l"]["f"] = round(
            max(s["rouge-l"]["f"] for s in scores_for_ground_truths), 2
        )
        max_score["rouge-l"]["p"] = round(
            max(s["rouge-l"]["p"] for s in scores_for_ground_truths), 2
        )
        max_score["rouge-l"]["r"] = round(
            max(s["rouge-l"]["r"] for s in scores_for_ground_truths), 2
        )
        return max_score
    else:
        return round(max(scores_for_ground_truths), 2)
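
# A minimal usage sketch (not part of the original file): it assumes the
# py-rouge package is installed and uses made-up example strings to show how
# the helpers above combine for one question with two reference answers.
if __name__ == "__main__":
    prediction = "The treasure was buried under the old oak tree."
    ground_truths = [
        "It was buried beneath the old oak tree.",
        "Under an oak tree in the garden.",
    ]
    best = metric_max_over_ground_truths_r(rouge_l, prediction, ground_truths)
    print(best["rouge-l"])  # e.g. {'f': ..., 'p': ..., 'r': ...}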