From a8c21b5cb4d63f77039169093055586fc74e8119 Mon Sep 17 00:00:00 2001
From: Kaushik Acharya
Date: Sun, 4 Oct 2020 19:24:34 +0530
Subject: [PATCH 1/2] classification_report outputs string/dict as requested in issue #41

---
 seqeval/metrics/sequence_labeling.py | 80 +++++++++++++++++-----------
 1 file changed, 48 insertions(+), 32 deletions(-)

diff --git a/seqeval/metrics/sequence_labeling.py b/seqeval/metrics/sequence_labeling.py
index ac56f18..8f85acb 100644
--- a/seqeval/metrics/sequence_labeling.py
+++ b/seqeval/metrics/sequence_labeling.py
@@ -301,16 +301,17 @@ def performance_measure(y_true, y_pred):
     return performance_dict


-def classification_report(y_true, y_pred, digits=2, suffix=False):
+def classification_report(y_true, y_pred, digits=2, suffix=False, output_dict=False):
     """Build a text report showing the main classification metrics.

     Args:
         y_true : 2d array. Ground truth (correct) target values.
         y_pred : 2d array. Estimated targets as returned by a classifier.
         digits : int. Number of digits for formatting output floating point values.
+        output_dict : bool(default=False). If True, return output as dict else str.

     Returns:
-        report : string. Text summary of the precision, recall, F1 score for each class.
+        report : string/dict. Summary of the precision, recall, F1 score for each class.

     Examples:
         >>> from seqeval.metrics import classification_report
@@ -324,6 +325,7 @@ def classification_report(y_true, y_pred, digits=2, suffix=False):
          micro avg       0.50      0.50      0.50         2
          macro avg       0.50      0.50      0.50         2
+      weighted avg       0.50      0.50      0.50         2

     """
     true_entities = set(get_entities(y_true, suffix))
@@ -338,15 +340,17 @@ def classification_report(y_true, y_pred, digits=2, suffix=False):
     for e in pred_entities:
         d2[e[0]].add((e[1], e[2]))

-    last_line_heading = 'weighted avg'
-    width = max(name_width, len(last_line_heading), digits)
+    if output_dict:
+        report_dict = dict()
+    else:
+        last_line_heading = 'weighted avg'
+        width = max(name_width, len(last_line_heading), digits)
+        headers = ["precision", "recall", "f1-score", "support"]
+        head_fmt = u'{:>{width}s} ' + u' {:>9}' * len(headers)
+        report = head_fmt.format(u'', *headers, width=width)
+        report += u'\n\n'

-    headers = ["precision", "recall", "f1-score", "support"]
-    head_fmt = u'{:>{width}s} ' + u' {:>9}' * len(headers)
-    report = head_fmt.format(u'', *headers, width=width)
-    report += u'\n\n'
-
-    row_fmt = u'{:>{width}s} ' + u' {:>9.{digits}f}' * 3 + u' {:>9}\n'
+        row_fmt = u'{:>{width}s} ' + u' {:>9.{digits}f}' * 3 + u' {:>9}\n'

     ps, rs, f1s, s = [], [], [], []
     for type_name in sorted(d1.keys()):
@@ -360,33 +364,45 @@ def classification_report(y_true, y_pred, digits=2, suffix=False):
         r = nb_correct / nb_true if nb_true > 0 else 0
         f1 = 2 * p * r / (p + r) if p + r > 0 else 0

-        report += row_fmt.format(*[type_name, p, r, f1, nb_true], width=width, digits=digits)
+        if output_dict:
+            report_dict[type_name] = {'precision': p, 'recall': r, 'f1-score': f1, 'support': nb_true}
+        else:
+            report += row_fmt.format(*[type_name, p, r, f1, nb_true], width=width, digits=digits)

         ps.append(p)
         rs.append(r)
         f1s.append(f1)
         s.append(nb_true)

-    report += u'\n'
+    if not output_dict:
+        report += u'\n'

     # compute averages
-    report += row_fmt.format('micro avg',
-                             precision_score(y_true, y_pred, suffix=suffix),
-                             recall_score(y_true, y_pred, suffix=suffix),
-                             f1_score(y_true, y_pred, suffix=suffix),
-                             np.sum(s),
-                             width=width, digits=digits)
-    report += row_fmt.format('macro avg',
-                             np.average(ps),
-                             np.average(rs),
-                             np.average(f1s),
-                             np.sum(s),
-                             width=width, digits=digits)
-    report += row_fmt.format(last_line_heading,
-                             np.average(ps, weights=s),
-                             np.average(rs, weights=s),
-                             np.average(f1s, weights=s),
-                             np.sum(s),
-                             width=width, digits=digits)
-
-    return report
+    nb_true = np.sum(s)
+
+    for avg_type in ['micro avg', 'macro avg', 'weighted avg']:
+        if avg_type == 'micro avg':
+            # micro average
+            p = precision_score(y_true, y_pred, suffix=suffix)
+            r = recall_score(y_true, y_pred, suffix=suffix)
+            f1 = f1_score(y_true, y_pred, suffix=suffix)
+        elif avg_type == 'macro avg':
+            # macro average
+            p = np.average(ps)
+            r = np.average(rs)
+            f1 = np.average(f1s)
+        elif avg_type == 'weighted avg':
+            # weighted average
+            p = np.average(ps, weights=s)
+            r = np.average(rs, weights=s)
+            f1 = np.average(f1s, weights=s)
+
+        if output_dict:
+            report_dict[avg_type] = {'precision': p, 'recall': r, 'f1-score': f1, 'support': nb_true}
+        else:
+            report += row_fmt.format(*[avg_type, p, r, f1, nb_true], width=width, digits=digits)
+
+    if output_dict:
+        return report_dict
+    else:
+        return report
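A minimal usage sketch of the new output_dict flag introduced by this patch (the tag
sequences below are illustrative, not the ones from the docstring example; the dict
layout is the one built above: one entry per entity type plus 'micro avg', 'macro avg'
and 'weighted avg', each holding 'precision', 'recall', 'f1-score' and 'support'):

    >>> from seqeval.metrics import classification_report
    >>> y_true = [['B-PER', 'I-PER', 'O']]  # illustrative tag sequences
    >>> y_pred = [['B-PER', 'I-PER', 'O']]
    >>> report = classification_report(y_true, y_pred, output_dict=True)
    >>> sorted(report.keys())
    ['PER', 'macro avg', 'micro avg', 'weighted avg']
    >>> report['PER']['f1-score']
    1.0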
From 4216f3b7c694f60b70015447c033254d9aeb866d Mon Sep 17 00:00:00 2001
From: Kaushik Acharya
Date: Sun, 4 Oct 2020 23:43:02 +0530
Subject: [PATCH 2/2] Considering all the average types string for width

---
 seqeval/metrics/sequence_labeling.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/seqeval/metrics/sequence_labeling.py b/seqeval/metrics/sequence_labeling.py
index 8f85acb..e29d4d6 100644
--- a/seqeval/metrics/sequence_labeling.py
+++ b/seqeval/metrics/sequence_labeling.py
@@ -340,11 +340,13 @@ def classification_report(y_true, y_pred, digits=2, suffix=False, output_dict=Fa
     for e in pred_entities:
         d2[e[0]].add((e[1], e[2]))

+    avg_types = ['micro avg', 'macro avg', 'weighted avg']
+
     if output_dict:
         report_dict = dict()
     else:
-        last_line_heading = 'weighted avg'
-        width = max(name_width, len(last_line_heading), digits)
+        avg_width = max([len(x) for x in avg_types])
+        width = max(name_width, avg_width, digits)
         headers = ["precision", "recall", "f1-score", "support"]
         head_fmt = u'{:>{width}s} ' + u' {:>9}' * len(headers)
         report = head_fmt.format(u'', *headers, width=width)
@@ -380,7 +382,7 @@ def classification_report(y_true, y_pred, digits=2, suffix=False, output_dict=Fa
     # compute averages
     nb_true = np.sum(s)

-    for avg_type in ['micro avg', 'macro avg', 'weighted avg']:
+    for avg_type in avg_types:
         if avg_type == 'micro avg':
             # micro average
             p = precision_score(y_true, y_pred, suffix=suffix)
@@ -396,6 +398,8 @@ def classification_report(y_true, y_pred, digits=2, suffix=False, output_dict=Fa
             p = np.average(ps, weights=s)
             r = np.average(rs, weights=s)
             f1 = np.average(f1s, weights=s)
+        else:
+            assert False, "unexpected average: {}".format(avg_type)

         if output_dict:
             report_dict[avg_type] = {'precision': p, 'recall': r, 'f1-score': f1, 'support': nb_true}
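For reference, a small sketch of the width computation after this second patch (the
entity type names are assumed for illustration; avg_types is the list added above):

    avg_types = ['micro avg', 'macro avg', 'weighted avg']
    name_width = max(len(t) for t in ['MISC', 'PER'])  # 4, assuming these entity types
    avg_width = max([len(x) for x in avg_types])       # 12, from 'weighted avg'
    digits = 2
    width = max(name_width, avg_width, digits)         # 12, same value len('weighted avg') gave before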