From f57d2cc62c186593b614d318380b4bef5697f369 Mon Sep 17 00:00:00 2001 From: Gregor Middell Date: Tue, 26 Nov 2024 19:34:54 +0100 Subject: [PATCH] Fixes coverage test --- dwdsmor/build/benchmark.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dwdsmor/build/benchmark.py b/dwdsmor/build/benchmark.py index d45ce61..29fe5fe 100755 --- a/dwdsmor/build/benchmark.py +++ b/dwdsmor/build/benchmark.py @@ -115,14 +115,14 @@ def compute_coverage(automata, limit=None, show_progress=False): mismatches = defaultdict(Counter) for token in tokens: form, lemma, xpos = token - pos_candidates = {f"+{xpos}"}.union(dwdsmor_pos_tags.get(xpos, {})) - is_match = lemmatizer(form, pos_candidates) == lemma + pos_candidates = {f"+{xpos}"}.union(dwdsmor_pos_tags.get(xpos, set())) + is_match = lemmatizer(form, pos_candidates) is not None if not is_match and lemmatizer(lemma) is not None: # skip tokens where we can analyze the given lemma but not the form: # compounds are lemmatized to their basic words in German-UD/HDT continue registry = matches if is_match else mismatches - registry[xpos][lemma] += 1 + registry[xpos][form] += 1 coverage = [] @@ -130,7 +130,7 @@ def compute_coverage(automata, limit=None, show_progress=False): total_types = 0 total_type_matches = 0 total_token_matches = 0 - for registry in [matches, mismatches]: + for registry in (matches, mismatches): for _pos_tag, types in registry.items(): for _type, token_count in types.items(): total_types += 1 @@ -141,7 +141,7 @@ def compute_coverage(automata, limit=None, show_progress=False): tag_types = 0 tag_token_matches = 0 tag_type_matches = 0 - for registry, is_match in [(matches, True), (mismatches, False)]: + for registry, is_match in ((matches, True), (mismatches, False)): for _type, token_count in registry.get(pos_tag, {}).items(): tag_types += 1 tag_tokens += token_count