From 026016da48322cfc064d06863c1a9f66f77826d7 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Tue, 2 May 2023 12:30:48 +0200 Subject: [PATCH] Replace deprecated levenshtein method in NEL benchmark (#192) rapidfuzz.distance.Levenshtein.normalized_similarity returns a score in [0, 1], whereas the deprecated rapidfuzz.string_metric.normalized_levenshtein returned a score in [0, 100]; the score is therefore compared to lexical_similarity_cutoff directly instead of being divided by 100. --- benchmarks/nel/requirements.txt | 1 + benchmarks/nel/scripts/candidate_generation/embeddings.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmarks/nel/requirements.txt b/benchmarks/nel/requirements.txt index f043d02bc..a1ab4db43 100644 --- a/benchmarks/nel/requirements.txt +++ b/benchmarks/nel/requirements.txt @@ -3,6 +3,7 @@ tqdm prettytable scikit-learn fuzzyset2 +rapidfuzz>=2.0.0 spacyfishing virtualenv pysqlite3-binary diff --git a/benchmarks/nel/scripts/candidate_generation/embeddings.py b/benchmarks/nel/scripts/candidate_generation/embeddings.py index 66ee33ef9..38c412d11 100644 --- a/benchmarks/nel/scripts/candidate_generation/embeddings.py +++ b/benchmarks/nel/scripts/candidate_generation/embeddings.py @@ -7,7 +7,7 @@ from spacy.tokens import Span from .base import NearestNeighborCandidateSelector from compat import KnowledgeBase -from rapidfuzz.string_metric import normalized_levenshtein +from rapidfuzz.distance.Levenshtein import normalized_similarity class EmbeddingCandidateSelector(NearestNeighborCandidateSelector): @@ -39,7 +39,7 @@ def _fetch_candidates( candidate_entity_ids: Set[str] = set() for nne in nn_entities: for name in nn_entities[nne].aliases: - if normalized_levenshtein(name.lower(), span.text.lower()) / 100 >= lexical_similarity_cutoff: + if normalized_similarity(name.lower(), span.text.lower()) >= lexical_similarity_cutoff: candidate_entity_ids.add(nne) break