fix bug related to https://github.com/WorksApplications/SudachiPy/issues/160
WorksApplications · Nov 10, 2021 · c23c9e7 · c23c9e7
1 parent 438cb54
commit c23c9e7
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/sudachitra/pretokenizer/sudachipy_pretokenizer.py b/sudachitra/pretokenizer/sudachipy_pretokenizer.py
@@ -96,9 +96,9 @@ def custom_split(self, i: int, normalized_string: NormalizedString) -> List[Norm
             List[NormalizedString]: List of normalized_strings.
         """
         morphs = super().tokenize(str(normalized_string).strip())
-        tokens = list(map(lambda m: m.surface(), morphs))
+        tokens = [m.surface() for m in morphs if m.surface() != ""]
         normalized_strings = self.split_normalized_string(normalized_string, tokens)
-        if not (len(morphs) == len(tokens) == len(normalized_strings)):
+        if len(tokens) != len(normalized_strings):
             raise ValueError(len(morphs), len(tokens), len(normalized_strings), tokens, normalized_strings)
 
         if self.word_form_type != 'surface':