Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
t-yamamura committed Nov 10, 2021
1 parent 438cb54 commit c23c9e7
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions sudachitra/pretokenizer/sudachipy_pretokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,9 @@ def custom_split(self, i: int, normalized_string: NormalizedString) -> List[Norm
List[NormalizedString]: List of normalized_strings.
"""
morphs = super().tokenize(str(normalized_string).strip())
- tokens = list(map(lambda m: m.surface(), morphs))
+ tokens = [m.surface() for m in morphs if m.surface() != ""]
normalized_strings = self.split_normalized_string(normalized_string, tokens)
- if not (len(morphs) == len(tokens) == len(normalized_strings)):
+ if len(tokens) != len(normalized_strings):
raise ValueError(len(morphs), len(tokens), len(normalized_strings), tokens, normalized_strings)

if self.word_form_type != 'surface':
Expand Down

0 comments on commit c23c9e7

Please sign in to comment.