Skip to content

Commit

Permalink
Blackening
Browse files (browse the repository at this point in the history)
  • Loading branch information
vthorsteinsson committed Nov 19, 2020
1 parent c88a623 commit d0c3226
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions src/reynir/bintokenizer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1322,7 +1322,9 @@ def eat_surnames(
yield token


def parse_phrases_3(token_stream: Iterator[Bin_TOK], token_ctor: TokenConstructor) -> Iterator[Bin_TOK]:
def parse_phrases_3(
token_stream: Iterator[Bin_TOK], token_ctor: TokenConstructor
) -> Iterator[Bin_TOK]:
""" Parse a stream of tokens looking for phrases and making substitutions.
Third pass: coalesce uppercase, otherwise unrecognized words with
a following person name, if any; also coalesce entity names and
Expand Down Expand Up @@ -1638,7 +1640,9 @@ def match(self, tq: List[Tok], ix: int) -> Iterable[Tok]:
yield self._token_ctor.Word(w, StaticPhrases.get_meaning(ix), token=tq)


def parse_static_phrases(token_stream: Iterator[Bin_TOK], token_ctor: TokenConstructor, auto_uppercase: bool) -> Iterator[Bin_TOK]:
def parse_static_phrases(
token_stream: Iterator[Bin_TOK], token_ctor: TokenConstructor, auto_uppercase: bool
) -> Iterator[Bin_TOK]:
""" Use the StaticPhraseStream class to process the token stream
and replace static phrases with single tokens """
sps = StaticPhraseStream(token_ctor, auto_uppercase)
Expand Down Expand Up @@ -1721,7 +1725,9 @@ def match(self, tq: List[Tok], ix: int) -> Iterable[Tok]:
yield token_ctor.Word(t.txt, mm, token=t)


def disambiguate_phrases(token_stream: Iterator[Bin_TOK], token_ctor: TokenConstructor) -> Iterator[Bin_TOK]:
def disambiguate_phrases(
token_stream: Iterator[Bin_TOK], token_ctor: TokenConstructor
) -> Iterator[Bin_TOK]:

""" Parse a stream of tokens looking for common ambiguous multiword phrases
(i.e. phrases that have a well known very likely interpretation but
Expand Down

0 comments on commit d0c3226

Please sign in to comment.