Skip to content

Commit

Permalink
improve pyi
Browse files Browse the repository at this point in the history
  • Loading branch information
mh-northlander committed Jul 8, 2024
1 parent 706a573 commit 5d8620e
Showing 1 changed file with 31 additions and 16 deletions.
47 changes: 31 additions & 16 deletions python/py_src/sudachipy/sudachipy.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,20 @@ PartialPOS = Union[
Tuple[()],
]

# Fields that can be specified for partial dictionary loading.
# See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html.
"""
Fields that can be specified for partial dictionary loading.
See https://worksapplications.github.io/sudachi.rs/python/topics/subsetting.html.
"""
FieldSet = Optional[Set[Literal["surface", "pos", "normalized_form", "dictionary_form", "reading_form",
"word_structure", "split_a", "split_b", "synonym_group_id"]]]


"""
Strings that can be parsed as SplitMode
"""
SplitModeStr = Literal["A", "a", "B", "b", "C", "c"]


class SplitMode:
"""
Unit to split text.
Expand All @@ -48,11 +56,12 @@ class SplitMode:
C: ClassVar[SplitMode] = ...

@classmethod
def __init__(cls, mode: str = "C") -> None:
def __init__(cls, mode: Optional[SplitModeStr] = "C") -> None:
"""
Creates a split mode from a string value.
:param mode: string representation of the split mode. One of [A,B,C] in capital or lower case.
If None, returns SplitMode.C.
"""
...

Expand Down Expand Up @@ -88,10 +97,10 @@ class Dictionary:
...

def create(self,
mode: Union[SplitMode, Literal["A", "B", "C"]] = SplitMode.C,
fields: FieldSet = None,
mode: Union[SplitMode, SplitModeStr, None] = SplitMode.C,
fields: Optional[FieldSet] = None,
*,
projection: str = None) -> Tokenizer:
projection: Optional[str] = None) -> Tokenizer:
"""
Creates a sudachi tokenizer.
Expand All @@ -118,12 +127,12 @@ class Dictionary:
...

def pre_tokenizer(self,
mode: Union[SplitMode, Literal["A", "B", "C"]] = "C",
fields: FieldSet = None,
mode: Union[SplitMode, SplitModeStr, None] = SplitMode.C,
fields: Optional[FieldSet] = None,
handler: Optional[Callable[[
int, object, MorphemeList], list]] = None,
*,
projection: str = None) -> object:
projection: Optional[str] = None) -> object:
"""
Creates HuggingFace Tokenizers-compatible PreTokenizer.
Requires package `tokenizers` to be installed.
Expand Down Expand Up @@ -230,7 +239,10 @@ class Morpheme:
"""
...

def split(self, mode: Union[SplitMode, Literal["A", "B", "C"]], out: Optional[MorphemeList] = None, add_single: bool = True) -> MorphemeList:
def split(self,
mode: Union[SplitMode, SplitModeStr],
out: Optional[MorphemeList] = None,
add_single: bool = True) -> MorphemeList:
"""
Returns sub-morphemes in the provided split mode.
Expand Down Expand Up @@ -288,7 +300,7 @@ class MorphemeList:
def __init__(self) -> None: ...

@classmethod
def empty(cls, dict) -> MorphemeList:
def empty(cls, dict: Dictionary) -> MorphemeList:
"""
Returns an empty morpheme list with dictionary.
"""
Expand All @@ -306,7 +318,7 @@ class MorphemeList:
"""
...

def __getitem__(self, index) -> Morpheme: ...
def __getitem__(self, index: int) -> Morpheme: ...
def __iter__(self) -> Iterator[Morpheme]: ...
def __len__(self) -> int: ...

Expand All @@ -318,11 +330,13 @@ class Tokenizer:
Create using Dictionary.create method.
"""
SplitMode: ClassVar[SplitMode] = ...

@classmethod
def __init__(cls) -> None: ...

def tokenize(self, text: str,
mode: Union[SplitMode, Literal["A", "B", "C"]] = ...,
def tokenize(self,
text: str,
mode: Union[SplitMode, SplitModeStr, None] = None,
out: Optional[MorphemeList] = None) -> MorphemeList:
"""
Break text into morphemes.
Expand Down Expand Up @@ -359,6 +373,7 @@ class WordInfo:
surface: ClassVar[str] = ...
synonym_group_ids: ClassVar[List[int]] = ...
word_structure: ClassVar[List[int]] = ...

@classmethod
def __init__(self) -> None: ...
def length(self) -> int: ...
Expand All @@ -374,11 +389,11 @@ class PosMatcher:
def __iter__(self) -> Iterator[POS]: ...
def __len__(self) -> int: ...

def __call__(self, m: Morpheme) -> bool:
def __call__(self, /, m: Morpheme) -> bool:
"""
Checks whether a morpheme has matching POS.
:param m: morpheme.
:param m: a morpheme to check.
:return: True if the morpheme has matching POS, False otherwise.
"""
...
Expand Down

0 comments on commit 5d8620e

Please sign in to comment.