Skip to content

Commit

Permalink
Merge pull request #110 from eubinecto/issue_109
Browse files Browse the repository at this point in the history
version 3.2.3 -
  • Loading branch information
eubinecto authored Mar 10, 2023
2 parents 80a5f0d + c01b384 commit 2dc1ec8
Show file tree
Hide file tree
Showing 10 changed files with 212 additions and 74 deletions.
54 changes: 54 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,60 @@ pprint(" ".join([styler(sent, 2) for sent in sents])) # 2 = formal
'횡하게 달아나는 겝니다.')
```

### 4️⃣ `add_rules` of your own

You can add your own rules with the `add_rules` method:
```python3
styler.add_rules(
{"이🏷VCP🔗(?P<MASK>다🏷EF)": (
{"다🏷EF"},
{"에요🏷EF"}, # 에요.
{"습니다🏷EF"},
)
})
sent = "한글은 한국의 글자이다."
print(styler(sent, 1))
```
```text
한글은 한국의 글자에요.
```
You can also add multiple rules at once. Use `politely.SELF` to refer to the original word.
```python3
from politely import SELF
styler.add_rules(
{
r"(?P<MASK>(아빠|아버지|아버님)🏷NNG)": (
{f"아빠🏷NNG"},
{f"아버지🏷NNG", f"아버님🏷NNG"},
{f"아버지🏷NNG", f"아버님🏷NNG"}
),
r"(아빠|아버지|아버님)🏷NNG🔗(?P<MASK>\S+?🏷JKS)": (
{SELF}, # no change, replace with the original
{f"께서🏷JKS"},
{f"께서🏷JKS"}
),
r"(?P<MASK>ᆫ다🏷EF)": (
{SELF}, # no change, replace with the original
{"시🏷EP🔗어요🏷EF"},
{"시🏷EP🔗습니다🏷EF"},
)
}
)
sent = "아빠가 정실에 들어간다."
print(styler(sent, 1))
from pprint import pprint
pprint(styler.logs['guess']['out']) # you can look up the candidates from here
```
```text
아버지께서 정실에 들어가셔요.
[(['아버지🏷NNG', '께서🏷JKS', '정실🏷NNG', '에🏷JKB', '들어가🏷VV', '시🏷EP', '어요🏷EF', '.🏷SF'],
0.0125),
(['아버님🏷NNG', '께서🏷JKS', '정실🏷NNG', '에🏷JKB', '들어가🏷VV', '시🏷EP', '어요🏷EF', '.🏷SF'],
0.0125)]
```



## Hosting the interactive demo

You can either host the interactive demo locally ([you first have to sign up for papago API to get your secrets](https://developers.naver.com/docs/papago/README.md))
Expand Down
12 changes: 0 additions & 12 deletions config.yaml

This file was deleted.

13 changes: 13 additions & 0 deletions explore/add_rules_eg_1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from politely import Styler
# Demo: style one sentence with an empty rule set, then again after
# registering a single custom rule through `add_rules`.

# One rule: regex pattern -> tuple of three candidate sets, indexed by the
# style number passed to the styler (index 1 is used below).
copula_ending_rule = {
    "이🏷VCP🔗(?P<MASK>다🏷EF)": (
        {"다🏷EF"},
        {"에요🏷EF"},  # 에요.
        {"습니다🏷EF"},
    )
}
sentence = "한글은 한국의 글자이다."

styler = Styler()
styler.rules.clear()  # just for demonstration
print(styler(sentence, 1))  # should be wrong

styler.add_rules(copula_ending_rule)
print(styler(sentence, 1))  # should be this
27 changes: 27 additions & 0 deletions explore/add_rules_eg_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from pprint import pprint

from politely import SELF, Styler
# Demo: register several rules in one `add_rules` call, style a sentence,
# and print the candidates recorded in the styler's logs.
styler = Styler()
styler.rules.clear()  # empty the rule set so that only the rules below are registered
sent = "아빠가 정실에 들어간다."
styler.add_rules(
    {
        # the noun itself: one candidate set per style index
        r"(?P<MASK>(아빠|아버지|아버님)🏷NNG)": (
            {"아빠🏷NNG"},
            {"아버지🏷NNG", "아버님🏷NNG"},
            {"아버지🏷NNG", "아버님🏷NNG"},
        ),
        # the JKS-tagged morpheme that directly follows one of those nouns
        r"(아빠|아버지|아버님)🏷NNG🔗(?P<MASK>\S+?🏷JKS)": (
            {SELF},  # no change, replace with the original
            {"께서🏷JKS"},
            {"께서🏷JKS"},
        ),
        # the sentence-final ending
        r"(?P<MASK>ᆫ다🏷EF)": (
            {SELF},  # no change, replace with the original
            {"시🏷EP🔗어요🏷EF"},
            {"시🏷EP🔗습니다🏷EF"},
        ),
    }
)
print(styler(sent, 1))
pprint(styler.logs['guess']['out'])  # you can look up the candidates from here
4 changes: 2 additions & 2 deletions politely/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from politely.rules import SEP, TAG, NULL, MASK, SELF, RULES, PREFERENCES, CASUAL, POLITE, FORMAL # noqa
from politely.rules import SEP, TAG, NULL, SELF, RULES, PREFERENCES, CASUAL, POLITE, FORMAL # noqa
from politely.styler import Styler # noqa

__version__ = "v3.2.2"
__version__ = "v3.2.3"
17 changes: 16 additions & 1 deletion politely/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class SFNotIncludedError(Exception):
"""
Exception raised when a sentence fragment is not included in the SF list.
An exception raised when a sentence does not include a SF.
"""

def __init__(self, out: str):
Expand All @@ -15,7 +15,22 @@ def __str__(self) -> str:
return "Sentence does not include a SF:\n" + "\n".join(self.out)


class EFNotIncludedError(Exception):
    """
    An exception raised when a sentence does not include an EF.

    :param out: the offending output. It is annotated as a single string;
        a sequence of strings is still accepted and is rendered one item
        per line, as before.
    """

    def __init__(self, out: str):
        self.out = out

    def __str__(self) -> str:
        # "\n".join(<str>) iterates the string character by character and
        # would print every character on its own line, so only join when
        # `out` is not already a single string.
        if isinstance(self.out, str):
            details = self.out
        else:
            details = "\n".join(self.out)
        return "Sentence does not include an EF:\n" + details


class EFNotSupportedError(Exception):
"""
An exception raised when a sentence includes an EF that is not supported.
"""
def __init__(self, out: str):
self.out = out

Expand Down
11 changes: 0 additions & 11 deletions politely/fetchers.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,4 @@
from kiwipiepy import Kiwi
from politely.scorer import Scorer


def fetch_scorer() -> Scorer:
    """
    Build and return a new ``Scorer``.

    Nothing is really "fetched" at the moment. The function is defined
    anyway because it will be needed by the time the scorer uses n-grams.
    """
    scorer = Scorer()
    return scorer


def fetch_kiwi() -> Kiwi:
Expand Down
47 changes: 20 additions & 27 deletions politely/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,7 @@
# Candidate value used by a rule for a style in which the masked morpheme is
# not used (e.g. the casual slot of the 시/으시 rule) — presumably it means
# "drop the morpheme"; confirm against the styler.
NULL = "❌"
# Delimiter between a morpheme and its tag, as in `다🏷EF`.
TAG = "🏷"
# Delimiter that chains tagged morphemes, as in `시🏷EP🔗어요🏷EF`.
SEP = "🔗"
# Name of the regex capture group that rule patterns use, `(?P<MASK>...)`.
MASK = "MASK"

# --- regex --- #
ALL = rf"[^\s{SEP}{TAG}]" # all characters except whitespace, sep and tag
EFS = rf"(?P<{MASK}>{ALL}+?{TAG}EF)"
SELF = rf"\g<{MASK}>"
WITH_JS = rf"[{''.join({chr(i) for i in range(44032, 55204)} - {chr(44032 + 28 * i) for i in range(399)})}]"

# --- all EF's of different styles --- #
CASUAL = {
Expand All @@ -31,6 +25,7 @@

POLITE = {
f"어요{TAG}EF",
f"시{TAG}EP{SEP}어요{TAG}EF",
f"에요{TAG}EF",
f"죠{TAG}EF",
f"래요{TAG}EF",
Expand All @@ -44,6 +39,7 @@

FORMAL = {
f"습니다{TAG}EF",
f"시{TAG}EP{SEP}습니다{TAG}EF",
f"습니까{TAG}EF",
f"ᆸ니까{TAG}EF",
f"ᆸ시오{TAG}EF",
Expand All @@ -52,6 +48,12 @@
}


# --- regex --- #
# Alternation over every known EF, captured in the MASK group. The
# alternatives are sorted so that the pattern text (and any RULES key built
# from it) is the same on every run instead of following hash-randomised
# set iteration order.
EFS = rf"(?P<MASK>({'|'.join(sorted(CASUAL | POLITE | FORMAL))}))"
# Replacement template that re-emits whatever the MASK group captured,
# i.e. "keep the original".
SELF = r"\g<MASK>"
# Character class of every precomposed Hangul syllable (code points
# 44032..55203) that has a final consonant (jongsung). Syllables without one
# sit at every 28th code point counted from 44032, so they are filtered out.
# Built in code-point order so the pattern text is identical on every run.
WITH_JONG_SUNG = (
    "["
    + "".join(chr(code) for code in range(44032, 55204) if (code - 44032) % 28)
    + "]"
)


# --- programmatically populated RULES --- #
RULES: Dict[str, Tuple[Set[str], Set[str], Set[str]]] = dict()

Expand All @@ -64,19 +66,10 @@
)
})

# --- 시/EP (1): 시/으시로 끝나지 않는 VV의 경우, 뒤에 시 or 으시가 필요할 수도 있다 --- #
RULES.update({
rf"(?P<{MASK}>{ALL}+?{TAG}VV){SEP}(?!(시|으시){TAG}EP)": (
{SELF},
{SELF, rf"{SELF}{SEP}{TAG}EP", rf"{SELF}{SEP}으시{TAG}EP"}, # we should be able to do back-referencing
{SELF, rf"{SELF}{SEP}{TAG}EP", rf"{SELF}{SEP}으시{TAG}EP"}
)
})

# --- 시/EP (2): 이미 시/EP가 존재하는 경우, 반말을 쓸 때 제거한다 --- #
# --- 시/EP: 이미 시/EP가 존재하는 경우, 반말을 쓸 때 제거한다 --- #
RULES.update(
{
rf"(?P<{MASK}>(시|으시){TAG}EP)": (
rf"(?P<MASK>(시|으시){TAG}EP)": (
{NULL}, # you don't use them
{SELF}, # just repeat yourself
{SELF}, # just repeat yourself
Expand All @@ -88,7 +81,7 @@
# --- 종성이 있는 경우, 종성으로 시작하는 EF는 사용하지 않음 --- #
RULES.update(
{
rf"{WITH_JS}{TAG}[A-Z\-]+?{SEP}{EFS}": (
rf"{WITH_JONG_SUNG}{TAG}[A-Z\-]+?{SEP}{EFS}": (
CASUAL - {f"ᆫ다{TAG}EF", f"ᆯ게{TAG}EF", f"ᆫ대{TAG}EF"},
POLITE - {f"ᆯ게요{TAG}EF", f"ᆫ대요{TAG}EF", f"ᆫ가요{TAG}EF"},
FORMAL - {f"ᆸ니까{TAG}EF", f"ᆸ시오{TAG}EF", f"ᆸ니다{TAG}EF", f"ᆸ시다{TAG}EF"}
Expand All @@ -111,7 +104,7 @@
# --- 나/저 --- #
RULES.update(
{
rf"(?P<{MASK}>(나|저){TAG}NP)": (
rf"(?P<MASK>(나|저){TAG}NP)": (
{f"나{TAG}NP"},
{f"저{TAG}NP"},
{f"저{TAG}NP"}
Expand All @@ -123,7 +116,7 @@
# --- 너/당신 --- #
RULES.update(
{
rf"(?P<{MASK}>(너|당신){TAG}NP)": (
rf"(?P<MASK>(너|당신){TAG}NP)": (
{f"너{TAG}NP"},
{f"당신{TAG}NP"},
{f"당신{TAG}NP"}
Expand All @@ -135,10 +128,10 @@
# --- 엄마/어머니 --- #
RULES.update( # noqa
{
rf"(?P<{MASK}>(엄마|어머니){TAG}NNG)": (
rf"(?P<MASK>(엄마|어머니|어머님){TAG}NNG)": (
{f"엄마{TAG}NNG"},
{f"어머니{TAG}NNG"},
{f"어머니{TAG}NNG"}
{f"어머니{TAG}NNG", f"어머님{TAG}NNG"},
{f"어머니{TAG}NNG", f"어머님{TAG}NNG"}
)
}
)
Expand All @@ -147,18 +140,18 @@
# --- 아빠/아버지 --- #
RULES.update(
{
rf"(?P<{MASK}>(아빠|아버지){TAG}NNG)": (
{f"아빠{TAG}NNG"},
rf"(?P<MASK>(아빠|아버지|아버님){TAG}NNG)": (
{f"아빠{TAG}NNG"},
{f"아빠{TAG}NNG"}
{f"아버지{TAG}NNG", f"아버님{TAG}NNG"},
{f"아버지{TAG}NNG", f"아버님{TAG}NNG"}
)
}
)

# --- 께서 --- #
RULES.update(
{
rf"(엄마|어머니|아빠|아버지|선생님|할머니|할아버지){TAG}NNG{SEP}(?P<{MASK}>{SEP}{ALL}{TAG}JKS)": (
rf"(어머니|어머님|아버지|아버님|선생님|할머니|할아버지){TAG}NNG{SEP}(?P<MASK>\S+?{TAG}JKS)": (
{SELF},
{f"께서{TAG}JKS"},
{f"께서{TAG}JKS"}
Expand Down
Loading

0 comments on commit 2dc1ec8

Please sign in to comment.