Skip to content

Commit

Permalink
Translate "qualifier" template tags in linkage list
Browse files Browse the repository at this point in the history
xxyzz committed Mar 1, 2024
1 parent 63d6a7a commit 7296a7e
Showing 4 changed files with 29 additions and 1 deletion.
1 change: 0 additions & 1 deletion src/wiktextract/extractor/zh/gloss.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

from wikitextprocessor import NodeKind, WikiNode
from wikitextprocessor.parser import TemplateNode
from wiktextract.page import clean_node
2 changes: 2 additions & 0 deletions src/wiktextract/extractor/zh/linkage.py
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@
)
from .descendant import DESCENDANT_TEMPLATES, extract_descendant_list_item
from .models import Linkage, WordEntry
from .tags import translate_raw_tags


def extract_linkages(
@@ -54,6 +55,7 @@ def extract_linkages(
linkage_data.raw_tags.append(
clean_node(wxr, None, item_child).strip("()")
)
translate_raw_tags(linkage_data)
elif template_name.lower() in DESCENDANT_TEMPLATES:
not_term_indexes.add(index)
extract_descendant_list_item(
10 changes: 10 additions & 0 deletions src/wiktextract/extractor/zh/tags.py
Original file line number Diff line number Diff line change
@@ -84,12 +84,22 @@
# https://zh.wiktionary.org/wiki/Template:注释
LABEL_TAGS = {
"棄用": "obsolete",
"非標準": "nonstandard",
"非正式": "informal",
"古舊": "dated",
"新詞": "neologism",
"定語": "attributive",
"書面": "literary",
"貶義": "derogatory",
"比喻": "figuratively",
"俗語": "slang",
"古": "archaic",
"陽": "masculine",
"陰": "feminine",
"喻": "figuratively",
"書": "literary",
"口": "colloquial",
"口語": "colloquial",
"俚": "slang",
"俗": "slang",
"方": "dialectal",
17 changes: 17 additions & 0 deletions tests/test_zh_linkage.py
Original file line number Diff line number Diff line change
@@ -67,3 +67,20 @@ def test_ja_r_template(self):
"word": "家主",
},
)

def test_qual_tag(self):
    """A ``qual`` template in a linkage list item yields a translated tag.

    The list item ``* {{qual|比喻}} {{zh-l|沙漠之舟}}`` is extracted as a
    synonym of 駱駝; the resulting entry is expected to carry the word
    "沙漠之舟" with the tag "figuratively".
    """
    entries = [WordEntry(lang="漢語", lang_code="zh", word="駱駝")]
    wtp = self.wxr.wtp
    # Register the bodies of the two templates used by the list item.
    wtp.add_page("Template:qual", 10, "({{{1}}})")
    wtp.add_page("Template:zh-l", 10, "{{{1}}}")
    wtp.start_page("駱駝")
    root = wtp.parse("* {{qual|比喻}} {{zh-l|沙漠之舟}}")
    extract_linkages(self.wxr, entries, root.children, "synonyms", "")

    # Dump only non-default fields so the comparison stays minimal.
    dumped = []
    for synonym in entries[0].synonyms:
        dumped.append(synonym.model_dump(exclude_defaults=True))

    expected = [{"tags": ["figuratively"], "word": "沙漠之舟"}]
    self.assertEqual(dumped, expected)

0 comments on commit 7296a7e

Please sign in to comment.