diff --git a/src/wiktextract/extractor/zh/gloss.py b/src/wiktextract/extractor/zh/gloss.py
index 8e9b98d8a..9074548e5 100644
--- a/src/wiktextract/extractor/zh/gloss.py
+++ b/src/wiktextract/extractor/zh/gloss.py
@@ -1,4 +1,3 @@
-
 from wikitextprocessor import NodeKind, WikiNode
 from wikitextprocessor.parser import TemplateNode
 from wiktextract.page import clean_node
diff --git a/src/wiktextract/extractor/zh/linkage.py b/src/wiktextract/extractor/zh/linkage.py
index 6885ada9c..266604381 100644
--- a/src/wiktextract/extractor/zh/linkage.py
+++ b/src/wiktextract/extractor/zh/linkage.py
@@ -13,6 +13,7 @@
 )
 from .descendant import DESCENDANT_TEMPLATES, extract_descendant_list_item
 from .models import Linkage, WordEntry
+from .tags import translate_raw_tags
 
 
 def extract_linkages(
@@ -54,6 +55,7 @@ def extract_linkages(
                             linkage_data.raw_tags.append(
                                 clean_node(wxr, None, item_child).strip("()")
                             )
+                            translate_raw_tags(linkage_data)
                         elif template_name.lower() in DESCENDANT_TEMPLATES:
                             not_term_indexes.add(index)
                             extract_descendant_list_item(
diff --git a/src/wiktextract/extractor/zh/tags.py b/src/wiktextract/extractor/zh/tags.py
index b380e1e03..d1c33f8d9 100644
--- a/src/wiktextract/extractor/zh/tags.py
+++ b/src/wiktextract/extractor/zh/tags.py
@@ -84,12 +84,22 @@
 # https://zh.wiktionary.org/wiki/Template:注释
 LABEL_TAGS = {
     "棄用": "obsolete",
+    "非標準": "nonstandard",
+    "非正式": "informal",
+    "古舊": "dated",
+    "新詞": "neologism",
+    "定語": "attributive",
+    "書面": "literary",
+    "貶義": "derogatory",
+    "比喻": "figuratively",
+    "俗語": "slang",
     "古": "archaic",
     "陽": "masculine",
     "陰": "feminine",
     "喻": "figuratively",
     "書": "literary",
     "口": "colloquial",
+    "口語": "colloquial",
     "俚": "slang",
     "俗": "slang",
     "方": "dialectal",
diff --git a/tests/test_zh_linkage.py b/tests/test_zh_linkage.py
index a2913bfd2..e2ba8295b 100644
--- a/tests/test_zh_linkage.py
+++ b/tests/test_zh_linkage.py
@@ -67,3 +67,20 @@ def test_ja_r_template(self):
                 "word": "家主",
             },
         )
+
+    def test_qual_tag(self):
+        page_data = [WordEntry(lang="漢語", lang_code="zh", word="駱駝")]
+        self.wxr.wtp.add_page("Template:qual", 10, "({{{1}}})")
+        self.wxr.wtp.add_page("Template:zh-l", 10, "{{{1}}}")
+        self.wxr.wtp.start_page("駱駝")
+        node = self.wxr.wtp.parse("* {{qual|比喻}} {{zh-l|沙漠之舟}}")
+        extract_linkages(self.wxr, page_data, node.children, "synonyms", "")
+        self.assertEqual(
+            [
+                s.model_dump(exclude_defaults=True)
+                for s in page_data[0].synonyms
+            ],
+            [
+                {"tags": ["figuratively"], "word": "沙漠之舟"},
+            ],
+        )