From 63d6a7a7a335414d9fddb0ce959b134cb295828c Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Fri, 1 Mar 2024 16:16:21 +0800
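Subject: [PATCH] Translate some "label" and "qualifier" template raw tags

Add LABEL_TAGS to zh/tags.py, mapping short zh.wiktionary label and
qualifier texts (e.g. "棄用", "口") to English tags, and merge it with
GRAMMATICAL_TAGS into ALL_TAGS, which translate_raw_tags() now uses for
its lookups.

Call translate_raw_tags() on gloss senses and translation items before
they are appended to the page data, so matching raw tags are moved from
"raw_tags" to "tags". Update the gloss and translation tests to expect
the translated tags, and drop the `re` import from zh/gloss.py.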
---
 src/wiktextract/extractor/zh/gloss.py       |  3 ++-
 src/wiktextract/extractor/zh/tags.py        | 28 +++++++++++++++++++--
 src/wiktextract/extractor/zh/translation.py |  3 ++-
 tests/test_zh_gloss.py                      |  8 +++---
 tests/test_zh_translation.py                |  3 +--
 5 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/src/wiktextract/extractor/zh/gloss.py b/src/wiktextract/extractor/zh/gloss.py
index c50a808c6..8e9b98d8a 100644
--- a/src/wiktextract/extractor/zh/gloss.py
+++ b/src/wiktextract/extractor/zh/gloss.py
@@ -1,4 +1,3 @@
-import re
 
 from wikitextprocessor import NodeKind, WikiNode
 from wikitextprocessor.parser import TemplateNode
@@ -8,6 +7,7 @@
 from ..ruby import extract_ruby
 from .example import extract_examples
 from .models import Sense, WordEntry
+from .tags import translate_raw_tags
 
 # https://zh.wiktionary.org/wiki/Template:Label
 LABEL_TEMPLATES = frozenset(["lb", "lbl", "label"])
@@ -62,4 +62,5 @@ def extract_gloss(
                     extract_examples(wxr, new_gloss_data, child_node)
 
         if not has_nested_gloss:
+            translate_raw_tags(new_gloss_data)
             page_data[-1].senses.append(new_gloss_data)
diff --git a/src/wiktextract/extractor/zh/tags.py b/src/wiktextract/extractor/zh/tags.py
index 26bbe5d65..b380e1e03 100644
--- a/src/wiktextract/extractor/zh/tags.py
+++ b/src/wiktextract/extractor/zh/tags.py
@@ -78,12 +78,36 @@
     **VOICE_TAGS,
 }
 
+# https://zh.wiktionary.org/wiki/Template:Label
+# https://zh.wiktionary.org/wiki/Template:Qualifier
+# https://zh.wiktionary.org/wiki/Template:古
+# https://zh.wiktionary.org/wiki/Template:注释
+LABEL_TAGS = {
+    "棄用": "obsolete",
+    "古": "archaic",
+    "陽": "masculine",
+    "陰": "feminine",
+    "喻": "figuratively",
+    "書": "literary",
+    "口": "colloquial",
+    "俚": "slang",
+    "俗": "slang",
+    "方": "dialectal",
+    "废": "obsolete",
+    "貶": "derogatory",
+    "罕": "rare",
+    "引": "broadly",
+}
+
+
+ALL_TAGS = {**GRAMMATICAL_TAGS, **LABEL_TAGS}
+
 
 def translate_raw_tags(data: WordEntry) -> WordEntry:
     raw_tags = []
     for raw_tag in data.raw_tags:
-        if raw_tag.lower() in GRAMMATICAL_TAGS:
-            data.tags.append(GRAMMATICAL_TAGS[raw_tag.lower()])
+        if raw_tag.lower() in ALL_TAGS:
+            data.tags.append(ALL_TAGS[raw_tag.lower()])
         else:
             raw_tags.append(raw_tag)
     data.raw_tags = raw_tags
diff --git a/src/wiktextract/extractor/zh/translation.py b/src/wiktextract/extractor/zh/translation.py
index 29a54fcab..b069a5e88 100644
--- a/src/wiktextract/extractor/zh/translation.py
+++ b/src/wiktextract/extractor/zh/translation.py
@@ -8,7 +8,7 @@
 
 from .models import Translation, WordEntry
 from .section_titles import TRANSLATIONS_TITLES
-from .tags import TEMPLATE_TAG_ARGS
+from .tags import TEMPLATE_TAG_ARGS, translate_raw_tags
 
 
 def extract_translation(
@@ -134,6 +134,7 @@ def process_translation_list_item(
             tr_data.word = clean_node(wxr, None, child)
 
     if len(tr_data.word) > 0:
+        translate_raw_tags(tr_data)
         page_data[-1].translations.append(tr_data.model_copy(deep=True))
 
 
diff --git a/tests/test_zh_gloss.py b/tests/test_zh_gloss.py
index 5beeed8a6..28ac6147f 100644
--- a/tests/test_zh_gloss.py
+++ b/tests/test_zh_gloss.py
@@ -57,19 +57,19 @@ def test_example_list(self) -> None:
                 {"glosses": ["好玩的：", "不合理的，不合邏輯的"]},
                 {
                     "glosses": ["有趣的：", "有趣的"],
-                    "raw_tags": ["棄用"],
+                    "tags": ["obsolete"],
                 },
                 {
                     "glosses": ["有趣的：", "美味的"],
-                    "raw_tags": ["棄用"],
+                    "tags": ["obsolete"],
                 },
                 {
                     "glosses": ["有趣的：", "漂亮的"],
-                    "raw_tags": ["棄用"],
+                    "tags": ["obsolete"],
                 },
                 {
                     "glosses": ["有趣的：", "很好的，卓越的"],
-                    "raw_tags": ["棄用"],
+                    "tags": ["obsolete"],
                 },
             ],
         )
diff --git a/tests/test_zh_translation.py b/tests/test_zh_translation.py
index 42cbcc1f6..921ef3b4c 100644
--- a/tests/test_zh_translation.py
+++ b/tests/test_zh_translation.py
@@ -212,8 +212,7 @@ def test_l_template(self):
                     "lang_code": "cs",
                     "lang": "捷克语",
                     "word": "patližán",
-                    "tags": ["masculine"],
-                    "raw_tags": ["口"],
+                    "tags": ["masculine", "colloquial"],
                 },
             ],
         )