From aaf5f7aa72901dc22fd0713f607e99a609b4b83b Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Mon, 29 Jan 2024 14:21:47 +0800
Subject: [PATCH 1/5] Parse zh edition "t*" translation templates

High-quality pages use "t*" templates in translation lists. We can
get the language code from the "t" template argument if the language
name text can't be converted to a code.
---
 src/wiktextract/extractor/fr/models.py      |   2 +-
 src/wiktextract/extractor/zh/models.py      |   8 +-
 src/wiktextract/extractor/zh/page.py        |   5 +-
 src/wiktextract/extractor/zh/translation.py | 211 ++++++++++----------
 tests/test_zh_gloss.py                      |   2 +-
 tests/test_zh_headword.py                   |   2 +-
 tests/test_zh_translation.py                | 114 +++++------
 7 files changed, 172 insertions(+), 172 deletions(-)

diff --git a/src/wiktextract/extractor/fr/models.py b/src/wiktextract/extractor/fr/models.py
index dd8b5021a..2ad0137cf 100644
--- a/src/wiktextract/extractor/fr/models.py
+++ b/src/wiktextract/extractor/fr/models.py
@@ -66,7 +66,7 @@ class Linkage(FrenchBaseModel):
     word: str = ""
     tags: list[str] = []
     roman: str = ""
-    alt: str = Field("", description="ALternative form")
+    alt: str = Field("", description="Alternative form")
     translation: str = Field("", description="French translation")
     sense: str = Field("", description="Definition of the word")
     sense_index: int = Field(
diff --git a/src/wiktextract/extractor/zh/models.py b/src/wiktextract/extractor/zh/models.py
index 86b89e591..674aab89f 100644
--- a/src/wiktextract/extractor/zh/models.py
+++ b/src/wiktextract/extractor/zh/models.py
@@ -65,10 +65,12 @@ class Translation(ChineseBaseModel):
         "", description="Wiktionary language code of the translation term"
     )
     lang: str = Field("", description="Translation language name")
-    word: str = Field("", description="Translation term")
+    word: str = Field(description="Translation term")
     sense: str = Field("", description="Translation gloss")
     tags: list[str] = []
-    roman: str = ""
+    roman: str = Field("", description="Roman script")
+    alt: str = Field("", description="Alternative form")
+    lit: str = Field("", description="Literal translation for the term")
 
 
 class Linkage(ChineseBaseModel):
@@ -127,5 +129,5 @@ class WordEntry(ChineseBaseModel):
     descendants: list[Descendant] = []
     redirects: list[str] = Field(
         [],
-        description="Soft redirect page, extracted from template zh-see and ja-see",
+        description="Soft redirect page, extracted from template zh-see ja-see",
     )
diff --git a/src/wiktextract/extractor/zh/page.py b/src/wiktextract/extractor/zh/page.py
index f942d8c5b..61f35eb3d 100644
--- a/src/wiktextract/extractor/zh/page.py
+++ b/src/wiktextract/extractor/zh/page.py
@@ -31,8 +31,6 @@
 # Additional templates to be expanded in the pre-expand phase
 ADDITIONAL_EXPAND_TEMPLATES = frozenset(
     {
-        "multitrans",
-        "multitrans-nowiki",
         "col1",
         "col2",
         "col3",
@@ -198,6 +196,9 @@ def extract_pronunciation(
 def parse_page(
     wxr: WiktextractContext, page_title: str, page_text: str
 ) -> list[dict[str, Any]]:
+    # page layout documents
+    # https://zh.wiktionary.org/wiki/Wiktionary:佈局解釋
+    # https://zh.wiktionary.org/wiki/Wiktionary:体例说明
     if wxr.config.verbose:
         logging.info(f"Parsing page: {page_title}")
 
diff --git a/src/wiktextract/extractor/zh/translation.py b/src/wiktextract/extractor/zh/translation.py
index 8978ef375..8b28f78ba 100644
--- a/src/wiktextract/extractor/zh/translation.py
+++ b/src/wiktextract/extractor/zh/translation.py
@@ -1,127 +1,128 @@
-import re
 from typing import Optional, Union
 
-from mediawiki_langcodes import name_to_code
+from mediawiki_langcodes import code_to_name, name_to_code
 from wikitextprocessor import NodeKind, WikiNode
-from wikitextprocessor.parser import LEVEL_KIND_FLAGS
+from wikitextprocessor.parser import LEVEL_KIND_FLAGS, TemplateNode
 from wiktextract.page import clean_node
 from wiktextract.wxr_context import WiktextractContext
 
-from ..share import capture_text_in_parentheses
 from .models import Translation, WordEntry
 
 
 def extract_translation(
-    wxr: WiktextractContext, page_data: list[WordEntry], node: WikiNode
+    wxr: WiktextractContext,
+    page_data: list[WordEntry],
+    level_node: WikiNode,
+    sense: str = "",
 ) -> None:
-    sense_text = ""
-    for child in node.children:
-        if isinstance(child, WikiNode):
-            if child.kind == NodeKind.TEMPLATE:
-                template_name = child.template_name.lower()
-                if (
-                    template_name in {"trans-top", "翻譯-頂", "trans-top-also"}
-                    and 1 in child.template_parameters
-                ):
-                    sense_text = clean_node(
-                        wxr, None, child.template_parameters.get(1)
-                    )
-                elif template_name == "checktrans-top":
-                    return
-                elif template_name == "see translation subpage":
-                    translation_subpage(
-                        wxr, page_data, child.template_parameters
-                    )
-            elif child.kind == NodeKind.LIST:
-                for list_item_node in child.find_child(NodeKind.LIST_ITEM):
-                    if not list_item_node.contain_node(NodeKind.LIST):
-                        process_translation_list_item(
-                            wxr,
-                            page_data,
-                            clean_node(wxr, None, list_item_node.children),
-                            sense_text,
-                        )
-                    else:
-                        nested_list_index = 0
-                        for index, item_child in enumerate(
-                            list_item_node.children
-                        ):
-                            if (
-                                isinstance(item_child, WikiNode)
-                                and item_child.kind == NodeKind.LIST
-                            ):
-                                nested_list_index = index
-                                break
-
-                        process_translation_list_item(
-                            wxr,
-                            page_data,
-                            clean_node(
-                                wxr,
-                                None,
-                                list_item_node.children[:nested_list_index],
-                            ),
-                            sense_text,
-                        )
-                        for nested_list_node in list_item_node.find_child(
-                            NodeKind.LIST
-                        ):
-                            for nested_list_item in nested_list_node.find_child(
-                                NodeKind.LIST_ITEM
-                            ):
-                                process_translation_list_item(
-                                    wxr,
-                                    page_data,
-                                    clean_node(
-                                        wxr, None, nested_list_item.children
-                                    ),
-                                    sense_text,
-                                )
+    for child in level_node.find_child(NodeKind.TEMPLATE | NodeKind.LIST):
+        if isinstance(child, TemplateNode):
+            template_name = child.template_name.lower()
+            if (
+                template_name in {"trans-top", "翻譯-頂", "trans-top-also"}
+                and 1 in child.template_parameters
+            ):
+                sense = clean_node(wxr, None, child.template_parameters.get(1))
+            elif template_name == "see translation subpage":
+                translation_subpage(wxr, page_data, child.template_parameters)
+        else:
+            for list_item in child.find_child_recursively(NodeKind.LIST_ITEM):
+                process_translation_list_item(
+                    wxr,
+                    page_data,
+                    list_item,
+                    sense,
+                )
 
 
 def process_translation_list_item(
     wxr: WiktextractContext,
     page_data: list[WordEntry],
-    expanded_text: str,
+    list_item: WikiNode,
     sense: str,
 ) -> None:
-    from .headword_line import GENDERS
-
-    split_results = re.split(r":|：", expanded_text, maxsplit=1)
-    if len(split_results) != 2:
-        return
-    lang_text, words_text = split_results
-    lang_text = lang_text.strip()
-    words_text = words_text.strip()
-    if len(words_text) == 0:
-        return
-    lang_code = name_to_code(lang_text, "zh")
-
-    # split words by `,` or `;` that are not inside `()`
-    for word_and_tags in re.split(r"[,;、](?![^(]*\))\s*", words_text):
-        tags, word = capture_text_in_parentheses(word_and_tags)
-        tags = [tag for tag in tags if tag != lang_code]  # rm Wiktionary link
-        translation_data = Translation(
-            lang_code=lang_code, lang=lang_text, word=word
-        )
-        tags_without_roman = []
-        for tag in tags:
-            if re.search(r"[a-z]", tag):
-                translation_data.roman = tag
+    tr_data = Translation(word="", sense=sense)
+
+    for child in list_item.children:
+        if isinstance(child, str) and child.strip().endswith(("：", ":")):
+            tr_data.lang = clean_node(wxr, None, child).strip("：:")
+            tr_data.lang_code = name_to_code(tr_data.lang, "zh")
+        elif isinstance(child, TemplateNode):
+            template_name = child.template_name
+            if template_name in {
+                "t",
+                "t+",
+                "tt",
+                "tt+",
+                "t-check",
+                "t+check",
+                "t-needed",
+            }:
+                if len(tr_data.word) > 0:
+                    page_data[-1].translations.append(
+                        tr_data.model_copy(deep=True)
+                    )
+                    tr_data = Translation(
+                        word="",
+                        lang=tr_data.lang,
+                        lang_code=tr_data.lang_code,
+                        sense=sense,
+                    )
+                if tr_data.lang_code == "":
+                    tr_data.lang_code = child.template_parameters[1]
+                if tr_data.lang == "":
+                    tr_data.lang = code_to_name(tr_data.lang_code, "zh")
+                tr_data.word = clean_node(
+                    wxr, None, child.template_parameters[2]
+                )
+                tr_data.roman = clean_node(
+                    wxr, None, child.template_parameters.get("tr", "")
+                )
+                tr_data.alt = clean_node(
+                    wxr, None, child.template_parameters.get("alt", "")
+                )
+                tr_data.lit = clean_node(
+                    wxr, None, child.template_parameters.get("lit", "")
+                )
+                # find gender tags
+                expanded_template = wxr.wtp.parse(
+                    wxr.wtp.node_to_wikitext(child), expand_all=True
+                )
+                for span_node in expanded_template.find_html("span"):
+                    class_str = span_node.attrs.get("class", "")
+                    if "gender" in class_str:
+                        for abbr_tag in span_node.find_html("abbr"):
+                            if len(abbr_tag.attrs.get("title")) > 0:
+                                tr_data.tags.append(
+                                    clean_node(
+                                        wxr, None, abbr_tag.attrs.get("title")
+                                    )
+                                )
+                    elif tr_data.roman == "" and class_str.startswith("tr "):
+                        tr_data.roman = clean_node(wxr, None, span_node)
+            elif template_name == "multitrans":
+                multitrans = wxr.wtp.parse(
+                    child.template_parameter.get("data", "")
+                )
+                extract_translation(wxr, page_data, multitrans, sense)
             else:
-                tags_without_roman.append(tag)
-
-        if len(tags_without_roman) > 0:
-            translation_data.tags = tags_without_roman
-
-        gender = word.split(" ")[-1]
-        if gender in GENDERS:
-            translation_data.word = word.removesuffix(f" {gender}")
-            translation_data.tags.append(GENDERS.get(gender))
-
-        if len(sense) > 0:
-            translation_data.sense = sense
-        page_data[-1].translations.append(translation_data)
+                # qualifier template
+                tag = clean_node(wxr, None, child)
+                if len(tag) > 0:
+                    tr_data.tags.append(tag.strip("()"))
+        elif isinstance(child, WikiNode) and child.kind == NodeKind.LINK:
+            if len(tr_data.word) > 0:
+                page_data[-1].translations.append(tr_data.model_copy(deep=True))
+                tr_data = Translation(
+                    word="",
+                    lang=tr_data.lang,
+                    lang_code=tr_data.lang_code,
+                    sense=sense,
+                )
+            tr_data.word = clean_node(wxr, None, child)
+
+    if len(tr_data.word) > 0:
+        page_data[-1].translations.append(tr_data.model_copy(deep=True))
 
 
 def translation_subpage(
diff --git a/tests/test_zh_gloss.py b/tests/test_zh_gloss.py
index 3a5536903..d2af44788 100644
--- a/tests/test_zh_gloss.py
+++ b/tests/test_zh_gloss.py
@@ -6,8 +6,8 @@
 from wiktextract.extractor.zh.models import Sense, WordEntry
 from wiktextract.extractor.zh.page import (
     extract_gloss,
-    parse_section,
     parse_page,
+    parse_section,
 )
 from wiktextract.thesaurus import close_thesaurus_db
 from wiktextract.wxr_context import WiktextractContext
diff --git a/tests/test_zh_headword.py b/tests/test_zh_headword.py
index d9f95cbda..5a2739ef7 100644
--- a/tests/test_zh_headword.py
+++ b/tests/test_zh_headword.py
@@ -1,5 +1,5 @@
 from unittest import TestCase
-from unittest.mock import Mock, patch
+from unittest.mock import Mock
 
 from wikitextprocessor import Wtp
 from wiktextract.extractor.zh.headword_line import extract_headword_line
diff --git a/tests/test_zh_translation.py b/tests/test_zh_translation.py
index 2090535b5..357b6c370 100644
--- a/tests/test_zh_translation.py
+++ b/tests/test_zh_translation.py
@@ -1,7 +1,6 @@
 from unittest import TestCase
-from unittest.mock import patch
 
-from wikitextprocessor import Page, Wtp
+from wikitextprocessor import Wtp
 from wiktextract.config import WiktionaryConfig
 from wiktextract.extractor.zh.models import WordEntry
 from wiktextract.extractor.zh.translation import extract_translation
@@ -21,25 +20,22 @@ def tearDown(self) -> None:
             self.wxr.thesaurus_db_path, self.wxr.thesaurus_db_conn
         )
 
-    @patch(
-        "wikitextprocessor.Wtp.get_page",
-        return_value=Page(title="", namespace_id=10, body=""),
-    )
-    def test_normal(self, mock_get_page) -> None:
-        # test wikitext from page "你好" and "這裡"
-        page_data = [WordEntry(word="你好", lang_code="zh", lang="漢語")]
-        wikitext = """
-{{trans-top|靠近說話者的地方}}
-* 阿爾巴尼亞語：këtu (sq)
-* 阿帕切語：
-*: 西阿帕切語：kú
-* 阿拉伯語：هُنَا‎ (hunā)
-*: 埃及阿拉伯語：هنا‎ (henā)
-*俄语：[[привет|приве́т]] ‎(privét) (非正式), [[здравствуйте|здра́вствуйте]] ‎(zdrávstvujte) (正式, 第一个"в"不发音)
-{{trans-bottom}}
-* 斯洛伐克語：pracovať impf
-        """
-        self.wxr.wtp.start_page("你好")
+    def test_t_template(self):
+        self.wxr.wtp.start_page("太陽風")
+        self.wxr.wtp.add_page(
+            "Template:t+",
+            10,
+            """{{#switch:{{{3}}}
+|f=<span class="gender"><abbr title="陰性名詞">f</abbr></span>
+|m=<span class="gender"><abbr title="陽性名詞">m</abbr></span>
+}}""",
+        )
+        self.wxr.wtp.add_page("Template:qualifier", 10, "({{{1}}})")
+        page_data = [WordEntry(word="太陽風", lang_code="zh", lang="漢語")]
+        wikitext = """{{trans-top|太陽上層大氣射出的超高速電漿流}}
+* 希伯来语：{{t+|he|רוח השמש|tr=ruakh ha-shemesh}}、{{t+|he|רוח סולרית|f|tr=ruakh solarit}}
+* 塞尔维亚-克罗地亚语：
+*: 西里尔字母：{{qualifier|Ekavian}} {{t+|sh|сунчев ветар|m}}"""
         node = self.wxr.wtp.parse(wikitext)
         extract_translation(self.wxr, page_data, node)
         self.assertEqual(
@@ -49,52 +45,52 @@ def test_normal(self, mock_get_page) -> None:
             ],
             [
                 {
-                    "lang_code": "sq",
-                    "lang": "阿爾巴尼亞語",
-                    "sense": "靠近說話者的地方",
-                    "word": "këtu",
-                },
-                {
-                    "lang": "西阿帕切語",
-                    "sense": "靠近說話者的地方",
-                    "word": "kú",
+                    "lang_code": "he",
+                    "lang": "希伯来语",
+                    "sense": "太陽上層大氣射出的超高速電漿流",
+                    "word": "רוח השמש",
+                    "roman": "ruakh ha-shemesh",
                 },
                 {
-                    "lang_code": "ar",
-                    "lang": "阿拉伯語",
-                    "sense": "靠近說話者的地方",
-                    "roman": "hunā",
-                    "word": "هُنَا",
+                    "lang_code": "he",
+                    "lang": "希伯来语",
+                    "sense": "太陽上層大氣射出的超高速電漿流",
+                    "word": "רוח סולרית",
+                    "roman": "ruakh solarit",
+                    "tags": ["陰性名詞"],
                 },
                 {
-                    "lang_code": "arz",
-                    "lang": "埃及阿拉伯語",
-                    "sense": "靠近說話者的地方",
-                    "roman": "henā",
-                    "word": "هنا",
-                },
-                {
-                    "lang_code": "ru",
-                    "lang": "俄语",
-                    "sense": "靠近說話者的地方",
-                    "roman": "privét",
-                    "tags": ["非正式"],
-                    "word": "приве́т",
+                    "lang_code": "sh",
+                    "lang": "西里尔字母",
+                    "sense": "太陽上層大氣射出的超高速電漿流",
+                    "word": "сунчев ветар",
+                    "tags": ["Ekavian", "陽性名詞"],
                 },
+            ],
+        )
+
+    def test_link_words(self):
+        self.wxr.wtp.start_page("你好")
+        page_data = [WordEntry(word="你好", lang_code="zh", lang="漢語")]
+        wikitext = """{{翻譯-頂}}
+*英语：[[how do you do]]; [[how are you]]"""
+        node = self.wxr.wtp.parse(wikitext)
+        extract_translation(self.wxr, page_data, node)
+        self.assertEqual(
+            [
+                d.model_dump(exclude_defaults=True)
+                for d in page_data[0].translations
+            ],
+            [
                 {
-                    "lang_code": "ru",
-                    "lang": "俄语",
-                    "sense": "靠近說話者的地方",
-                    "roman": "zdrávstvujte",
-                    "tags": ['正式, 第一个"в"不发音'],
-                    "word": "здра́вствуйте",
+                    "lang_code": "en",
+                    "lang": "英语",
+                    "word": "how do you do",
                 },
                 {
-                    "lang_code": "sk",
-                    "lang": "斯洛伐克語",
-                    "sense": "靠近說話者的地方",
-                    "tags": ["imperfective aspect"],
-                    "word": "pracovať",
+                    "lang_code": "en",
+                    "lang": "英语",
+                    "word": "how are you",
                 },
             ],
         )

From 4f5c2f8e2819dc3b99d01c39d89f4831770de404 Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Mon, 29 Jan 2024 15:24:06 +0800
Subject: [PATCH 2/5] Process zh edition "trans-see" subpage translation
 template

---
 src/wiktextract/extractor/zh/translation.py | 57 ++++++++++-----------
 tests/test_zh_translation.py                | 31 +++++++++++
 2 files changed, 57 insertions(+), 31 deletions(-)

diff --git a/src/wiktextract/extractor/zh/translation.py b/src/wiktextract/extractor/zh/translation.py
index 8b28f78ba..aed6f63fc 100644
--- a/src/wiktextract/extractor/zh/translation.py
+++ b/src/wiktextract/extractor/zh/translation.py
@@ -23,8 +23,15 @@ def extract_translation(
                 and 1 in child.template_parameters
             ):
                 sense = clean_node(wxr, None, child.template_parameters.get(1))
-            elif template_name == "see translation subpage":
-                translation_subpage(wxr, page_data, child.template_parameters)
+            elif template_name in {"see translation subpage", "trans-see"}:
+                translation_subpage(wxr, page_data, child)
+            elif template_name == "multitrans":
+                wikitext = "".join(
+                    wxr.wtp.node_to_wikitext(c)
+                    for c in child.template_parameters.get("data", [])
+                )
+                multitrans = wxr.wtp.parse(wikitext)
+                extract_translation(wxr, page_data, multitrans, sense)
         else:
             for list_item in child.find_child_recursively(NodeKind.LIST_ITEM):
                 process_translation_list_item(
@@ -100,11 +107,6 @@ def process_translation_list_item(
                                 )
                     elif tr_data.roman == "" and class_str.startswith("tr "):
                         tr_data.roman = clean_node(wxr, None, span_node)
-            elif template_name == "multitrans":
-                multitrans = wxr.wtp.parse(
-                    child.template_parameter.get("data", "")
-                )
-                extract_translation(wxr, page_data, multitrans, sense)
             else:
                 # qualifier template
                 tag = clean_node(wxr, None, child)
@@ -128,18 +130,21 @@ def process_translation_list_item(
 def translation_subpage(
     wxr: WiktextractContext,
     page_data: list[WordEntry],
-    template_args: dict[str, str],
+    template_node: TemplateNode,
 ) -> None:
+    # https://zh.wiktionary.org/wiki/Template:翻譯-見
+    # https://zh.wiktionary.org/wiki/Template:See_translation_subpage
     from .page import ADDITIONAL_EXPAND_TEMPLATES
 
     page_title = wxr.wtp.title
     target_section = None
-    if len(template_args) > 0:
-        target_section = template_args.get(1)
-    if len(template_args) > 1:
-        page_title = template_args.get(2)
+    if template_node.template_name == "see translation subpage":
+        target_section = template_node.template_parameters.get(1)
+    page_title = template_node.template_parameters.get(2, wxr.wtp.title)
 
-    translation_subpage_title = f"{page_title}/翻譯"
+    translation_subpage_title = page_title
+    if page_title == wxr.wtp.title:
+        translation_subpage_title = f"{page_title}/翻譯"
     subpage = wxr.wtp.get_page(translation_subpage_title)
     if subpage is None:
         return
@@ -166,22 +171,12 @@ def find_subpage_section(
     node: Union[WikiNode, str],
     target_section: Union[str, list[str]],
 ) -> Optional[WikiNode]:
-    if isinstance(node, WikiNode):
-        if node.kind in LEVEL_KIND_FLAGS:
-            section_title = clean_node(wxr, None, node.largs)
-            if (
-                isinstance(target_section, str)
-                and section_title == target_section
-            ):
-                return node
-            if (
-                isinstance(target_section, list)
-                and section_title in target_section
-            ):
-                return node
-
-        for child in node.children:
-            returned_node = find_subpage_section(wxr, child, target_section)
-            if returned_node is not None:
-                return returned_node
+    if not isinstance(node, WikiNode):
+        return None
+    for level_node in node.find_child_recursively(LEVEL_KIND_FLAGS):
+        section_title = clean_node(wxr, None, level_node.largs)
+        if isinstance(target_section, str) and section_title == target_section:
+            return level_node
+        if isinstance(target_section, list) and section_title in target_section:
+            return level_node
     return None
diff --git a/tests/test_zh_translation.py b/tests/test_zh_translation.py
index 357b6c370..4ee6a090c 100644
--- a/tests/test_zh_translation.py
+++ b/tests/test_zh_translation.py
@@ -94,3 +94,34 @@ def test_link_words(self):
                 },
             ],
         )
+
+    def test_subpage_multitrans(self):
+        self.wxr.wtp.start_page("英語")
+        self.wxr.wtp.add_page(
+            "英語/翻譯",
+            0,
+            """==漢語==
+===名詞===
+====翻譯====
+{{trans-top|一種源於英格蘭的語言}}{{multitrans|data=
+* 阿布哈茲語：{{tt|ab|англыз бызшәа}}
+}}""",
+        )
+        page_data = [WordEntry(word="英語", lang_code="zh", lang="漢語")]
+        wikitext = "{{trans-see|源於英格蘭的語言|英語/翻譯}}"
+        node = self.wxr.wtp.parse(wikitext)
+        extract_translation(self.wxr, page_data, node)
+        self.assertEqual(
+            [
+                d.model_dump(exclude_defaults=True)
+                for d in page_data[0].translations
+            ],
+            [
+                {
+                    "lang_code": "ab",
+                    "lang": "阿布哈茲語",
+                    "word": "англыз бызшәа",
+                    "sense": "一種源於英格蘭的語言",
+                }
+            ],
+        )

From 565fc09ef77105ac1af6c4e6ec7b9ac710b29120 Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Mon, 29 Jan 2024 16:00:59 +0800
Subject: [PATCH 3/5] Ignore zh edition empty translation template "t-needed"

---
 src/wiktextract/extractor/zh/translation.py | 8 +++++---
 tests/test_zh_translation.py                | 1 +
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/wiktextract/extractor/zh/translation.py b/src/wiktextract/extractor/zh/translation.py
index aed6f63fc..92a9c10e2 100644
--- a/src/wiktextract/extractor/zh/translation.py
+++ b/src/wiktextract/extractor/zh/translation.py
@@ -63,7 +63,6 @@ def process_translation_list_item(
                 "tt+",
                 "t-check",
                 "t+check",
-                "t-needed",
             }:
                 if len(tr_data.word) > 0:
                     page_data[-1].translations.append(
@@ -76,11 +75,11 @@ def process_translation_list_item(
                         sense=sense,
                     )
                 if tr_data.lang_code == "":
-                    tr_data.lang_code = child.template_parameters[1]
+                    tr_data.lang_code = child.template_parameters.get(1, "")
                 if tr_data.lang == "":
                     tr_data.lang = code_to_name(tr_data.lang_code, "zh")
                 tr_data.word = clean_node(
-                    wxr, None, child.template_parameters[2]
+                    wxr, None, child.template_parameters.get(2, "")
                 )
                 tr_data.roman = clean_node(
                     wxr, None, child.template_parameters.get("tr", "")
@@ -107,6 +106,9 @@ def process_translation_list_item(
                                 )
                     elif tr_data.roman == "" and class_str.startswith("tr "):
                         tr_data.roman = clean_node(wxr, None, span_node)
+            elif template_name == "t-needed":
+                # ignore empty translation
+                continue
             else:
                 # qualifier template
                 tag = clean_node(wxr, None, child)
diff --git a/tests/test_zh_translation.py b/tests/test_zh_translation.py
index 4ee6a090c..3282ec767 100644
--- a/tests/test_zh_translation.py
+++ b/tests/test_zh_translation.py
@@ -105,6 +105,7 @@ def test_subpage_multitrans(self):
 ====翻譯====
 {{trans-top|一種源於英格蘭的語言}}{{multitrans|data=
 * 阿布哈茲語：{{tt|ab|англыз бызшәа}}
+* 阿拉貢語：{{t-needed|an}}
 }}""",
         )
         page_data = [WordEntry(word="英語", lang_code="zh", lang="漢語")]

From 50c48eecc87d25cfa71a1cf790c924c476485989 Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Mon, 29 Jan 2024 16:41:45 +0800
Subject: [PATCH 4/5] Handle Russian translation list in zh edition

Some pages only have Russian translations and put numbered markers
such as "1)" after the language name text.
---
 src/wiktextract/extractor/zh/translation.py |  6 ++--
 tests/test_zh_translation.py                | 31 +++++++++++++++++++++
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/src/wiktextract/extractor/zh/translation.py b/src/wiktextract/extractor/zh/translation.py
index 92a9c10e2..6bab12381 100644
--- a/src/wiktextract/extractor/zh/translation.py
+++ b/src/wiktextract/extractor/zh/translation.py
@@ -50,9 +50,9 @@ def process_translation_list_item(
 ) -> None:
     tr_data = Translation(word="", sense=sense)
 
-    for child in list_item.children:
-        if isinstance(child, str) and child.strip().endswith(("：", ":")):
-            tr_data.lang = clean_node(wxr, None, child).strip("：:")
+    for child_index, child in enumerate(list_item.children):
+        if child_index == 0 and isinstance(child, str) and "：" in child:
+            tr_data.lang = clean_node(wxr, None, child[: child.index("：")])
             tr_data.lang_code = name_to_code(tr_data.lang, "zh")
         elif isinstance(child, TemplateNode):
             template_name = child.template_name
diff --git a/tests/test_zh_translation.py b/tests/test_zh_translation.py
index 3282ec767..2281b9edc 100644
--- a/tests/test_zh_translation.py
+++ b/tests/test_zh_translation.py
@@ -126,3 +126,34 @@ def test_subpage_multitrans(self):
                 }
             ],
         )
+
+    def test_strange_russian_translation(self):
+        self.wxr.wtp.start_page("林场")
+        page_data = [WordEntry(word="林场", lang_code="zh", lang="漢語")]
+        node = self.wxr.wtp.parse(
+            "*俄语：1) [[лесничество]], [[лесхоз]]; 2) [[лесосека]]"
+        )
+        extract_translation(self.wxr, page_data, node)
+        self.assertEqual(
+            [
+                d.model_dump(exclude_defaults=True)
+                for d in page_data[0].translations
+            ],
+            [
+                {
+                    "lang_code": "ru",
+                    "lang": "俄语",
+                    "word": "лесничество",
+                },
+                {
+                    "lang_code": "ru",
+                    "lang": "俄语",
+                    "word": "лесхоз",
+                },
+                {
+                    "lang_code": "ru",
+                    "lang": "俄语",
+                    "word": "лесосека",
+                },
+            ],
+        )

From f957217e3e831ca22332fe3d6157d7fecd77fa25 Mon Sep 17 00:00:00 2001
From: xxyzz <gitpull@protonmail.com>
Date: Mon, 29 Jan 2024 17:18:00 +0800
Subject: [PATCH 5/5] Get translation language name from expanded template text

---
 src/wiktextract/extractor/zh/translation.py | 17 +++++++++++++----
 tests/test_zh_translation.py                | 20 ++++++++++++++++++++
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/src/wiktextract/extractor/zh/translation.py b/src/wiktextract/extractor/zh/translation.py
index 6bab12381..eb4786b13 100644
--- a/src/wiktextract/extractor/zh/translation.py
+++ b/src/wiktextract/extractor/zh/translation.py
@@ -50,10 +50,19 @@ def process_translation_list_item(
 ) -> None:
     tr_data = Translation(word="", sense=sense)
 
-    for child_index, child in enumerate(list_item.children):
-        if child_index == 0 and isinstance(child, str) and "：" in child:
-            tr_data.lang = clean_node(wxr, None, child[: child.index("：")])
-            tr_data.lang_code = name_to_code(tr_data.lang, "zh")
+    for child_index, child in enumerate(list_item.filter_empty_str_child()):
+        if child_index == 0:
+            lang_text = ""
+            if isinstance(child, str):
+                if "：" in child:
+                    lang_text = child[: child.index("：")]
+                elif ":" in child:
+                    lang_text = child[: child.index(":")]
+            else:
+                lang_text = clean_node(wxr, None, child)
+            if len(lang_text) > 0:
+                tr_data.lang = lang_text.strip()
+                tr_data.lang_code = name_to_code(tr_data.lang, "zh")
         elif isinstance(child, TemplateNode):
             template_name = child.template_name
             if template_name in {
diff --git a/tests/test_zh_translation.py b/tests/test_zh_translation.py
index 2281b9edc..7401bf4d5 100644
--- a/tests/test_zh_translation.py
+++ b/tests/test_zh_translation.py
@@ -157,3 +157,23 @@ def test_strange_russian_translation(self):
                 },
             ],
         )
+
+    def test_language_name_template(self):
+        self.wxr.wtp.start_page("解析幾何")
+        page_data = [WordEntry(word="解析幾何", lang_code="zh", lang="漢語")]
+        self.wxr.wtp.add_page("Template:en", 10, "英語")
+        node = self.wxr.wtp.parse("* {{en}}：{{t+|en|analytic geometry}}")
+        extract_translation(self.wxr, page_data, node)
+        self.assertEqual(
+            [
+                d.model_dump(exclude_defaults=True)
+                for d in page_data[0].translations
+            ],
+            [
+                {
+                    "lang_code": "en",
+                    "lang": "英語",
+                    "word": "analytic geometry",
+                },
+            ],
+        )