diff --git a/src/wiktextract/clean.py b/src/wiktextract/clean.py
index 7cb7e46da..1f078fefd 100644
--- a/src/wiktextract/clean.py
+++ b/src/wiktextract/clean.py
@@ -9,13 +9,12 @@
import re
import html
import unicodedata
-from typing import (
- Callable,
- Optional,
- Union
-)
+from typing import Callable, Optional, Union
from wikitextprocessor.common import MAGIC_FIRST, MAGIC_LAST
-from wikitextprocessor.core import NamespaceDataEntry
+from wikitextprocessor.core import (
+ NamespaceDataEntry,
+ TemplateArgs,
+)
from .wxr_context import WiktextractContext
######################################################################
@@ -94,7 +93,7 @@
"ι": "ᶥ",
"φ": "ᵠ",
"χ": "ᵡ",
- "∞": "\u2002᪲" # This is a KLUDGE
+ "∞": "\u2002᪲", # This is a KLUDGE
}
subscript_ht: dict[str, str] = {
@@ -137,6 +136,7 @@
"χ": "ᵪ",
}
+
def to_superscript(text: str) -> str:
"Converts text to superscript."
if not text:
@@ -147,6 +147,7 @@ def to_superscript(text: str) -> str:
return "^" + text
return "^({})".format(text)
+
def to_subscript(text: str) -> str:
"""Converts text to subscript."""
if not text:
@@ -157,10 +158,11 @@ def to_subscript(text: str) -> str:
return "_" + text
return "_({})".format(text)
+
def to_chem(text: str) -> str:
"""Converts text to chemical formula, making digits subscript."""
- return "".join(to_subscript(x) if x.isdigit() else x
- for x in text)
+ return "".join(to_subscript(x) if x.isdigit() else x for x in text)
+
# Mapping from Latex names to Unicode characters/strings. This is the
# default mapping (some cases are handled specially in the code).
@@ -886,7 +888,6 @@ def to_chem(text: str) -> str:
"zpipe": "⨠",
"zproject": "⨡",
"|": "‖",
-
# Accents XXX these really should be handled specially with diacritics
# after argument
"acute": "́",
@@ -906,8 +907,6 @@ def to_chem(text: str) -> str:
"overline": "◌̅",
"tilde": "̃",
"vec": "⃑",
-
-
# Some ignored operators
"bigl": "",
"bigr": "",
@@ -973,7 +972,7 @@ def to_chem(text: str) -> str:
"z": "𝓏",
}
-mathfrak_map: dict[str, str]= {
+mathfrak_map: dict[str, str] = {
"A": "𝔄",
"B": "𝔅",
"C": "ℭ",
@@ -1070,15 +1069,19 @@ def to_chem(text: str) -> str:
"9": "𝟡",
}
+
def mathcal_fn(text: str) -> str:
return "".join(mathcal_map.get(x, x) for x in text)
+
def mathfrak_fn(text: str) -> str:
return "".join(mathfrak_map.get(x, x) for x in text)
+
def mathbb_fn(text: str) -> str:
return "".join(mathbb_map.get(x, x) for x in text)
+
def to_math(text: str) -> str:
"""Converts a mathematical formula to ASCII."""
# print("to_math: {!r}".format(text))
@@ -1088,22 +1091,25 @@ def expand(text: str) -> str:
while True:
orig = text
# formatting with {:c} converts input into character
- text = re.sub(r"[{:c}-{:c}]".format(MAGIC_FIRST, MAGIC_LAST),
- lambda m: magic_vec[ord(m.group(0)) - MAGIC_FIRST],
- text)
+ text = re.sub(
+ r"[{:c}-{:c}]".format(MAGIC_FIRST, MAGIC_LAST),
+ lambda m: magic_vec[ord(m.group(0)) - MAGIC_FIRST],
+ text,
+ )
if text == orig:
break
return text
def recurse(text: str) -> str:
- def math_magic(text: str,
- left: str,
- right: str,
- fn: Callable[[str], str]
+ def math_magic(
+ text: str, left: str, right: str, fn: Callable[[str], str]
) -> str:
regexp_str = r"{}([^{}{}]+){}".format(
- re.escape(left), re.escape(left),
- re.escape(right), re.escape(right))
+ re.escape(left),
+ re.escape(left),
+ re.escape(right),
+ re.escape(right),
+ )
regexp = re.compile(regexp_str)
def repl(m: re.Match) -> str:
@@ -1150,8 +1156,11 @@ def expand_group(v: str) -> str:
elif re.match(r"\\sqrt($|[0-9]|\b)", v):
v = "√"
elif re.match(r"\\(frac|binom)($|[0-9]|\b)", v):
- m = re.match(r"\\(frac|binom)\s*(\\[a-zA-Z]+|\\.|.)\s*"
- r"(\\[a-zA-Z]+|\\.|.)$", v)
+ m = re.match(
+ r"\\(frac|binom)\s*(\\[a-zA-Z]+|\\.|.)\s*"
+ r"(\\[a-zA-Z]+|\\.|.)$",
+ v,
+ )
if not m:
print("MATH FRAC/BINOM ERROR: {!r}".format(v))
return v
@@ -1198,31 +1207,37 @@ def expand_group(v: str) -> str:
text = math_magic(text, "{", "}", recurse)
if text == orig:
break
- for m in re.finditer(r"\s+|"
- r"\\frac\s*(\\[a-zA-Z]+|\\.|.)\s*"
- r"(\\dot\\(bigvee|cup|cap|lor|vee)|"
- r"\\not\\(subset|supset|subseteq|supseteq|in|ni|"
- r"preceq|succeq|vartrianglelefteq|"
- r"vartrianglerighteq|trianglelefteq|"
- r"trianglerighteq)|"
- r"\\widehat\{=\}|\\widehat=|"
- r"\\overset\{?\}\{=\}|"
- r"\\overset\?=|"
- r"\\overset\{\\operatorname\{def\}\}\{=\}|"
- r"\\[a-zA-Z]+|\\.|.)|"
- r"(\\(mathcal|mathfrak|mathbb|text|begin|end|pmod)"
- r"\b\s*|"
- r"\\sqrt\b(\[\d+\])?)?"
- r"[_^]?(\\[a-zA-Z]+\s*|\\.|\w+|.)", text):
+ for m in re.finditer(
+ r"\s+|"
+ r"\\frac\s*(\\[a-zA-Z]+|\\.|.)\s*"
+ r"(\\dot\\(bigvee|cup|cap|lor|vee)|"
+ r"\\not\\(subset|supset|subseteq|supseteq|in|ni|"
+ r"preceq|succeq|vartrianglelefteq|"
+ r"vartrianglerighteq|trianglelefteq|"
+ r"trianglerighteq)|"
+ r"\\widehat\{=\}|\\widehat=|"
+ r"\\overset\{?\}\{=\}|"
+ r"\\overset\?=|"
+ r"\\overset\{\\operatorname\{def\}\}\{=\}|"
+ r"\\[a-zA-Z]+|\\.|.)|"
+ r"(\\(mathcal|mathfrak|mathbb|text|begin|end|pmod)"
+ r"\b\s*|"
+ r"\\sqrt\b(\[\d+\])?)?"
+ r"[_^]?(\\[a-zA-Z]+\s*|\\.|\w+|.)",
+ text,
+ ):
v = m.group(0).strip()
if not v:
continue
v = expand_group(v)
if v:
- if ((parts and parts[-1][-1].isalpha() and
- v[0] in "0123456789") or
- (parts and parts[-1][-1] in "0123456789" and
- v[0] in "0123456789")):
+ if (
+ parts and parts[-1][-1].isalpha() and v[0] in "0123456789"
+ ) or (
+ parts
+ and parts[-1][-1] in "0123456789"
+ and v[0] in "0123456789"
+ ):
v = " " + v
parts.append(v)
@@ -1237,7 +1252,7 @@ def expand_group(v: str) -> str:
def bold_follows(parts: list[str], i: int) -> bool:
"""Checks if there is a bold (''') in parts after parts[i]. We allow
intervening italics ('')."""
- parts = parts[i + 1:]
+ parts = parts[i + 1 :]
for p in parts:
if not p.startswith("''"):
continue
@@ -1308,13 +1323,12 @@ def remove_italic_and_bold(text: str) -> str:
continue
new_text_parts.append(part)
new_text_parts.append("\n")
- new_text_parts = new_text_parts[:-1] # remove last \n
+ new_text_parts = new_text_parts[:-1] # remove last \n
return "".join(new_text_parts)
-def clean_value(wxr: WiktextractContext,
- title: str,
- no_strip=False,
- no_html_strip=False
+
+def clean_value(
+ wxr: WiktextractContext, title: str, no_strip=False, no_html_strip=False
) -> str:
"""Cleans a title or value into a normal string. This should basically
remove any Wikimedia formatting from it: HTML tags, templates, links,
@@ -1334,17 +1348,18 @@ def repl_exturl(m: re.Match) -> str:
break
i += 1
return " ".join(args[i:])
+
def repl_link(m: re.Match) -> str:
if m.group(2) and m.group(2).lower() in ("file", "image"):
return ""
v = m.group(3).split("|")
return clean_value(wxr, v[0], no_strip=True)
+
def repl_link_bars(m: re.Match) -> str:
lnk = m.group(1)
if re.match(r"(?si)(File|Image)\s*:", lnk):
return ""
- return clean_value(wxr, m.group(4) or m.group(2) or "",
- no_strip=True)
+ return clean_value(wxr, m.group(4) or m.group(2) or "", no_strip=True)
def repl_1_sup(m: re.Match) -> str:
return to_superscript(clean_value(wxr, m.group(1)))
@@ -1373,34 +1388,47 @@ def repl_1_syntaxhighlight(m: re.Match) -> str:
    # Remove references (<ref>...</ref>).
    title = re.sub(r"(?is)<ref\b[^>]*?>\s*.*?</ref\s*>", "", title)
    # Replace <span>...</span> by stripped content without newlines
-    title = re.sub(r"(?is)<span\b[^>]*?>(.*?)\s*</span\s*>",
-                   lambda m: re.sub(r"\s+", " ", m.group(1)),
-                   title)
+    title = re.sub(
+        r"(?is)<span\b[^>]*?>(.*?)\s*</span\s*>",
+        lambda m: re.sub(r"\s+", " ", m.group(1)),
+        title,
+    )
    # Replace <br/> by comma space (it is used to express alternatives in some
    # declensions)
    title = re.sub(r"(?si)\s*<br\s*/?>\n*", "\n", title)
# Remove divs with floatright class (generated e.g. by {{ja-kanji|...}})
-    title = re.sub(r'(?si)<div\b[^>]*?\bclass="[^"]*?\bfloatright\b[^>]*?>'
-                   r'(((<div\b[^>]*>|.)*?</div\s*>)|.)*?'
-                   r'</div\s*>',
-                   "", title)
+    title = re.sub(
+        r'(?si)<div\b[^>]*?\bclass="[^"]*?\bfloatright\b[^>]*?>'
+        r"(((<div\b[^>]*>|.)*?</div\s*>)|.)*?"
+        r"</div\s*>",
+        "",
+        title,
+    )
# Remove divs with float: attribute
-    title = re.sub(r'(?si)<div\b[^>]*?\bstyle="[^"]*?\bfloat:[^>]*?>'
-                   r'(((<div\b[^>]*>|.)*?</div\s*>)|.)*?'
-                   r'</div\s*>',
-                   "", title)
+    title = re.sub(
+        r'(?si)<div\b[^>]*?\bstyle="[^"]*?\bfloat:[^>]*?>'
+        r"(((<div\b[^>]*>|.)*?</div\s*>)|.)*?"
+        r"</div\s*>",
+        "",
+        title,
+    )
    # Remove <span> with previewonly class (generated e.g. by {{taxlink|...}})
-    title = re.sub(r'(?si)<span\b[^>]*?\bclass="[^"<>]*?'
-                   r'\bpreviewonly\b[^>]*?>'
-                   r'.+?</span\s*>',
-                   "", title)
+    title = re.sub(
+        r'(?si)<span\b[^>]*?\bclass="[^"<>]*?'
+        r"\bpreviewonly\b[^>]*?>"
+        r".+?</span\s*>",
+        "",
+        title,
+    )
    # Remove <strong class="error">...</strong>
-    title = re.sub(r'(?si)<strong\b[^>]*?\bclass="[^"]*?\berror\b[^>]*?>'
-                   r'.+?</strong\s*>',
-                   "", title)
+    title = re.sub(
+        r'(?si)<strong\b[^>]*?\bclass="[^"]*?\berror\b[^>]*?>'
+        r".+?</strong\s*>",
+        "",
+        title,
+    )
    # Change <div> and </div> to newlines. Ditto for tr, li, table, dl, ul, ol
-    title = re.sub(r"(?si)</?(div|tr|li|table|dl|ul|ol)\b[^>]*>",
-                   "\n", title)
+    title = re.sub(r"(?si)</?(div|tr|li|table|dl|ul|ol)\b[^>]*>", "\n", title)
    # Change <dt>, <dd>, </dt> and </dd> into newlines;
    # these generate new rows/lines.
    title = re.sub(r"(?i)</?d[dt]\s*>", "\n", title)
@@ -1408,22 +1436,20 @@ def repl_1_syntaxhighlight(m: re.Match) -> str:
    title = re.sub(r"(?si)</?(td|th)\b[^>]*>", " ", title)
    # Change <sup>...</sup> to ^
    title = re.sub(r"(?si)<sup\b[^>]*>\s*</sup>", "", title)
-    title = re.sub(r"(?si)<sup\b[^>]*>(.*?)</sup>",
-                   repl_1_sup, title)
+    title = re.sub(r"(?si)<sup\b[^>]*>(.*?)</sup>", repl_1_sup, title)
    # Change <sub>...</sub> to _
    title = re.sub(r"(?si)<sub\b[^>]*>\s*</sub>", "", title)
-    title = re.sub(r"(?si)<sub\b[^>]*>(.*?)</sub>",
-                   repl_1_sub, title)
+    title = re.sub(r"(?si)<sub\b[^>]*>(.*?)</sub>", repl_1_sub, title)
    # Change <chem>...</chem> using subscripts for digits
-    title = re.sub(r"(?si)<chem\b[^>]*>(.*?)</chem>",
-                   repl_1_chem, title)
+    title = re.sub(r"(?si)<chem\b[^>]*>(.*?)</chem>", repl_1_chem, title)
    # Change <math>...</math> using special formatting.
-    title = re.sub(r"(?si)<math\b[^>]*>(.*?)</math>",
-                   repl_1_math, title)
+    title = re.sub(r"(?si)<math\b[^>]*>(.*?)</math>", repl_1_math, title)
    # Change <syntaxhighlight>...</syntaxhighlight> using special formatting.
-    title = re.sub(r"(?si)<syntaxhighlight\b[^>]*>(.*?)"
-                   r"</syntaxhighlight>",
-                   repl_1_syntaxhighlight, title)
+    title = re.sub(
+        r"(?si)<syntaxhighlight\b[^>]*>(.*?)" r"</syntaxhighlight>",
+        repl_1_syntaxhighlight,
+        title,
+    )
# Remove any remaining HTML tags.
if not no_html_strip:
title = re.sub(r"(?s)<[/!a-zA-Z][^>]*>", "", title)
@@ -1441,7 +1467,7 @@ def repl_1_syntaxhighlight(m: re.Match) -> str:
category_ns_data: NamespaceDataEntry
# XXX "Category" -> config variable for portability
- category_ns_data = wxr.wtp.NAMESPACE_DATA.get("Category", {}) # type: ignore[typeddict-item]
+ category_ns_data = wxr.wtp.NAMESPACE_DATA.get("Category", {}) # type: ignore[typeddict-item]
# Fail if we received empty dict from .get()
category_ns_names = {category_ns_data["name"]} | set(
category_ns_data["aliases"]
@@ -1455,22 +1481,30 @@ def repl_1_syntaxhighlight(m: re.Match) -> str:
"",
title,
)
- title = re.sub(r"(?s)\[\[\s*:?([^]|#<>]+?)\s*(#[^][|<>]*?)?\]\]",
- repl_1, title)
- title = re.sub(r"(?s)\[\[\s*(([a-zA-Z0-9]+)\s*:)?\s*([^][#|<>]+?)"
- r"\s*(#[^][|]*?)?\|?\]\]",
- repl_link, title)
- title = re.sub(r"(?s)\[\[\s*([^][|<>]+?)\s*\|"
- r"\s*(([^][|]|\[[^]]*\])+?)"
- r"(\s*\|\s*(([^]|]|\[[^]]*\])+?))*\s*\]\]",
- repl_link_bars, title)
+ title = re.sub(
+ r"(?s)\[\[\s*:?([^]|#<>]+?)\s*(#[^][|<>]*?)?\]\]", repl_1, title
+ )
+ title = re.sub(
+ r"(?s)\[\[\s*(([a-zA-Z0-9]+)\s*:)?\s*([^][#|<>]+?)"
+ r"\s*(#[^][|]*?)?\|?\]\]",
+ repl_link,
+ title,
+ )
+ title = re.sub(
+ r"(?s)\[\[\s*([^][|<>]+?)\s*\|"
+ r"\s*(([^][|]|\[[^]]*\])+?)"
+ r"(\s*\|\s*(([^]|]|\[[^]]*\])+?))*\s*\]\]",
+ repl_link_bars,
+ title,
+ )
if title == orig:
break
# Replace remaining HTML links by the URL.
while True:
orig = title
- title = re.sub(r"\[\s*((https?:|mailto:)?//([^][]+?))\s*\]",
- repl_exturl, title)
+ title = re.sub(
+ r"\[\s*((https?:|mailto:)?//([^][]+?))\s*\]", repl_exturl, title
+ )
if title == orig:
break
@@ -1508,14 +1542,16 @@ def repl_1_syntaxhighlight(m: re.Match) -> str:
return title
-def clean_template_args(wxr: WiktextractContext,
- ht: dict[Union[int, str], str], # XXX -> "TemplateArgs"
- no_strip=False
+def clean_template_args(
+ wxr: WiktextractContext, ht: TemplateArgs, no_strip=False
) -> dict[str, str]:
"""Cleans all values in a template argument dictionary and returns the
cleaned dictionary."""
assert isinstance(wxr, WiktextractContext)
assert isinstance(ht, dict)
- return {clean_value(wxr, str(k), no_html_strip=True):
- clean_value(wxr, str(v), no_strip=no_strip, no_html_strip=True)
- for k, v in ht.items()}
+ return {
+ clean_value(wxr, str(k), no_html_strip=True): clean_value(
+ wxr, str(v), no_strip=no_strip, no_html_strip=True
+ )
+ for k, v in ht.items()
+ }
diff --git a/src/wiktextract/extractor/en/page.py b/src/wiktextract/extractor/en/page.py
index 00e924b46..23156cb44 100644
--- a/src/wiktextract/extractor/en/page.py
+++ b/src/wiktextract/extractor/en/page.py
@@ -11,14 +11,22 @@
from functools import partial
from re import Pattern
from typing import (
+ TYPE_CHECKING,
+ Callable,
Optional,
Set,
Union,
+ cast,
)
from mediawiki_langcodes import get_all_names, name_to_code
from wikitextprocessor import NodeKind, WikiNode
-from wikitextprocessor.core import TemplateArgs
+from wikitextprocessor.core import (
+ TemplateArgs,
+ TemplateFnCallable,
+ PostTemplateFnCallable,
+)
+from wikitextprocessor.parser import GeneralNode
from wiktextract.clean import clean_template_args
from wiktextract.datautils import (
data_append,
@@ -44,7 +52,11 @@
from wiktextract.parts_of_speech import PARTS_OF_SPEECH
from wiktextract.tags import valid_tags
from wiktextract.translations import parse_translation_item_text
-from wiktextract.type_utils import WordData
+from wiktextract.type_utils import (
+ SenseData,
+ SoundData,
+ WordData,
+)
from wiktextract.wxr_context import WiktextractContext
from ..ruby import extract_ruby, parse_ruby
@@ -53,174 +65,177 @@
# Matches head tag
HEAD_TAG_RE: Pattern = re.compile(
- r"^(head|Han char|arabic-noun|arabic-noun-form|"
- r"hangul-symbol|syllable-hangul)$|" +
- r"^(latin|" +
- "|".join(lang_code for lang_code, *_ in get_all_names("en")) +
- r")-(" +
- "|".join([
- "abbr",
- "adj",
- "adjective",
- "adjective form",
- "adjective-form",
- "adv",
- "adverb",
- "affix",
- "animal command",
- "art",
- "article",
- "aux",
- "bound pronoun",
- "bound-pronoun",
- "Buyla",
- "card num",
- "card-num",
- "cardinal",
- "chunom",
- "classifier",
- "clitic",
- "cls",
- "cmene",
- "cmavo",
- "colloq-verb",
- "colverbform",
- "combining form",
- "combining-form",
- "comparative",
- "con",
- "concord",
- "conj",
- "conjunction",
- "conjug",
- "cont",
- "contr",
- "converb",
- "daybox",
- "decl",
- "decl noun",
- "def",
- "dem",
- "det",
- "determ",
- "Deva",
- "ending",
- "entry",
- "form",
- "fuhivla",
- "gerund",
- "gismu",
- "hanja",
- "hantu",
- "hanzi",
- "head",
- "ideophone",
- "idiom",
- "inf",
- "indef",
- "infixed pronoun",
- "infixed-pronoun",
- "infl",
- "inflection",
- "initialism",
- "int",
- "interfix",
- "interj",
- "interjection",
- "jyut",
- "latin",
- "letter",
- "locative",
- "lujvo",
- "monthbox",
- "mutverb",
- "name",
- "nisba",
- "nom",
- "noun",
- "noun form",
- "noun-form",
- "noun plural",
- "noun-plural",
- "nounprefix",
- "num",
- "number",
- "numeral",
- "ord",
- "ordinal",
- "par",
- "part",
- "part form",
- "part-form",
- "participle",
- "particle",
- "past",
- "past neg",
- "past-neg",
- "past participle",
- "past-participle",
- "perfect participle",
- "perfect-participle",
- "personal pronoun",
- "personal-pronoun",
- "pref",
- "prefix",
- "phrase",
- "pinyin",
- "plural noun",
- "plural-noun",
- "pos",
- "poss-noun",
- "post",
- "postp",
- "postposition",
- "PP",
- "pp",
- "ppron",
- "pred",
- "predicative",
- "prep",
- "prep phrase",
- "prep-phrase",
- "preposition",
- "present participle",
- "present-participle",
- "pron",
- "prondem",
- "pronindef",
- "pronoun",
- "prop",
- "proper noun",
- "proper-noun",
- "proper noun form",
- "proper-noun form",
- "proper noun-form",
- "proper-noun-form",
- "prov",
- "proverb",
- "prpn",
- "prpr",
- "punctuation mark",
- "punctuation-mark",
- "regnoun",
- "rel",
- "rom",
- "romanji",
- "root",
- "sign",
- "suff",
- "suffix",
- "syllable",
- "symbol",
- "verb",
- "verb form",
- "verb-form",
- "verbal noun",
- "verbal-noun",
- "verbnec",
- "vform",
- ]) +
- r")(-|/|\+|$)")
+ r"^(head|Han char|arabic-noun|arabic-noun-form|"
+ r"hangul-symbol|syllable-hangul)$|"
+ + r"^(latin|"
+ + "|".join(lang_code for lang_code, *_ in get_all_names("en"))
+ + r")-("
+ + "|".join(
+ [
+ "abbr",
+ "adj",
+ "adjective",
+ "adjective form",
+ "adjective-form",
+ "adv",
+ "adverb",
+ "affix",
+ "animal command",
+ "art",
+ "article",
+ "aux",
+ "bound pronoun",
+ "bound-pronoun",
+ "Buyla",
+ "card num",
+ "card-num",
+ "cardinal",
+ "chunom",
+ "classifier",
+ "clitic",
+ "cls",
+ "cmene",
+ "cmavo",
+ "colloq-verb",
+ "colverbform",
+ "combining form",
+ "combining-form",
+ "comparative",
+ "con",
+ "concord",
+ "conj",
+ "conjunction",
+ "conjug",
+ "cont",
+ "contr",
+ "converb",
+ "daybox",
+ "decl",
+ "decl noun",
+ "def",
+ "dem",
+ "det",
+ "determ",
+ "Deva",
+ "ending",
+ "entry",
+ "form",
+ "fuhivla",
+ "gerund",
+ "gismu",
+ "hanja",
+ "hantu",
+ "hanzi",
+ "head",
+ "ideophone",
+ "idiom",
+ "inf",
+ "indef",
+ "infixed pronoun",
+ "infixed-pronoun",
+ "infl",
+ "inflection",
+ "initialism",
+ "int",
+ "interfix",
+ "interj",
+ "interjection",
+ "jyut",
+ "latin",
+ "letter",
+ "locative",
+ "lujvo",
+ "monthbox",
+ "mutverb",
+ "name",
+ "nisba",
+ "nom",
+ "noun",
+ "noun form",
+ "noun-form",
+ "noun plural",
+ "noun-plural",
+ "nounprefix",
+ "num",
+ "number",
+ "numeral",
+ "ord",
+ "ordinal",
+ "par",
+ "part",
+ "part form",
+ "part-form",
+ "participle",
+ "particle",
+ "past",
+ "past neg",
+ "past-neg",
+ "past participle",
+ "past-participle",
+ "perfect participle",
+ "perfect-participle",
+ "personal pronoun",
+ "personal-pronoun",
+ "pref",
+ "prefix",
+ "phrase",
+ "pinyin",
+ "plural noun",
+ "plural-noun",
+ "pos",
+ "poss-noun",
+ "post",
+ "postp",
+ "postposition",
+ "PP",
+ "pp",
+ "ppron",
+ "pred",
+ "predicative",
+ "prep",
+ "prep phrase",
+ "prep-phrase",
+ "preposition",
+ "present participle",
+ "present-participle",
+ "pron",
+ "prondem",
+ "pronindef",
+ "pronoun",
+ "prop",
+ "proper noun",
+ "proper-noun",
+ "proper noun form",
+ "proper-noun form",
+ "proper noun-form",
+ "proper-noun-form",
+ "prov",
+ "proverb",
+ "prpn",
+ "prpr",
+ "punctuation mark",
+ "punctuation-mark",
+ "regnoun",
+ "rel",
+ "rom",
+ "romanji",
+ "root",
+ "sign",
+ "suff",
+ "suffix",
+ "syllable",
+ "symbol",
+ "verb",
+ "verb form",
+ "verb-form",
+ "verbal noun",
+ "verbal-noun",
+ "verbnec",
+ "vform",
+ ]
+ )
+ + r")(-|/|\+|$)"
+)
FLOATING_TABLE_TEMPLATES: set[str] = {
# az-suffix-form creates a style=floatright div that is otherwise
@@ -439,8 +454,11 @@
"wtorw",
}
for x in PANEL_PREFIXES & wikipedia_templates:
- print("WARNING: {!r} in both panel_templates and wikipedia_templates"
- .format(x))
+ print(
+ "WARNING: {!r} in both panel_templates and wikipedia_templates".format(
+ x
+ )
+ )
# Mapping from a template name (without language prefix) for the main word
# (e.g., fi-noun, fi-adj, en-verb) to permitted parts-of-speech in which
@@ -482,8 +500,10 @@
for k, v in template_allowed_pos_map.items():
for x in v:
if x not in PARTS_OF_SPEECH:
- print("BAD PART OF SPEECH {!r} IN template_allowed_pos_map: {}={}"
- "".format(x, k, v))
+ print(
+ "BAD PART OF SPEECH {!r} IN template_allowed_pos_map: {}={}"
+ "".format(x, k, v)
+ )
assert False
@@ -526,9 +546,10 @@
# Regexp for matching ignored etymology template names. This adds certain
# prefixes to the names listed above.
ignored_etymology_templates_re = re.compile(
- r"^((cite-|R:|RQ:).*|" +
- r"|".join(re.escape(x) for x in ignored_etymology_templates) +
- r")$")
+ r"^((cite-|R:|RQ:).*|"
+ + r"|".join(re.escape(x) for x in ignored_etymology_templates)
+ + r")$"
+)
# Regexp for matching ignored descendants template names. Right now we just
# copy the ignored etymology templates
@@ -618,19 +639,38 @@
# Template name component to linkage section listing. Integer section means
# default section, starting at that argument.
-template_linkage_mappings: list[list[Union[str, int]]] = [
- ["syn", "synonyms"],
- ["synonyms", "synonyms"],
- ["ant", "antonyms"],
- ["antonyms", "antonyms"],
- ["hyp", "hyponyms"],
- ["hyponyms", "hyponyms"],
- ["der", "derived"],
- ["derived terms", "derived"],
- ["coordinate terms", "coordinate_terms"],
- ["rel", "related"],
- ["col", 2],
-]
+# XXX not used anymore, except for the first elements: moved to
+# template_linkages
+# template_linkage_mappings: list[list[Union[str, int]]] = [
+# ["syn", "synonyms"],
+# ["synonyms", "synonyms"],
+# ["ant", "antonyms"],
+# ["antonyms", "antonyms"],
+# ["hyp", "hyponyms"],
+# ["hyponyms", "hyponyms"],
+# ["der", "derived"],
+# ["derived terms", "derived"],
+# ["coordinate terms", "coordinate_terms"],
+# ["rel", "related"],
+# ["col", 2],
+# ]
+
+# Template names; this was extracted from template_linkage_mappings,
+# because the code using template_linkage_mappings was actually not used
+# (but not removed).
+template_linkages: set[str] = {
+ "syn",
+ "synonyms",
+ "ant",
+ "antonyms",
+ "hyp",
+ "hyponyms",
+ "der",
+ "derived terms",
+ "coordinate terms",
+ "rel",
+ "col",
+}
# Maps template name used in a word sense to a linkage field that it adds.
sense_linkage_templates: dict[str, str] = {
@@ -655,11 +695,11 @@ def decode_html_entities(v: Union[str, int]) -> str:
return html.unescape(v)
-def parse_sense_linkage(wxr:
- WiktextractContext,
- data: WordData,
- name: str,
- ht: TemplateArgs,
+def parse_sense_linkage(
+ wxr: WiktextractContext,
+ data: SenseData,
+ name: str,
+ ht: TemplateArgs,
) -> None:
"""Parses a linkage (synonym, etc) specified in a word sense."""
assert isinstance(wxr, WiktextractContext)
@@ -670,13 +710,15 @@ def parse_sense_linkage(wxr:
for i in range(2, 20):
w = ht.get(i) or ""
w = clean_node(wxr, data, w)
- if w.startswith(ns_title_prefix_tuple(wxr, "Thesaurus")):
- w = w[10:]
+ for alias in ns_title_prefix_tuple(wxr, "Thesaurus"):
+ if w.startswith(alias):
+ w = w[len(alias) :]
+ break
if not w:
break
tags: list[str] = []
topics: list[str] = []
- english = None
+ english: Optional[str] = None
# Try to find qualifiers for this synonym
q = ht.get("q{}".format(i - 1))
if q:
@@ -703,7 +745,7 @@ def parse_sense_linkage(wxr:
alt = None
m = re.search(r"\(([^)]+)\)$", w)
if m:
- w = w[:m.start()].strip()
+ w = w[: m.start()].strip()
alt = m.group(1)
dt = {"word": w}
@@ -718,15 +760,15 @@ def parse_sense_linkage(wxr:
data_append(data, field, dt)
-def parse_language(wxr: WiktextractContext,
- langnode: WikiNode,
- language: str,
- lang_code: str) -> list[WordData]:
+def parse_language(
+ wxr: WiktextractContext, langnode: WikiNode, language: str, lang_code: str
+) -> list[WordData]:
"""Iterates over the text of the page, returning words (parts-of-speech)
defined on the page one at a time. (Individual word senses for the
same part-of-speech are typically encoded in the same entry.)"""
# imported here to avoid circular import
from wiktextract.pronunciations import parse_pronunciation
+
assert isinstance(wxr, WiktextractContext)
assert isinstance(langnode, WikiNode)
assert isinstance(language, str)
@@ -737,85 +779,110 @@ def parse_language(wxr: WiktextractContext,
word = wxr.wtp.title
unsupported_prefix = "Unsupported titles/"
if word.startswith(unsupported_prefix):
- w = word[len(unsupported_prefix):]
+ w = word[len(unsupported_prefix) :]
if w in unsupported_title_map:
word = unsupported_title_map[w]
else:
- wxr.wtp.error("Unimplemented unsupported title: {}".format(word),
- sortid="page/870")
+ wxr.wtp.error(
+ "Unimplemented unsupported title: {}".format(word),
+ sortid="page/870",
+ )
word = w
elif word.startswith("Reconstruction:"):
- word = word[word.find("/") + 1:]
+ word = word[word.find("/") + 1 :]
is_reconstruction = True
- base_data = {"word": word, "lang": language, "lang_code": lang_code}
+ base_data: WordData = {
+ "word": word,
+ "lang": language,
+ "lang_code": lang_code,
+ }
if is_reconstruction:
data_append(base_data, "tags", "reconstruction")
- sense_data = {}
- pos_data = {} # For a current part-of-speech
- etym_data = {} # For one etymology
- pos_datas = []
- etym_datas = []
- page_datas = []
+ sense_data: SenseData = {}
+ pos_data: WordData = {} # For a current part-of-speech
+ etym_data: WordData = {} # For one etymology
+ pos_datas: list[SenseData] = []
+ etym_datas: list[WordData] = []
+ page_datas: list[WordData] = []
have_etym = False
- stack = []
+ stack: list[str] = [] # names of items on the "stack"
- def merge_base(data, base):
+ def merge_base(data: WordData, base: WordData) -> None:
for k, v in base.items():
# Copy the value to ensure that we don't share lists or
# dicts between structures (even nested ones).
v = copy.deepcopy(v)
if k not in data:
# The list was copied above, so this will not create shared ref
- data[k] = v
+ data[k] = v # type: ignore[literal-required]
continue
- if data[k] == v:
+ if data[k] == v: # type: ignore[literal-required]
continue
- if (isinstance(data[k], (list, tuple)) or
- isinstance(v, (list, tuple))):
- data[k] = list(data[k]) + list(v)
- elif data[k] != v:
- wxr.wtp.warning("conflicting values for {} in merge_base: "
- "{!r} vs {!r}"
- .format(k, data[k], v),
- sortid="page/904")
-
- def complementary_pop(pron, key):
+ if (
+ isinstance(data[k], (list, tuple)) # type: ignore[literal-required]
+ or isinstance(
+ v,
+ (list, tuple), # Should this be "and"?
+ )
+ ):
+ data[k] = list(data[k]) + list(v) # type: ignore
+ elif data[k] != v: # type: ignore[literal-required]
+ wxr.wtp.warning(
+ "conflicting values for {} in merge_base: "
+ "{!r} vs {!r}".format(k, data[k], v), # type: ignore[literal-required]
+ sortid="page/904",
+ )
+
+ def complementary_pop(pron: SoundData, key: str) -> SoundData:
"""Remove unnecessary keys from dict values
in a list comprehension..."""
if key in pron:
- pron.pop(key)
+ pron.pop(key) # type: ignore
return pron
# If the result has sounds, eliminate sounds that have a prefix that
# does not match "word" or one of "forms"
if "sounds" in data and "word" in data:
accepted = [data["word"]]
- accepted.extend(f["form"] for f in data.get("forms", ()))
- data["sounds"] = list(complementary_pop(s, "pos")
- for s in data["sounds"]
- if "form" not in s or s["form"] in accepted)
+ accepted.extend(f["form"] for f in data.get("forms", dict()))
+ data["sounds"] = list(
+ s
+ for s in data["sounds"]
+ if "form" not in s or s["form"] in accepted
+ )
# If the result has sounds, eliminate sounds that have a pos that
# does not match "pos"
if "sounds" in data and "pos" in data:
- data["sounds"] = list(s for s in data["sounds"]
- if "pos" not in s or s["pos"] == data["pos"])
+ data["sounds"] = list(
+ complementary_pop(s, "pos")
+ for s in data["sounds"]
+ # "pos" is not a field of SoundData, correctly, so we're
+ # removing it here. It's a kludge on a kludge on a kludge.
+ if "pos" not in s or s["pos"] == data["pos"] # type: ignore[typeddict-item]
+ )
- def push_sense():
+ def push_sense() -> bool:
"""Starts collecting data for a new word sense. This returns True
if a sense was added."""
nonlocal sense_data
tags = sense_data.get("tags", ())
- if (not sense_data.get("glosses") and
- "translation-hub" not in tags and
- "no-gloss" not in tags):
+ if (
+ not sense_data.get("glosses")
+ and "translation-hub" not in tags
+ and "no-gloss" not in tags
+ ):
return False
- if (("participle" in sense_data.get("tags", ()) or
- "infinitive" in sense_data.get("tags", ())) and
- "alt_of" not in sense_data and
- "form_of" not in sense_data and
- "etymology_text" in etym_data):
+ if (
+ (
+ "participle" in sense_data.get("tags", ())
+ or "infinitive" in sense_data.get("tags", ())
+ )
+ and "alt_of" not in sense_data
+ and "form_of" not in sense_data
+ and "etymology_text" in etym_data
+ ):
etym = etym_data["etymology_text"]
etym = etym.split(". ")[0]
ret = parse_alt_or_inflection_of(wxr, etym, set())
@@ -829,28 +896,29 @@ def push_sense():
data_extend(sense_data, "alt_of", lst)
data_extend(sense_data, "tags", tags)
- if (not sense_data.get("glosses") and
- "no-gloss" not in sense_data.get("tags", ())):
+ if not sense_data.get("glosses") and "no-gloss" not in sense_data.get(
+ "tags", ()
+ ):
data_append(sense_data, "tags", "no-gloss")
pos_datas.append(sense_data)
sense_data = {}
return True
- def push_pos():
+ def push_pos() -> None:
"""Starts collecting data for a new part-of-speech."""
nonlocal pos_data
nonlocal pos_datas
push_sense()
if wxr.wtp.subsection:
- data = {"senses": pos_datas}
+ data: WordData = {"senses": pos_datas}
merge_base(data, pos_data)
etym_datas.append(data)
pos_data = {}
pos_datas = []
wxr.wtp.start_subsection(None)
- def push_etym():
+ def push_etym() -> None:
"""Starts collecting data for a new etymology."""
nonlocal etym_data
nonlocal etym_datas
@@ -863,7 +931,7 @@ def push_etym():
etym_data = {}
etym_datas = []
- def select_data():
+ def select_data() -> WordData:
"""Selects where to store data (pos or etym) based on whether we
are inside a pos (part-of-speech)."""
if wxr.wtp.subsection is not None:
@@ -872,7 +940,9 @@ def select_data():
return base_data
return etym_data
- def head_post_template_fn(name, ht, expansion):
+ def head_post_template_fn(
+ name: str, ht: TemplateArgs, expansion: str
+ ) -> Optional[str]:
"""Handles special templates in the head section of a word. Head
section is the text after part-of-speech subtitle and before word
sense list. Typically it generates the bold line for the word, but
@@ -934,15 +1004,15 @@ def head_post_template_fn(name, ht, expansion):
return None
- def parse_part_of_speech(posnode, pos):
+ def parse_part_of_speech(posnode: WikiNode, pos: str) -> None:
"""Parses the subsection for a part-of-speech under a language on
a page."""
assert isinstance(posnode, WikiNode)
assert isinstance(pos, str)
# print("parse_part_of_speech", pos)
pos_data["pos"] = pos
- pre = [[]] # list of lists
- lists = [[]] # list of lists
+ pre: list[list[Union[str, WikiNode]]] = [[]] # list of lists
+ lists: list[list[WikiNode]] = [[]] # list of lists
first_para = True
first_head_tmplt = True
collecting_head = True
@@ -965,13 +1035,13 @@ def parse_part_of_speech(posnode, pos):
floaters, poschildren = recursively_extract(
posnode.children,
lambda x: (
- isinstance(x, WikiNode) and
- x.kind == NodeKind.TEMPLATE and
- x.largs[0][0] in FLOATING_TABLE_TEMPLATES
- )
+ isinstance(x, WikiNode)
+ and x.kind == NodeKind.TEMPLATE
+ and x.largs[0][0] in FLOATING_TABLE_TEMPLATES
+ ),
)
tempnode = WikiNode(NodeKind.LEVEL5, 0)
- tempnode.largs = ['Inflection']
+ tempnode.largs = [["Inflection"]]
tempnode.children = floaters
parse_inflection(tempnode, "Floating Div", pos)
# print(poschildren)
@@ -981,12 +1051,12 @@ def parse_part_of_speech(posnode, pos):
if not floaters:
wxr.wtp.debug(
"PoS section without contents",
- sortid="en/page/1051/20230612"
+ sortid="en/page/1051/20230612",
)
else:
wxr.wtp.debug(
"PoS section without contents except for a floating table",
- sortid="en/page/1056/20230612"
+ sortid="en/page/1056/20230612",
)
return
@@ -1019,16 +1089,19 @@ def parse_part_of_speech(posnode, pos):
elif collecting_head and kind == NodeKind.LINK:
# We might collect relevant links as they are often pictures
# relating to the word
- if (len(node.largs[0]) >= 1 and
- isinstance(node.largs[0][0], str)):
- if node.largs[0][0].startswith(ns_title_prefix_tuple(
- wxr, "Category")):
+ if len(node.largs[0]) >= 1 and isinstance(
+ node.largs[0][0], str
+ ):
+ if node.largs[0][0].startswith(
+ ns_title_prefix_tuple(wxr, "Category")
+ ):
# [[Category:...]]
# We're at the end of the file, probably, so stop
# here. Otherwise the head will get garbage.
break
- if node.largs[0][0].startswith(ns_title_prefix_tuple(
- wxr, "File")):
+ if node.largs[0][0].startswith(
+ ns_title_prefix_tuple(wxr, "File")
+ ):
# Skips file links
continue
start_of_paragraph = False
@@ -1040,8 +1113,12 @@ def parse_part_of_speech(posnode, pos):
lists.append([]) # Lists parallels pre
collecting_head = True
start_of_paragraph = True
- elif (collecting_head and
- node.sarg not in ("gallery", "ref", "cite", "caption")):
+ elif collecting_head and node.sarg not in (
+ "gallery",
+ "ref",
+ "cite",
+ "caption",
+ ):
start_of_paragraph = False
pre[-1].append(node)
else:
@@ -1061,21 +1138,23 @@ def parse_part_of_speech(posnode, pos):
# skip these templates; panel_templates is already used
# to skip certain templates else, but it also applies to
# head parsing quite well.
- if is_panel_template(wxr, node.largs[0][0]):
+ # node.largs[0][0] should always be str, but can't type-check
+ # that.
+ if is_panel_template(wxr, node.largs[0][0]): # type: ignore[arg-type]
continue
# skip these templates
# if node.largs[0][0] in skip_these_templates_in_head:
- # first_head_tmplt = False # no first_head_tmplt at all
- # start_of_paragraph = False
- # continue
+ # first_head_tmplt = False # no first_head_tmplt at all
+ # start_of_paragraph = False
+ # continue
if first_head_tmplt and pre[-1]:
first_head_tmplt = False
start_of_paragraph = False
pre[-1].append(node)
elif pre[-1] and start_of_paragraph:
- pre.append([]) # Switch to the next head
- lists.append([]) # lists parallel pre
+ pre.append([]) # Switch to the next head
+ lists.append([]) # lists parallel pre
collecting_head = True
start_of_paragraph = False
pre[-1].append(node)
@@ -1092,8 +1171,8 @@ def parse_part_of_speech(posnode, pos):
# Clean up empty pairs, and fix messes with extra newlines that
# separate templates that are followed by lists wiktextract issue #314
- cleaned_pre = []
- cleaned_lists = []
+ cleaned_pre: list[list[Union[str, WikiNode]]] = []
+ cleaned_lists: list[list[WikiNode]] = []
pairless_pre_index = None
for pre1, ls in zip(pre, lists):
@@ -1102,8 +1181,9 @@ def parse_part_of_speech(posnode, pos):
if not pre1 and not ls:
# skip [] + []
continue
- if not ls and all((isinstance(x, str) and not x.strip())
- for x in pre1):
+ if not ls and all(
+ (isinstance(x, str) and not x.strip()) for x in pre1
+ ):
# skip ["\n", " "] + []
continue
if ls and not pre1:
@@ -1118,7 +1198,7 @@ def parse_part_of_speech(posnode, pos):
lists = cleaned_lists
there_are_many_heads = len(pre) > 1
- header_tags = []
+ header_tags: list[str] = []
if not any(g for g in lists):
process_gloss_without_list(poschildren, pos, pos_data, header_tags)
@@ -1128,60 +1208,75 @@ def parse_part_of_speech(posnode, pos):
# # don't have gloss list
# # XXX add code here to filter out 'garbage', like text
# # that isn't a head template or head.
- # continue
+ # continue
if all(not sl for sl in lists[i:]):
if i == 0:
if isinstance(node, str):
- wxr.wtp.debug("first head without list of senses,"
- "string: '{}[...]', {}/{}".format(
- node[:20], word, language),
- sortid="page/1689/20221215")
+ wxr.wtp.debug(
+ "first head without list of senses,"
+ "string: '{}[...]', {}/{}".format(
+ node[:20], word, language
+ ),
+ sortid="page/1689/20221215",
+ )
if isinstance(node, WikiNode):
- if node.largs and node.largs[0][0] in ["Han char",]:
+ if node.largs and node.largs[0][0] in [
+ "Han char",
+ ]:
# just ignore these templates
pass
else:
- wxr.wtp.debug("first head without "
- "list of senses, "
- "template node "
- "{}, {}/{}".format(
- node.largs, word, language),
- sortid="page/1694/20221215")
+ wxr.wtp.debug(
+ "first head without "
+ "list of senses, "
+ "template node "
+ "{}, {}/{}".format(
+ node.largs, word, language
+ ),
+ sortid="page/1694/20221215",
+ )
else:
- wxr.wtp.debug("first head without list of senses, "
- "{}/{}".format(
- word, language),
- sortid="page/1700/20221215")
+ wxr.wtp.debug(
+ "first head without list of senses, "
+ "{}/{}".format(word, language),
+ sortid="page/1700/20221215",
+ )
# no break here so that the first head always
# gets processed.
else:
if isinstance(node, str):
- wxr.wtp.debug("later head without list of senses,"
- "string: '{}[...]', {}/{}".format(
- node[:20], word, language),
- sortid="page/1708/20221215")
+ wxr.wtp.debug(
+ "later head without list of senses,"
+ "string: '{}[...]', {}/{}".format(
+ node[:20], word, language
+ ),
+ sortid="page/1708/20221215",
+ )
if isinstance(node, WikiNode):
- wxr.wtp.debug("later head without list of senses,"
- "template node "
- "{}, {}/{}".format(
- node.sarg if node.sarg else node.largs,
- word, language),
- sortid="page/1713/20221215")
+ wxr.wtp.debug(
+ "later head without list of senses,"
+ "template node "
+ "{}, {}/{}".format(
+ node.sarg if node.sarg else node.largs,
+ word,
+ language,
+ ),
+ sortid="page/1713/20221215",
+ )
else:
- wxr.wtp.debug("later head without list of senses, "
- "{}/{}".format(
- word, language),
- sortid="page/1719/20221215")
+ wxr.wtp.debug(
+ "later head without list of senses, "
+ "{}/{}".format(word, language),
+ sortid="page/1719/20221215",
+ )
break
head_group = i + 1 if there_are_many_heads else None
# print("parse_part_of_speech: {}: {}: pre={}"
- # .format(wxr.wtp.section, wxr.wtp.subsection, pre1))
- process_gloss_header(pre1,
- pos,
- head_group,
- pos_data,
- header_tags)
+ # .format(wxr.wtp.section, wxr.wtp.subsection, pre1))
+ process_gloss_header(
+ pre1, pos, head_group, pos_data, header_tags
+ )
for l in ls:
# Parse each list associated with this head.
for node in l.children:
@@ -1194,10 +1289,10 @@ def parse_part_of_speech(posnode, pos):
# the data is already pushed into a sub-gloss
# downstream, unless the higher level has examples
# that need to be put somewhere.
- common_data = {"tags": list(header_tags)}
+ common_data: SenseData = {"tags": list(header_tags)}
if head_group:
common_data["head_nr"] = head_group
- parse_sense_node(node, common_data, pos)
+ parse_sense_node(node, common_data, pos) # type: ignore[arg-type]
# If there are no senses extracted, add a dummy sense. We want to
# keep tags extracted from the head for the dummy sense.
@@ -1211,7 +1306,7 @@ def process_gloss_header(
header_nodes: list[Union[WikiNode, str]],
pos_type: str,
header_group: Optional[int],
- pos_data: dict,
+ pos_data: WordData,
header_tags: list[str],
) -> None:
ruby = []
@@ -1223,10 +1318,14 @@ def process_gloss_header(
exp.children,
lambda x: isinstance(x, WikiNode)
and x.kind == NodeKind.HTML
- and x.sarg == "ruby"
+ and x.sarg == "ruby",
)
if rub is not None:
for r in rub:
+ if TYPE_CHECKING:
+ # we know the lambda above in recursively_extract
+ # returns only WikiNodes in rub
+ assert isinstance(r, WikiNode)
rt = parse_ruby(wxr, r)
if rt is not None:
ruby.append(rt)
@@ -1244,27 +1343,30 @@ def process_gloss_header(
ruby=ruby,
)
if "tags" in pos_data:
- header_tags[:] = pos_data["tags"]
- del pos_data["tags"]
+        # pos_data can get "tags" data from some source; the type-checker
+        # doesn't like it, so let's ignore it.
+ header_tags[:] = pos_data["tags"] # type: ignore[typeddict-item]
+ del pos_data["tags"] # type: ignore[typeddict-item]
else:
header_tags.clear()
def process_gloss_without_list(
nodes: list[Union[WikiNode, str]],
pos_type: str,
- pos_data: dict,
+ pos_data: WordData,
header_tags: list[str],
) -> None:
# gloss text might not inside a list
- header_nodes = []
- gloss_nodes = []
+ header_nodes: list[Union[str, WikiNode]] = []
+ gloss_nodes: list[Union[str, WikiNode]] = []
for node in strip_nodes(nodes):
if isinstance(node, WikiNode):
if node.kind == NodeKind.TEMPLATE:
template_name = node.largs[0][0]
- if (
- template_name == "head"
- or template_name.startswith(f"{lang_code}-")
+ if TYPE_CHECKING:
+ assert isinstance(template_name, str)
+ if template_name == "head" or template_name.startswith(
+ f"{lang_code}-"
):
header_nodes.append(node)
continue
@@ -1281,7 +1383,11 @@ def process_gloss_without_list(
gloss_nodes, pos_type, {"tags": list(header_tags)}
)
- def parse_sense_node(node, sense_base, pos):
+ def parse_sense_node(
+ node: Union[str, WikiNode], # never receives str
+ sense_base: SenseData,
+ pos: str,
+ ) -> bool:
"""Recursively (depth first) parse LIST_ITEM nodes for sense data.
Uses push_sense() to attempt adding data to pos_data in the scope
of parse_language() when it reaches deep in the recursion. push_sense()
@@ -1292,14 +1398,18 @@ def parse_sense_node(node, sense_base, pos):
"""
assert isinstance(sense_base, dict) # Added to every sense deeper in
if not isinstance(node, WikiNode):
- wxr.wtp.debug("{}: parse_sense_node called with"
- "something that isn't a WikiNode".format(pos),
- sortid="page/1287/20230119")
+ # This doesn't seem to ever happen in practice.
+ wxr.wtp.debug(
+ "{}: parse_sense_node called with"
+ "something that isn't a WikiNode".format(pos),
+ sortid="page/1287/20230119",
+ )
return False
if node.kind != NodeKind.LIST_ITEM:
- wxr.wtp.debug("{}: non-list-item inside list".format(pos),
- sortid="page/1678")
+ wxr.wtp.debug(
+ "{}: non-list-item inside list".format(pos), sortid="page/1678"
+ )
return False
if node.sarg == ":":
@@ -1315,7 +1425,7 @@ def parse_sense_node(node, sense_base, pos):
# added |= push_sense() or added |= parse_sense_node(...) to OR.
added = False
- gloss_template_args = set()
+ gloss_template_args: set[str] = set()
# For LISTs and LIST_ITEMS, their argument is something like
# "##" or "##:", and using that we can rudimentally determine
@@ -1330,26 +1440,34 @@ def parse_sense_node(node, sense_base, pos):
# of subglosses below this. The list's
# argument ends with #, and its depth should
# be bigger than parent node.
- subentries = [x for x in children
- if isinstance(x, WikiNode) and
- x.kind == NodeKind.LIST and
- x.sarg == current_depth + "#"]
+ subentries = [
+ x
+ for x in children
+ if isinstance(x, WikiNode)
+ and x.kind == NodeKind.LIST
+ and x.sarg == current_depth + "#"
+ ]
# sublists of examples and quotations. .sarg
# does not end with "#".
- others = [x for x in children
- if isinstance(x, WikiNode) and
- x.kind == NodeKind.LIST and
- x.sarg != current_depth + "#"]
+ others = [
+ x
+ for x in children
+ if isinstance(x, WikiNode)
+ and x.kind == NodeKind.LIST
+ and x.sarg != current_depth + "#"
+ ]
# the actual contents of this particular node.
# can be a gloss (or a template that expands into
# many glosses which we can't easily pre-expand)
# or could be an "outer gloss" with more specific
# subglosses, or could be a qualfier for the subglosses.
- contents = [x for x in children
- if not isinstance(x, WikiNode) or
- x.kind != NodeKind.LIST]
+ contents = [
+ x
+ for x in children
+ if not isinstance(x, WikiNode) or x.kind != NodeKind.LIST
+ ]
# If this entry has sublists of entries, we should combine
# gloss information from both the "outer" and sublist content.
# Sometimes the outer gloss
@@ -1371,28 +1489,29 @@ def parse_sense_node(node, sense_base, pos):
# copy current node and modify it so it doesn't
# loop infinitely.
cropped_node = copy.copy(node)
- cropped_node.children = [x for x in children
- if not (isinstance(x, WikiNode) and
- x.kind == NodeKind.LIST and
- x.sarg == current_depth + "#")]
- added |= parse_sense_node(cropped_node,
- sense_base,
- pos)
+ cropped_node.children = [
+ x
+ for x in children
+ if not (
+ isinstance(x, WikiNode)
+ and x.kind == NodeKind.LIST
+ and x.sarg == current_depth + "#"
+ )
+ ]
+ added |= parse_sense_node(cropped_node, sense_base, pos)
nonlocal sense_data # this kludge causes duplicated raw_
- # glosses data if this is not done;
- # if the top-level (cropped_node)
- # does not push_sense() properly or
- # parse_sense_node() returns early,
- # sense_data is not reset. This happens
- # for example when you have a no-gloss
- # string like "(intransitive)":
- # no gloss, push_sense() returns early
- # and sense_data has duplicate data with
- # sense_base
+ # glosses data if this is not done;
+ # if the top-level (cropped_node)
+ # does not push_sense() properly or
+ # parse_sense_node() returns early,
+ # sense_data is not reset. This happens
+ # for example when you have a no-gloss
+ # string like "(intransitive)":
+ # no gloss, push_sense() returns early
+ # and sense_data has duplicate data with
+ # sense_base
sense_data = {}
- added |= parse_sense_node(slc[0],
- sense_base,
- pos)
+ added |= parse_sense_node(slc[0], sense_base, pos)
return added
return process_gloss_contents(
@@ -1408,7 +1527,7 @@ def parse_sense_node(node, sense_base, pos):
def process_gloss_contents(
contents: list[Union[str, WikiNode]],
pos: str,
- sense_base: dict,
+ sense_base: SenseData,
subentries: list[WikiNode] = [],
others: list[WikiNode] = [],
gloss_template_args: Set[str] = set(),
@@ -1430,8 +1549,7 @@ def sense_template_fn(
arg = clean_node(wxr, sense_base, ht.get(2, ()))
if re.match(r"Q\d+$", arg):
data_append(sense_base, "wikidata", arg)
- data_append(sense_base, "senseid",
- langid + ":" + arg)
+ data_append(sense_base, "senseid", langid + ":" + arg)
if name in sense_linkage_templates:
# print(f"SENSE_TEMPLATE_FN: {name}")
parse_sense_linkage(wxr, sense_base, name, ht)
@@ -1470,7 +1588,7 @@ def sense_template_fn(
if is_gloss:
wxr.wtp.warning(
"Example template is used for gloss text",
- sortid="extractor.en.page.sense_template_fn/1415"
+ sortid="extractor.en.page.sense_template_fn/1415",
)
else:
return ""
@@ -1483,7 +1601,7 @@ def sense_template_fn(
gloss_template_args.add(v)
return None
- def extract_link_texts(item):
+ def extract_link_texts(item: GeneralNode) -> None:
"""Recursively extracts link texts from the gloss source. This
information is used to select whether to remove final "." from
form_of/alt_of (e.g., ihm/Hunsrik)."""
@@ -1504,8 +1622,11 @@ def extract_link_texts(item):
return
if item.kind == NodeKind.LINK:
v = item.largs[-1]
- if (isinstance(v, list) and len(v) == 1 and
- isinstance(v[0], str)):
+ if (
+ isinstance(v, list)
+ and len(v) == 1
+ and isinstance(v[0], str)
+ ):
gloss_template_args.add(v[0].strip())
for x in item.children:
extract_link_texts(x)
@@ -1514,11 +1635,16 @@ def extract_link_texts(item):
# get the raw text of non-list contents of this node, and other stuff
# like tag and category data added to sense_base
+ # cast = no-op type-setter for the type-checker
+ partial_template_fn = cast(
+ TemplateFnCallable,
+ partial(sense_template_fn, is_gloss=True),
+ )
rawgloss = clean_node(
wxr,
sense_base,
contents,
- template_fn=partial(sense_template_fn, is_gloss=True),
+ template_fn=partial_template_fn,
collect_links=True,
)
@@ -1542,7 +1668,7 @@ def extract_link_texts(item):
strip_ends = [", particularly:"]
for x in strip_ends:
if rawgloss.endswith(x):
- rawgloss = rawgloss[:-len(x)]
+ rawgloss = rawgloss[: -len(x)]
break
# The gloss could contain templates that produce more list items.
@@ -1562,19 +1688,19 @@ def extract_link_texts(item):
if rawgloss and rawgloss not in sense_base.get("raw_glosses", ()):
data_append(sense_base, "raw_glosses", subglosses[1])
m = re.match(r"\(([^()]+)\):?\s*", rawgloss)
- # ( ..\1.. ): ... or ( ..\1.. ) ...
+ # ( ..\1.. ): ... or ( ..\1.. ) ...
if m:
q = m.group(1)
- rawgloss = rawgloss[m.end():].strip()
+ rawgloss = rawgloss[m.end() :].strip()
parse_sense_qualifier(wxr, q, sense_base)
if rawgloss == "A pejorative:":
data_append(sense_base, "tags", "pejorative")
- rawgloss = None
+ rawgloss = ""
elif rawgloss == "Short forms.":
data_append(sense_base, "tags", "abbreviation")
- rawgloss = None
+ rawgloss = ""
elif rawgloss == "Technical or specialized senses.":
- rawgloss = None
+ rawgloss = ""
if rawgloss:
data_append(sense_base, "glosses", rawgloss)
if rawgloss in ("A person:",):
@@ -1583,15 +1709,20 @@ def extract_link_texts(item):
# The main recursive call (except for the exceptions at the
# start of this function).
for sublist in subentries:
- if not (isinstance(sublist, WikiNode) and
- sublist.kind == NodeKind.LIST):
- wxr.wtp.debug(f"'{repr(rawgloss[:20])}.' gloss has `subentries`"
- f"with items that are not LISTs",
- sortid="page/1511/20230119")
+ if not (
+ isinstance(sublist, WikiNode) and sublist.kind == NodeKind.LIST
+ ):
+ wxr.wtp.debug(
+ f"'{repr(rawgloss[:20])}.' gloss has `subentries`"
+ f"with items that are not LISTs",
+ sortid="page/1511/20230119",
+ )
continue
for item in sublist.children:
- if not (isinstance(item, WikiNode) and
- item.kind == NodeKind.LIST_ITEM):
+ if not (
+ isinstance(item, WikiNode)
+ and item.kind == NodeKind.LIST_ITEM
+ ):
continue
# copy sense_base to prevent cross-contamination between
# subglosses and other subglosses and superglosses
@@ -1611,20 +1742,22 @@ def extract_link_texts(item):
if added:
if examples:
# this higher-up gloss has examples that we do not want to skip
- wxr.wtp.debug("'{}[...]' gloss has examples we want to keep, "
- "but there are subglosses."
- .format(repr(rawgloss[:30])),
- sortid="page/1498/20230118")
+ wxr.wtp.debug(
+ "'{}[...]' gloss has examples we want to keep, "
+ "but there are subglosses.".format(repr(rawgloss[:30])),
+ sortid="page/1498/20230118",
+ )
else:
return True
# Some entries, e.g., "iacebam", have weird sentences in quotes
# after the gloss, but these sentences don't seem to be intended
# as glosses. Skip them.
- subglosses = list(gl for gl in subglosses
- if gl.strip() and
- not re.match(r'\s*(\([^)]*\)\s*)?"[^"]*"\s*$',
- gl))
+ subglosses = list(
+ gl
+ for gl in subglosses
+ if gl.strip() and not re.match(r'\s*(\([^)]*\)\s*)?"[^"]*"\s*$', gl)
+ )
if len(subglosses) > 1 and "form_of" not in sense_base:
gl = subglosses[0].strip()
@@ -1633,8 +1766,7 @@ def extract_link_texts(item):
parsed = parse_alt_or_inflection_of(wxr, gl, gloss_template_args)
if parsed is not None:
infl_tags, infl_dts = parsed
- if (infl_dts and "form-of" in infl_tags and
- len(infl_tags) == 1):
+ if infl_dts and "form-of" in infl_tags and len(infl_tags) == 1:
# Interpret others as a particular form under
# "inflection of"
data_extend(sense_base, "tags", infl_tags)
@@ -1677,13 +1809,13 @@ def extract_link_texts(item):
data_extend(sense_data, k, v)
else:
assert k not in ("tags", "categories", "topics")
- sense_data[k] = v
+ sense_data[k] = v # type:ignore[literal-required]
# Parse the gloss for this particular sense
m = re.match(r"^\((([^()]|\([^()]*\))*)\):?\s*", gloss)
- # (...): ... or (...(...)...): ...
+ # (...): ... or (...(...)...): ...
if m:
parse_sense_qualifier(wxr, m.group(1), sense_data)
- gloss = gloss[m.end():].strip()
+ gloss = gloss[m.end() :].strip()
# Remove common suffix "[from 14th c.]" and similar
gloss = re.sub(r"\s\[[^]]*\]\s*$", "", gloss)
@@ -1691,12 +1823,15 @@ def extract_link_texts(item):
# Check to make sure we don't have unhandled list items in gloss
ofs = max(gloss.find("#"), gloss.find("* "))
if ofs > 10 and "(#)" not in gloss:
- wxr.wtp.debug("gloss may contain unhandled list items: {}"
- .format(gloss),
- sortid="page/1412")
+ wxr.wtp.debug(
+ "gloss may contain unhandled list items: {}".format(gloss),
+ sortid="page/1412",
+ )
elif "\n" in gloss:
- wxr.wtp.debug("gloss contains newline: {}".format(gloss),
- sortid="page/1416")
+ wxr.wtp.debug(
+ "gloss contains newline: {}".format(gloss),
+ sortid="page/1416",
+ )
# Kludge, some glosses have a comma after initial qualifiers in
# parentheses
@@ -1706,7 +1841,7 @@ def extract_link_texts(item):
if gloss.endswith(":"):
gloss = gloss[:-1].strip()
if gloss.startswith("N. of "):
- gloss = "Name of " + gloss[6:]
+ gloss = "Name of " + gloss[6:]
if gloss.startswith("†"):
data_append(sense_data, "tags", "obsolete")
gloss = gloss[1:]
@@ -1729,16 +1864,19 @@ def extract_link_texts(item):
if tag not in sense_tags:
data_append(sense_data, "tags", tag)
if countability_tags:
- if ("countable" not in sense_tags and
- "uncountable" not in sense_tags):
+ if (
+ "countable" not in sense_tags
+ and "uncountable" not in sense_tags
+ ):
data_extend(sense_data, "tags", countability_tags)
# If outer gloss specifies a form-of ("inflection of", see
# aquamarine/German), try to parse the inner glosses as
# tags for an inflected form.
if "form-of" in sense_base.get("tags", ()):
- parsed = parse_alt_or_inflection_of(wxr, gloss,
- gloss_template_args)
+ parsed = parse_alt_or_inflection_of(
+ wxr, gloss, gloss_template_args
+ )
if parsed is not None:
infl_tags, infl_dts = parsed
if not infl_dts and infl_tags:
@@ -1758,18 +1896,23 @@ def extract_link_texts(item):
split_glosses = []
for m in re.finditer(r"Abbreviation of ", gloss):
if m.start() != position:
- split_glosses.append(gloss[position: m.start()])
+ split_glosses.append(gloss[position : m.start()])
position = m.start()
split_glosses.append(gloss[position:])
for gloss in split_glosses:
# Check if this gloss describes an alt-of or inflection-of
- if (lang_code != "en" and " " not in gloss and distw([word], gloss) < 0.3):
+ if (
+ lang_code != "en"
+ and " " not in gloss
+ and distw([word], gloss) < 0.3
+ ):
# Don't try to parse gloss if it is one word
# that is close to the word itself for non-English words
# (probable translations of a tag/form name)
continue
- parsed = parse_alt_or_inflection_of(wxr, gloss,
- gloss_template_args)
+ parsed = parse_alt_or_inflection_of(
+ wxr, gloss, gloss_template_args
+ )
if parsed is None:
continue
tags, dts = parsed
@@ -1797,7 +1940,7 @@ def extract_link_texts(item):
data_append(sense_data, "form_of", dt)
if len(sense_data) == 0:
- if len(sense_base.get("tags")) == 0:
+ if len(sense_base.get("tags", [])) == 0:
del sense_base["tags"]
sense_data.update(sense_base)
if push_sense():
@@ -1806,7 +1949,9 @@ def extract_link_texts(item):
# print("PARSE_SENSE DONE:", pos_datas[-1])
return added
- def parse_inflection(node, section, pos):
+ def parse_inflection(
+ node: WikiNode, section: str, pos: Optional[str]
+ ) -> None:
"""Parses inflection data (declension, conjugation) from the given
page. This retrieves the actual inflection template
parameters, which are very useful for applications that need
@@ -1818,11 +1963,14 @@ def parse_inflection(node, section, pos):
# print("parse_inflection:", node)
if pos is None:
- wxr.wtp.debug("inflection table outside part-of-speech",
- sortid="page/1812")
+ wxr.wtp.debug(
+ "inflection table outside part-of-speech", sortid="page/1812"
+ )
return
- def inflection_template_fn(name, ht):
+ def inflection_template_fn(
+ name: str, ht: TemplateArgs
+ ) -> Optional[str]:
# print("decl_conj_template_fn", name, ht)
if is_panel_template(wxr, name):
return ""
@@ -1830,8 +1978,11 @@ def inflection_template_fn(name, ht):
# These are not to be captured as an exception to the
# generic code below
return None
- m = re.search(r"-(conj|decl|ndecl|adecl|infl|conjugation|"
- r"declension|inflection|mut|mutation)($|-)", name)
+ m = re.search(
+ r"-(conj|decl|ndecl|adecl|infl|conjugation|"
+ r"declension|inflection|mut|mutation)($|-)",
+ name,
+ )
if m:
args_ht = clean_template_args(wxr, ht)
dt = {"name": name, "args": args_ht}
@@ -1844,7 +1995,7 @@ def inflection_template_fn(name, ht):
text = wxr.wtp.node_to_wikitext(node.children)
# Split text into separate sections for each to-level template
- brace_matches = re.split("({{+|}}+)", text) # ["{{", "template", "}}"]
+ brace_matches = re.split("({{+|}}+)", text) # ["{{", "template", "}}"]
template_sections = []
template_nesting = 0 # depth of SINGLE BRACES { { nesting } }
# Because there is the possibility of triple curly braces
@@ -1860,16 +2011,15 @@ def inflection_template_fn(name, ht):
# print(text)
# print(repr(brace_matches))
if len(brace_matches) > 1:
- tsection = []
+ tsection: list[str] = []
after_templates = False # kludge to keep any text
- # before first template
- # with the first template;
- # otherwise, text
- # goes with preceding template
+ # before first template
+ # with the first template;
+ # otherwise, text
+ # goes with preceding template
for m in brace_matches:
if m.startswith("{{"):
- if (template_nesting == 0 and
- after_templates):
+ if template_nesting == 0 and after_templates:
template_sections.append(tsection)
tsection = []
# start new section
@@ -1879,12 +2029,13 @@ def inflection_template_fn(name, ht):
elif m.startswith("}}"):
template_nesting -= len(m)
if template_nesting < 0:
- wxr.wtp.error("Negatively nested braces, "
- "couldn't split inflection templates, "
- "{}/{} section {}"
- .format(word, language, section),
- sortid="page/1871")
- template_sections = [] # use whole text
+ wxr.wtp.error(
+ "Negatively nested braces, "
+ "couldn't split inflection templates, "
+ "{}/{} section {}".format(word, language, section),
+ sortid="page/1871",
+ )
+ template_sections = [] # use whole text
break
tsection.append(m)
else:
@@ -1904,16 +2055,20 @@ def inflection_template_fn(name, ht):
for tsection in template_sections:
texts.append("".join(tsection))
if template_nesting != 0:
- wxr.wtp.error("Template nesting error: "
- "template_nesting = {} "
- "couldn't split inflection templates, "
- "{}/{} section {}"
- .format(template_nesting, word, language, section),
- sortid="page/1896")
+ wxr.wtp.error(
+ "Template nesting error: "
+ "template_nesting = {} "
+ "couldn't split inflection templates, "
+ "{}/{} section {}".format(
+ template_nesting, word, language, section
+ ),
+ sortid="page/1896",
+ )
texts = [text]
for text in texts:
- tree = wxr.wtp.parse(text, expand_all=True,
- template_fn=inflection_template_fn)
+ tree = wxr.wtp.parse(
+ text, expand_all=True, template_fn=inflection_template_fn
+ )
# Parse inflection tables from the section. The data is stored
# under "forms".
@@ -1924,12 +2079,20 @@ def inflection_template_fn(name, ht):
template_name = m.group(1)
tablecontext = TableContext(template_name)
- parse_inflection_section(wxr, pos_data,
- word, language,
- pos, section, tree,
- tablecontext=tablecontext)
+ parse_inflection_section(
+ wxr,
+ pos_data,
+ word,
+ language,
+ pos,
+ section,
+ tree,
+ tablecontext=tablecontext,
+ )
- def get_subpage_section(title, subtitle, seq):
+ def get_subpage_section(
+ title: str, subtitle: str, seq: Union[list[str], tuple[str, ...]]
+ ) -> Optional[Union[WikiNode, str]]:
"""Loads a subpage of the given page, and finds the section
for the given language, part-of-speech, and section title. This
is used for finding translations and other sections on subpages."""
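+        # A typical seq is e.g. ["Finnish", "Noun", "translations"]
+        # (illustrative; the actual titles come from the page and the config).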
@@ -1942,11 +2105,16 @@ def get_subpage_section(title, subtitle, seq):
subpage_title = word + "/" + subtitle
subpage_content = wxr.wtp.get_page_body(subpage_title, 0)
if subpage_content is None:
- wxr.wtp.error("/translations not found despite "
- "{{see translation subpage|...}}",
- sortid="page/1934")
+ wxr.wtp.error(
+ "/translations not found despite "
+ "{{see translation subpage|...}}",
+ sortid="page/1934",
+ )
+ return None
- def recurse(node, seq):
+ def recurse(
+ node: Union[str, WikiNode], seq: Union[list[str], tuple[str, ...]]
+ ) -> Optional[Union[str, WikiNode]]:
# print(f"seq: {seq}")
if not seq:
return node
@@ -1970,17 +2138,22 @@ def recurse(node, seq):
subpage_content,
pre_expand=True,
additional_expand=ADDITIONAL_EXPAND_TEMPLATES,
- do_not_pre_expand=DO_NOT_PRE_EXPAND_TEMPLATES
+ do_not_pre_expand=DO_NOT_PRE_EXPAND_TEMPLATES,
)
assert tree.kind == NodeKind.ROOT
ret = recurse(tree, seq)
if ret is None:
- wxr.wtp.debug("Failed to find subpage section {}/{} seq {}"
- .format(title, subtitle, seq),
- sortid="page/1963")
+ wxr.wtp.debug(
+ "Failed to find subpage section {}/{} seq {}".format(
+ title, subtitle, seq
+ ),
+ sortid="page/1963",
+ )
return ret
- def parse_linkage(data, field, linkagenode):
+ def parse_linkage(
+ data: WordData, field: str, linkagenode: WikiNode
+ ) -> None:
assert isinstance(data, dict)
assert isinstance(field, str)
assert isinstance(linkagenode, WikiNode)
@@ -1995,7 +2168,11 @@ def parse_linkage(data, field, linkagenode):
toplevel_text = []
next_navframe_sense = None # Used for "(sense):" before NavFrame
- def parse_linkage_item(contents, field, sense):
+ def parse_linkage_item(
+ contents: list[Union[str, WikiNode]],
+ field: str,
+ sense: Optional[str] = None,
+ ):
assert isinstance(contents, (list, tuple))
assert isinstance(field, str)
assert sense is None or isinstance(sense, str)
@@ -2003,11 +2180,13 @@ def parse_linkage_item(contents, field, sense):
# print("PARSE_LINKAGE_ITEM: {} ({}): {}"
# .format(field, sense, contents))
- parts = []
- ruby = []
- urls = []
+ parts: list[str] = []
+ ruby: list[tuple[str, str]] = []
+ urls: list[str] = []
- def item_recurse(contents, italic=False):
+ def item_recurse(
+ contents: list[Union[str, WikiNode]], italic=False
+ ) -> None:
assert isinstance(contents, (list, tuple))
nonlocal sense
nonlocal ruby
@@ -2022,24 +2201,34 @@ def item_recurse(contents, italic=False):
# node.sarg if node.sarg else node.largs)
if kind == NodeKind.LIST:
if parts:
+ sense1: Optional[str]
sense1 = clean_node(wxr, None, parts)
if sense1.endswith(":"):
sense1 = sense1[:-1].strip()
if sense1.startswith("(") and sense1.endswith(")"):
sense1 = sense1[1:-1].strip()
- if sense1.lower() == wxr.config.OTHER_SUBTITLES["translations"]:
+ if (
+ sense1.lower()
+ == wxr.config.OTHER_SUBTITLES["translations"]
+ ):
sense1 = None
# print("linkage item_recurse LIST sense1:", sense1)
- parse_linkage_recurse(node.children, field,
- sense=sense1 or sense)
+ parse_linkage_recurse(
+ node.children, field, sense=sense1 or sense
+ )
parts = []
else:
parse_linkage_recurse(node.children, field, sense)
- elif kind in (NodeKind.TABLE, NodeKind.TABLE_ROW,
- NodeKind.TABLE_CELL):
+ elif kind in (
+ NodeKind.TABLE,
+ NodeKind.TABLE_ROW,
+ NodeKind.TABLE_CELL,
+ ):
parse_linkage_recurse(node.children, field, sense)
- elif kind in (NodeKind.TABLE_HEADER_CELL,
- NodeKind.TABLE_CAPTION):
+ elif kind in (
+ NodeKind.TABLE_HEADER_CELL,
+ NodeKind.TABLE_CAPTION,
+ ):
continue
elif kind == NodeKind.HTML:
classes = (node.attrs.get("class") or "").split()
@@ -2065,37 +2254,42 @@ def item_recurse(contents, italic=False):
elif kind == NodeKind.LINK:
ignore = False
if isinstance(node.largs[0][0], str):
- v = node.largs[0][0].strip().lower()
- if v.startswith(ns_title_prefix_tuple(wxr,
- "Category", True) \
- + ns_title_prefix_tuple(wxr,
- "File", True)):
+ v1 = node.largs[0][0].strip().lower()
+ if v1.startswith(
+ ns_title_prefix_tuple(wxr, "Category", True)
+ + ns_title_prefix_tuple(wxr, "File", True)
+ ):
ignore = True
if not ignore:
v = node.largs[-1]
- if (len(node.largs) == 1 and
- len(v) > 0 and
- isinstance(v[0], str) and
- v[0][0] == ":"):
- v = [v[0][1:]] + list(v[1:])
+ if (
+ len(node.largs) == 1
+ and len(v) > 0
+ and isinstance(v[0], str)
+ and v[0][0] == ":"
+ ):
+ v = [v[0][1:]] + list(v[1:]) # type:ignore
item_recurse(v, italic=italic)
elif kind == NodeKind.URL:
if len(node.largs) < 2 and node.largs:
# Naked url captured
- urls.extend(node.largs[-1])
+ urls.extend(node.largs[-1]) # type:ignore[arg-type]
continue
if len(node.largs) == 2:
# Url from link with text
- urls.append(node.largs[0][-1])
+ urls.append(node.largs[0][-1]) # type:ignore[arg-type]
# print(f"{node.largs=!r}")
# print("linkage recurse URL {}".format(node))
item_recurse(node.largs[-1], italic=italic)
elif kind in (NodeKind.PREFORMATTED, NodeKind.BOLD):
item_recurse(node.children, italic=italic)
else:
- wxr.wtp.debug("linkage item_recurse unhandled {}: {}"
- .format(node.kind, node),
- sortid="page/2073")
+ wxr.wtp.debug(
+ "linkage item_recurse unhandled {}: {}".format(
+ node.kind, node
+ ),
+ sortid="page/2073",
+ )
# print("LINKAGE CONTENTS BEFORE ITEM_RECURSE: {!r}"
# .format(contents))
@@ -2105,48 +2299,18 @@ def item_recurse(contents, italic=False):
# print("CLEANED ITEM: {!r}".format(item))
# print(f"URLS {urls=!r}")
- return parse_linkage_item_text(wxr, word, data, field, item,
- sense, ruby, pos_datas,
- is_reconstruction, urls)
-
- def parse_linkage_template(node):
- nonlocal have_panel_template
- # XXX remove this function but check how to handle the
- # template_linkage_mappings
- # print("LINKAGE TEMPLATE:", node)
-
- def linkage_template_fn(name, ht):
- # print("LINKAGE_TEMPLATE_FN:", name, ht)
- nonlocal field
- nonlocal have_panel_template
- if is_panel_template(wxr, name):
- have_panel_template = True
- return ""
- for prefix, t in template_linkage_mappings:
- if re.search(r"(^|[-/\s]){}($|\b|[0-9])".format(prefix),
- name):
- f = t if isinstance(t, str) else field
- if (name.endswith("-top") or name.endswith("-bottom") or
- name.endswith("-mid")):
- field = f
- return ""
- i = t if isinstance(t, int) else 2
- while True:
- v = ht.get(i, None)
- if v is None:
- break
- v = clean_node(wxr, None, v)
- parse_linkage_item(v, f)
- i += 1
- return ""
- # print("UNHANDLED LINKAGE TEMPLATE:", name, ht)
- return None
-
- # Main body of parse_linkage_template()
- text = wxr.wtp.node_to_wikitext(node)
- parsed = wxr.wtp.parse(text, expand_all=True,
- template_fn=linkage_template_fn)
- parse_linkage_recurse(parsed.children, field, None)
+ return parse_linkage_item_text(
+ wxr,
+ word,
+ data,
+ field,
+ item,
+ sense,
+ ruby,
+ pos_datas,
+ is_reconstruction,
+ urls,
+ )
def parse_linkage_recurse(contents, field, sense):
assert isinstance(contents, (list, tuple))
@@ -2177,9 +2341,12 @@ def parse_linkage_recurse(contents, field, sense):
parse_linkage_recurse(node.children, field, sense)
elif kind == NodeKind.TABLE_CELL:
parse_linkage_item(node.children, field, sense)
- elif kind in (NodeKind.TABLE_CAPTION,
- NodeKind.TABLE_HEADER_CELL,
- NodeKind.PREFORMATTED, NodeKind.BOLD):
+ elif kind in (
+ NodeKind.TABLE_CAPTION,
+ NodeKind.TABLE_HEADER_CELL,
+ NodeKind.PREFORMATTED,
+ NodeKind.BOLD,
+ ):
continue
elif kind == NodeKind.HTML:
# Recurse to process inside the HTML for most tags
@@ -2196,16 +2363,18 @@ def parse_linkage_recurse(contents, field, sense):
if sense1.endswith(":"):
sense1 = sense1[:-1].strip()
if sense and sense1:
- wxr.wtp.debug("linkage qualifier-content on multiple "
- "levels: {!r} and {!r}"
- .format(sense, sense1),
- sortid="page/2170")
+ wxr.wtp.debug(
+ "linkage qualifier-content on multiple "
+ "levels: {!r} and {!r}".format(sense, sense1),
+ sortid="page/2170",
+ )
parse_linkage_recurse(node.children, field, sense1)
elif "NavFrame" in classes:
# NavFrame uses previously assigned next_navframe_sense
# (from a "(sense):" item) and clears it afterwards
- parse_linkage_recurse(node.children, field,
- sense or next_navframe_sense)
+ parse_linkage_recurse(
+ node.children, field, sense or next_navframe_sense
+ )
next_navframe_sense = None
else:
parse_linkage_recurse(node.children, field, sense)
@@ -2222,9 +2391,12 @@ def parse_linkage_recurse(contents, field, sense):
# initial value
parse_linkage_recurse(node.largs[-1], field, sense)
else:
- wxr.wtp.debug("parse_linkage_recurse unhandled {}: {}"
- .format(kind, node),
- sortid="page/2196")
+ wxr.wtp.debug(
+ "parse_linkage_recurse unhandled {}: {}".format(
+ kind, node
+ ),
+ sortid="page/2196",
+ )
def linkage_template_fn1(name, ht):
nonlocal have_panel_template
@@ -2239,10 +2411,14 @@ def parse_zh_synonyms(parsed, data, hdrs, root_word):
if isinstance(item, WikiNode):
if item.kind == NodeKind.TABLE_ROW:
cleaned = clean_node(wxr, None, item.children)
- #print("cleaned:", repr(cleaned))
- if any(["Variety" in cleaned,
- "Location" in cleaned,
- "Words" in cleaned]):
+ # print("cleaned:", repr(cleaned))
+ if any(
+ [
+ "Variety" in cleaned,
+ "Location" in cleaned,
+ "Words" in cleaned,
+ ]
+ ):
pass
else:
split = cleaned.split("\n")
@@ -2268,11 +2444,15 @@ def parse_zh_synonyms(parsed, data, hdrs, root_word):
if tag in zh_tag_lookup:
tags.extend(zh_tag_lookup[tag])
else:
- print(f"MISSING ZH SYNONYM TAG for root {root_word}, word {words}: {tag}")
+ print(
+ f"MISSING ZH SYNONYM TAG for root {root_word}, word {words}: {tag}"
+ )
sys.stdout.flush()
for word in words:
- data.append({"word": word.strip(), "tags": tags})
+ data.append(
+ {"word": word.strip(), "tags": tags}
+ )
elif item.kind == NodeKind.HTML:
cleaned = clean_node(wxr, None, item.children)
if "Synonyms of" in cleaned:
@@ -2288,10 +2468,14 @@ def parse_zh_synonyms_list(parsed, data, hdrs, root_word):
if isinstance(item, WikiNode):
if item.kind == NodeKind.LIST_ITEM:
cleaned = clean_node(wxr, None, item.children)
- #print("cleaned:", repr(cleaned))
- if any(["Variety" in cleaned,
- "Location" in cleaned,
- "Words" in cleaned]):
+ # print("cleaned:", repr(cleaned))
+ if any(
+ [
+ "Variety" in cleaned,
+ "Location" in cleaned,
+ "Words" in cleaned,
+ ]
+ ):
pass
else:
cleaned = cleaned.replace("(", ",")
@@ -2309,11 +2493,15 @@ def parse_zh_synonyms_list(parsed, data, hdrs, root_word):
tags.append(tag)
elif tag in zh_tag_lookup:
tags.extend(zh_tag_lookup[tag])
- elif classify_desc(tag) == "romanization" \
- and roman is None:
+ elif (
+ classify_desc(tag) == "romanization"
+ and roman is None
+ ):
roman = tag
else:
- print(f"MISSING ZH SYNONYM TAG (possibly pinyin) - root {root_word}, word {words}: {tag}")
+ print(
+ f"MISSING ZH SYNONYM TAG (possibly pinyin) - root {root_word}, word {words}: {tag}"
+ )
sys.stdout.flush()
for word in words:
@@ -2328,9 +2516,13 @@ def parse_zh_synonyms_list(parsed, data, hdrs, root_word):
if cleaned.find("Synonyms of") >= 0:
cleaned = cleaned.replace("Synonyms of ", "")
root_word = cleaned
- parse_zh_synonyms_list(item.children, data, hdrs, root_word)
+ parse_zh_synonyms_list(
+ item.children, data, hdrs, root_word
+ )
else:
- parse_zh_synonyms_list(item.children, data, hdrs, root_word)
+ parse_zh_synonyms_list(
+ item.children, data, hdrs, root_word
+ )
def contains_kind(children, nodekind):
assert isinstance(children, list)
@@ -2345,21 +2537,21 @@ def contains_kind(children, nodekind):
# Main body of parse_linkage()
text = wxr.wtp.node_to_wikitext(linkagenode.children)
- parsed = wxr.wtp.parse(text, expand_all=True,
- template_fn=linkage_template_fn1)
+ parsed = wxr.wtp.parse(
+ text, expand_all=True, template_fn=linkage_template_fn1
+ )
if field == "synonyms" and lang_code == "zh":
synonyms = []
if contains_kind(parsed.children, NodeKind.LIST):
parse_zh_synonyms_list(parsed.children, synonyms, [], "")
else:
parse_zh_synonyms(parsed.children, synonyms, [], "")
- #print(json.dumps(synonyms, indent=4, ensure_ascii=False))
+ # print(json.dumps(synonyms, indent=4, ensure_ascii=False))
data_extend(data, "synonyms", synonyms)
parse_linkage_recurse(parsed.children, field, None)
if not data.get(field) and not have_panel_template:
text = "".join(toplevel_text).strip()
- if ("\n" not in text and "," in text and
- text.count(",") > 3):
+ if "\n" not in text and "," in text and text.count(",") > 3:
if not text.startswith("See "):
parse_linkage_item([text], field, None)
@@ -2388,8 +2580,10 @@ def parse_translation_item(contents, lang=None):
# print("sense <- clean_node: ", sense)
idx = sense.find("See also translations at")
if idx > 0:
- wxr.wtp.debug("Skipping translation see also: {}".format(sense),
- sortid="page/2361")
+ wxr.wtp.debug(
+ "Skipping translation see also: {}".format(sense),
+ sortid="page/2361",
+ )
sense = sense[:idx].strip()
if sense.endswith(":"):
sense = sense[:-1].strip()
@@ -2412,10 +2606,13 @@ def translation_item_template_fn(name, ht):
code = ht.get(1)
if code:
if langcode and code != langcode:
- wxr.wtp.debug("inconsistent language codes {} vs "
- "{} in translation item: {!r} {}"
- .format(langcode, code, name, ht),
- sortid="page/2386")
+ wxr.wtp.debug(
+ "inconsistent language codes {} vs "
+ "{} in translation item: {!r} {}".format(
+ langcode, code, name, ht
+ ),
+ sortid="page/2386",
+ )
langcode = code
tr = ht.get(2)
if tr:
@@ -2431,8 +2628,9 @@ def translation_item_template_fn(name, ht):
langcode = code
return None
if name == "trans-see":
- wxr.wtp.error("UNIMPLEMENTED trans-see template",
- sortid="page/2405")
+ wxr.wtp.error(
+ "UNIMPLEMENTED trans-see template", sortid="page/2405"
+ )
return ""
if name.endswith("-top"):
return ""
@@ -2440,28 +2638,41 @@ def translation_item_template_fn(name, ht):
return ""
if name.endswith("-mid"):
return ""
- #wxr.wtp.debug("UNHANDLED TRANSLATION ITEM TEMPLATE: {!r}"
+ # wxr.wtp.debug("UNHANDLED TRANSLATION ITEM TEMPLATE: {!r}"
# .format(name),
# sortid="page/2414")
return None
- sublists = list(x for x in contents
- if isinstance(x, WikiNode) and
- x.kind == NodeKind.LIST)
- contents = list(x for x in contents
- if not isinstance(x, WikiNode) or
- x.kind != NodeKind.LIST)
+ sublists = list(
+ x
+ for x in contents
+ if isinstance(x, WikiNode) and x.kind == NodeKind.LIST
+ )
+ contents = list(
+ x
+ for x in contents
+ if not isinstance(x, WikiNode) or x.kind != NodeKind.LIST
+ )
- item = clean_node(wxr, data, contents,
- template_fn=translation_item_template_fn)
+ item = clean_node(
+ wxr, data, contents, template_fn=translation_item_template_fn
+ )
# print(" TRANSLATION ITEM: {!r} [{}]".format(item, sense))
# Parse the translation item.
if item:
- lang = parse_translation_item_text(wxr, word, data, item, sense,
- pos_datas, lang, langcode,
- translations_from_template,
- is_reconstruction)
+ lang = parse_translation_item_text(
+ wxr,
+ word,
+ data,
+ item,
+ sense,
+ pos_datas,
+ lang,
+ langcode,
+ translations_from_template,
+ is_reconstruction,
+ )
# Handle sublists. They are frequently used for different scripts
# for the language and different variants of the language. We will
@@ -2495,8 +2706,9 @@ def template_fn(name, ht):
sense = None
sub = ht.get(1, "")
if sub:
- m = re.match(r"\s*(([^:\d]*)\s*\d*)\s*:\s*([^:]*)\s*",
- sub)
+ m = re.match(
+ r"\s*(([^:\d]*)\s*\d*)\s*:\s*([^:]*)\s*", sub
+ )
else:
m = None
etym = ""
@@ -2507,51 +2719,83 @@ def template_fn(name, ht):
etym = m.group(2)
pos = m.group(3)
if not sub:
- wxr.wtp.debug("no part-of-speech in "
- "{{see translation subpage|...}}, "
- "defaulting to just wxr.wtp.section "
- "(= language)",
- sortid="page/2468")
+ wxr.wtp.debug(
+ "no part-of-speech in "
+ "{{see translation subpage|...}}, "
+ "defaulting to just wxr.wtp.section "
+ "(= language)",
+ sortid="page/2468",
+ )
# seq sent to get_subpage_section without sub and pos
- seq = [language, wxr.config.OTHER_SUBTITLES["translations"]]
- elif (m and etym.lower().strip()
- in wxr.config.OTHER_SUBTITLES["etymology"]
- and pos.lower() in wxr.config.POS_SUBTITLES):
- seq = [language,
- etym_numbered,
- pos,
- wxr.config.OTHER_SUBTITLES["translations"]]
+ seq = [
+ language,
+ wxr.config.OTHER_SUBTITLES["translations"],
+ ]
+ elif (
+ m
+ and etym.lower().strip()
+ in wxr.config.OTHER_SUBTITLES["etymology"]
+ and pos.lower() in wxr.config.POS_SUBTITLES
+ ):
+ seq = [
+ language,
+ etym_numbered,
+ pos,
+ wxr.config.OTHER_SUBTITLES["translations"],
+ ]
elif sub.lower() in wxr.config.POS_SUBTITLES:
# seq with sub but not pos
- seq = [language,
- sub,
- wxr.config.OTHER_SUBTITLES["translations"]]
+ seq = [
+ language,
+ sub,
+ wxr.config.OTHER_SUBTITLES["translations"],
+ ]
else:
# seq with sub and pos
pos = wxr.wtp.subsection
if pos.lower() not in wxr.config.POS_SUBTITLES:
- wxr.wtp.debug("unhandled see translation subpage: "
- "language={} sub={} wxr.wtp.subsection={}"
- .format(language, sub, wxr.wtp.subsection),
- sortid="page/2478")
- seq = [language,
- sub,
- pos,
- wxr.config.OTHER_SUBTITLES["translations"]]
+ wxr.wtp.debug(
+ "unhandled see translation subpage: "
+ "language={} sub={} wxr.wtp.subsection={}".format(
+ language, sub, wxr.wtp.subsection
+ ),
+ sortid="page/2478",
+ )
+ seq = [
+ language,
+ sub,
+ pos,
+ wxr.config.OTHER_SUBTITLES["translations"],
+ ]
subnode = get_subpage_section(
- wxr.wtp.title, wxr.config.OTHER_SUBTITLES["translations"], seq)
+ wxr.wtp.title,
+ wxr.config.OTHER_SUBTITLES["translations"],
+ seq,
+ )
if subnode is not None:
parse_translations(data, subnode)
else:
# Failed to find the normal subpage section
seq = [wxr.config.OTHER_SUBTITLES["translations"]]
subnode = get_subpage_section(
- wxr.wtp.title, wxr.config.OTHER_SUBTITLES["translations"], seq)
+ wxr.wtp.title,
+ wxr.config.OTHER_SUBTITLES["translations"],
+ seq,
+ )
if subnode is not None:
parse_translations(data, subnode)
return ""
- if name in ("c", "C", "categorize", "cat", "catlangname",
- "topics", "top", "qualifier", "cln"):
+ if name in (
+ "c",
+ "C",
+ "categorize",
+ "cat",
+ "catlangname",
+ "topics",
+ "top",
+ "qualifier",
+ "cln",
+ ):
# These are expanded in the default way
return None
if name in ("trans-top",):
@@ -2564,8 +2808,12 @@ def template_fn(name, ht):
sense_parts = []
sense = None
return None
- if name in ("trans-bottom", "trans-mid",
- "checktrans-mid", "checktrans-bottom"):
+ if name in (
+ "trans-bottom",
+ "trans-mid",
+ "checktrans-mid",
+ "checktrans-bottom",
+ ):
return None
if name == "checktrans-top":
sense_parts = []
@@ -2576,11 +2824,17 @@ def template_fn(name, ht):
sense_parts = []
sense = None
return ""
- wxr.wtp.error("UNIMPLEMENTED parse_translation_template: {} {}"
- .format(name, ht),
- sortid="page/2517")
+ wxr.wtp.error(
+ "UNIMPLEMENTED parse_translation_template: {} {}".format(
+ name, ht
+ ),
+ sortid="page/2517",
+ )
return ""
- wxr.wtp.expand(wxr.wtp.node_to_wikitext(node), template_fn=template_fn)
+
+ wxr.wtp.expand(
+ wxr.wtp.node_to_wikitext(node), template_fn=template_fn
+ )
def parse_translation_recurse(xlatnode):
nonlocal sense
@@ -2590,9 +2844,11 @@ def parse_translation_recurse(xlatnode):
if isinstance(node, str):
if sense:
if not node.isspace():
- wxr.wtp.debug("skipping string in the middle of "
- "translations: {}".format(node),
- sortid="page/2530")
+ wxr.wtp.debug(
+ "skipping string in the middle of "
+ "translations: {}".format(node),
+ sortid="page/2530",
+ )
continue
# Add a part to the sense
sense_parts.append(node)
@@ -2616,8 +2872,11 @@ def parse_translation_recurse(xlatnode):
pass
elif kind == NodeKind.TEMPLATE:
parse_translation_template(node)
- elif kind in (NodeKind.TABLE, NodeKind.TABLE_ROW,
- NodeKind.TABLE_CELL):
+ elif kind in (
+ NodeKind.TABLE,
+ NodeKind.TABLE_ROW,
+ NodeKind.TABLE_CELL,
+ ):
parse_translation_recurse(node)
elif kind == NodeKind.HTML:
if node.attrs.get("class") == "NavFrame":
@@ -2636,8 +2895,7 @@ def parse_translation_recurse(xlatnode):
elif kind in LEVEL_KINDS:
# Sub-levels will be recursed elsewhere
pass
- elif kind in (NodeKind.ITALIC,
- NodeKind.BOLD):
+ elif kind in (NodeKind.ITALIC, NodeKind.BOLD):
parse_translation_recurse(node)
elif kind == NodeKind.PREFORMATTED:
print("parse_translation_recurse: PREFORMATTED:", node)
@@ -2650,29 +2908,53 @@ def parse_translation_recurse(xlatnode):
# handle them. Note: must be careful not to read other
# links, particularly things like in "human being":
# "a human being -- see [[man/translations]]" (group title)
- if (isinstance(arg0, (list, tuple)) and
- arg0 and
- isinstance(arg0[0], str) and
- arg0[0].endswith("/" + wxr.config.OTHER_SUBTITLES["translations"]) and
- arg0[0][:-(1 + len(wxr.config.OTHER_SUBTITLES["translations"]))] == wxr.wtp.title):
- wxr.wtp.debug("translations subpage link found on main "
- "page instead "
- "of normal {{see translation subpage|...}}",
- sortid="page/2595")
+ if (
+ isinstance(arg0, (list, tuple))
+ and arg0
+ and isinstance(arg0[0], str)
+ and arg0[0].endswith(
+ "/" + wxr.config.OTHER_SUBTITLES["translations"]
+ )
+ and arg0[0][
+ : -(
+ 1
+ + len(
+ wxr.config.OTHER_SUBTITLES["translations"]
+ )
+ )
+ ]
+ == wxr.wtp.title
+ ):
+ wxr.wtp.debug(
+ "translations subpage link found on main "
+ "page instead "
+ "of normal {{see translation subpage|...}}",
+ sortid="page/2595",
+ )
sub = wxr.wtp.subsection
if sub.lower() in wxr.config.POS_SUBTITLES:
- seq = [language, sub, wxr.config.OTHER_SUBTITLES["translations"]]
+ seq = [
+ language,
+ sub,
+ wxr.config.OTHER_SUBTITLES["translations"],
+ ]
subnode = get_subpage_section(
- wxr.wtp.title, wxr.config.OTHER_SUBTITLES["translations"], seq)
+ wxr.wtp.title,
+ wxr.config.OTHER_SUBTITLES["translations"],
+ seq,
+ )
if subnode is not None:
parse_translations(data, subnode)
else:
- wxr.wtp.errors("/translations link outside "
- "part-of-speech")
+                        wxr.wtp.error(
+                            "/translations link outside part-of-speech"
+                        )
- if (len(arg0) >= 1 and
- isinstance(arg0[0], str) and
- not arg0[0].lower().startswith("category:")):
+ if (
+ len(arg0) >= 1
+ and isinstance(arg0[0], str)
+ and not arg0[0].lower().startswith("category:")
+ ):
for x in node.largs[-1]:
if isinstance(x, str):
sense_parts.append(x)
@@ -2681,9 +2963,11 @@ def parse_translation_recurse(xlatnode):
elif not sense:
sense_parts.append(node)
else:
- wxr.wtp.debug("skipping text between translation items/senses: "
- "{}".format(node),
- sortid="page/2621")
+ wxr.wtp.debug(
+ "skipping text between translation items/senses: "
+ "{}".format(node),
+ sortid="page/2621",
+ )
# Main code of parse_translation(). We want ``sense`` to be assigned
# regardless of recursion levels, and thus the code is structured
@@ -2720,17 +3004,25 @@ def etym_post_template_fn(name, ht, expansion):
if ignore_count == 0:
ht = clean_template_args(wxr, ht)
expansion = clean_node(wxr, None, expansion)
- templates.append({"name": name, "args": ht, "expansion": expansion})
+ templates.append(
+ {"name": name, "args": ht, "expansion": expansion}
+ )
return None
# Remove any subsections
- contents = list(x for x in node.children
- if not isinstance(x, WikiNode) or
- x.kind not in LEVEL_KINDS)
+ contents = list(
+ x
+ for x in node.children
+ if not isinstance(x, WikiNode) or x.kind not in LEVEL_KINDS
+ )
# Convert to text, also capturing templates using post_template_fn
- text = clean_node(wxr, None, contents,
- template_fn=etym_template_fn,
- post_template_fn=etym_post_template_fn)
+ text = clean_node(
+ wxr,
+ None,
+ contents,
+ template_fn=etym_template_fn,
+ post_template_fn=etym_post_template_fn,
+ )
# Save the collected information.
data["etymology_text"] = text
data["etymology_templates"] = templates
@@ -2804,20 +3096,23 @@ def desc_post_template_fn(name, ht, expansion):
# same proto-language, then we tag this descendant entry with
# "derived"
is_derived = (
- is_proto_root_derived_section and
- (name == "l" or name == "link") and
- ("1" in ht and ht["1"] == lang_code)
+ is_proto_root_derived_section
+ and (name == "l" or name == "link")
+ and ("1" in ht and ht["1"] == lang_code)
)
expansion = clean_node(wxr, None, expansion)
- templates.append({
- "name": name, "args": ht, "expansion": expansion
- })
+ templates.append(
+ {"name": name, "args": ht, "expansion": expansion}
+ )
return None
- text = clean_node(wxr, None, children,
- template_fn=desc_template_fn,
- post_template_fn=desc_post_template_fn
- )
+ text = clean_node(
+ wxr,
+ None,
+ children,
+ template_fn=desc_template_fn,
+ post_template_fn=desc_post_template_fn,
+ )
item_data["templates"] = templates
item_data["text"] = text
if is_derived:
@@ -2837,11 +3132,15 @@ def get_sublist_index(list_item):
def get_descendants(node):
"""Appends the data for every list item in every list in node
- to descendants."""
+ to descendants."""
for _, c in node_children(node):
- if (c.kind == NodeKind.TEMPLATE and c.largs
- and len(c.largs[0]) == 1 and isinstance(c.largs[0][0], str)
- and c.largs[0][0] in unignored_non_list_templates):
+ if (
+ c.kind == NodeKind.TEMPLATE
+ and c.largs
+ and len(c.largs[0]) == 1
+ and isinstance(c.largs[0][0], str)
+ and c.largs[0][0] in unignored_non_list_templates
+ ):
# Some Descendants sections have no wikitext list. Rather,
# the list is entirely generated by a single template (see
# e.g. the use of {{CJKV}} in Chinese entries).
@@ -2914,40 +3213,48 @@ def skip_template_fn(name, ht):
if node.kind not in LEVEL_KINDS:
# XXX handle e.g. wikipedia links at the top of a language
# XXX should at least capture "also" at top of page
- if node.kind in (NodeKind.HLINE, NodeKind.LIST,
- NodeKind.LIST_ITEM):
+ if node.kind in (
+ NodeKind.HLINE,
+ NodeKind.LIST,
+ NodeKind.LIST_ITEM,
+ ):
continue
# print(" UNEXPECTED: {}".format(node))
# Clean the node to collect category links
- clean_node(wxr, etym_data, node,
- template_fn=skip_template_fn)
+ clean_node(wxr, etym_data, node, template_fn=skip_template_fn)
continue
- t = clean_node(wxr, etym_data,
- node.sarg if node.sarg else node.largs)
+ t = clean_node(
+ wxr, etym_data, node.sarg if node.sarg else node.largs
+ )
t = t.lower()
# XXX these counts were never implemented fully, and even this
# gets discarded: Search STATISTICS_IMPLEMENTATION
wxr.config.section_counts[t] += 1
# print("PROCESS_CHILDREN: T:", repr(t))
if t.startswith(tuple(wxr.config.OTHER_SUBTITLES["pronunciation"])):
- if t.startswith(tuple(
+ if t.startswith(
+ tuple(
pron_title + " "
- for pron_title in
- wxr.config.OTHER_SUBTITLES.get("pronunciation", []))):
+ for pron_title in wxr.config.OTHER_SUBTITLES.get(
+ "pronunciation", []
+ )
+ )
+ ):
# Pronunciation 1, etc, are used in Chinese Glyphs,
# and each of them may have senses under Definition
push_etym()
wxr.wtp.start_subsection(None)
if wxr.config.capture_pronunciation:
data = select_data()
- parse_pronunciation(wxr,
- node,
- data,
- etym_data,
- have_etym,
- base_data,
- lang_code,
- )
+ parse_pronunciation(
+ wxr,
+ node,
+ data,
+ etym_data,
+ have_etym,
+ base_data,
+ lang_code,
+ )
elif t.startswith(tuple(wxr.config.OTHER_SUBTITLES["etymology"])):
push_etym()
wxr.wtp.start_subsection(None)
@@ -2963,11 +3270,13 @@ def skip_template_fn(name, ht):
data = select_data()
parse_descendants(data, node)
elif (
- t in wxr.config.OTHER_SUBTITLES.get(
+ t
+ in wxr.config.OTHER_SUBTITLES.get(
"proto_root_derived_sections", []
)
- and pos == "root" and is_reconstruction and
- wxr.config.capture_descendants
+ and pos == "root"
+ and is_reconstruction
+ and wxr.config.capture_descendants
):
data = select_data()
parse_descendants(data, node, True)
@@ -2989,17 +3298,20 @@ def skip_template_fn(name, ht):
pos = dt["pos"]
wxr.wtp.start_subsection(t)
if "debug" in dt:
- wxr.wtp.debug("{} in section {}"
- .format(dt["debug"], t),
- sortid="page/2755")
+ wxr.wtp.debug(
+ "{} in section {}".format(dt["debug"], t),
+ sortid="page/2755",
+ )
if "warning" in dt:
- wxr.wtp.warning("{} in section {}"
- .format(dt["warning"], t),
- sortid="page/2759")
+ wxr.wtp.warning(
+ "{} in section {}".format(dt["warning"], t),
+ sortid="page/2759",
+ )
if "error" in dt:
- wxr.wtp.error("{} in section {}"
- .format(dt["error"], t),
- sortid="page/2763")
+ wxr.wtp.error(
+ "{} in section {}".format(dt["error"], t),
+ sortid="page/2763",
+ )
# Parse word senses for the part-of-speech
parse_part_of_speech(node, pos)
if "tags" in dt:
@@ -3056,10 +3368,10 @@ def usex_template_fn(name, ht):
usex_type = "example"
elif name in quotation_templates:
usex_type = "quotation"
- for prefix, t in template_linkage_mappings:
- if re.search(r"(^|[-/\s]){}($|\b|[0-9])"
- .format(prefix),
- name):
+ for prefix in template_linkages:
+ if re.search(
+ r"(^|[-/\s]){}($|\b|[0-9])".format(prefix), name
+ ):
return ""
return None
@@ -3068,23 +3380,32 @@ def usex_template_fn(name, ht):
contents = item.children
if lang_code == "ja":
# print(contents)
- if (contents and isinstance(contents, str) and
- re.match(r"\s*$", contents[0])):
+ if (
+ contents
+ and isinstance(contents, str)
+ and re.match(r"\s*$", contents[0])
+ ):
contents = contents[1:]
- exp = wxr.wtp.parse(wxr.wtp.node_to_wikitext(contents),
- # post_template_fn=head_post_template_fn,
- expand_all=True)
+ exp = wxr.wtp.parse(
+ wxr.wtp.node_to_wikitext(contents),
+ # post_template_fn=head_post_template_fn,
+ expand_all=True,
+ )
rub, rest = extract_ruby(wxr, exp.children)
if rub:
for r in rub:
ruby.append(r)
contents = rest
- subtext = clean_node(wxr, sense_base, contents,
- template_fn=usex_template_fn)
- subtext = re.sub(r"\s*\(please add an English "
- r"translation of this "
- r"(example|usage example|quote)\)",
- "", subtext).strip()
+ subtext = clean_node(
+ wxr, sense_base, contents, template_fn=usex_template_fn
+ )
+ subtext = re.sub(
+ r"\s*\(please add an English "
+ r"translation of this "
+ r"(example|usage example|quote)\)",
+ "",
+ subtext,
+ ).strip()
subtext = re.sub(r"\^\([^)]*\)", "", subtext)
subtext = re.sub(r"\s*[―—]+$", "", subtext)
# print("subtext:", repr(subtext))
@@ -3093,17 +3414,21 @@ def usex_template_fn(name, ht):
# print(lines)
lines = list(re.sub(r"^[#:*]*", "", x).strip() for x in lines)
- lines = list(x for x in lines
- if not re.match(
- r"(Synonyms: |Antonyms: |Hyponyms: |"
- r"Synonym: |Antonym: |Hyponym: |"
- r"Hypernyms: |Derived terms: |"
- r"Related terms: |"
- r"Hypernym: |Derived term: |"
- r"Coordinate terms:|"
- r"Related term: |"
- r"For more quotations using )",
- x))
+ lines = list(
+ x
+ for x in lines
+ if not re.match(
+ r"(Synonyms: |Antonyms: |Hyponyms: |"
+ r"Synonym: |Antonym: |Hyponym: |"
+ r"Hypernyms: |Derived terms: |"
+ r"Related terms: |"
+ r"Hypernym: |Derived term: |"
+ r"Coordinate terms:|"
+ r"Related term: |"
+ r"For more quotations using )",
+ x,
+ )
+ )
tr = ""
ref = ""
roman = ""
@@ -3112,26 +3437,28 @@ def usex_template_fn(name, ht):
# print(classify_desc(line))
if len(lines) == 1 and lang_code != "en":
parts = re.split(r"\s*[―—]+\s*", lines[0])
- if (len(parts) == 2 and
- classify_desc(parts[1]) == "english"):
+ if len(parts) == 2 and classify_desc(parts[1]) == "english":
lines = [parts[0].strip()]
tr = parts[1].strip()
- elif (len(parts) == 3 and
- classify_desc(parts[1]) in ("romanization",
- "english") and
- classify_desc(parts[2]) == "english"):
+ elif (
+ len(parts) == 3
+ and classify_desc(parts[1])
+ in ("romanization", "english")
+ and classify_desc(parts[2]) == "english"
+ ):
lines = [parts[0].strip()]
roman = parts[1].strip()
tr = parts[2].strip()
else:
parts = re.split(r"\s+-\s+", lines[0])
- if (len(parts) == 2 and
- classify_desc(parts[1]) == "english"):
+ if (
+ len(parts) == 2
+ and classify_desc(parts[1]) == "english"
+ ):
lines = [parts[0].strip()]
tr = parts[1].strip()
elif len(lines) > 1:
- if any(re.search(r"[]\d:)]\s*$", x)
- for x in lines[:-1]):
+ if any(re.search(r"[]\d:)]\s*$", x) for x in lines[:-1]):
ref = []
for i in range(len(lines)):
if re.match(r"^[#*]*:+(\s*$|\s+)", lines[i]):
@@ -3140,13 +3467,17 @@ def usex_template_fn(name, ht):
if re.search(r"[]\d:)]\s*$", lines[i]):
break
ref = " ".join(ref)
- lines = lines[i + 1:]
- if (lang_code != "en" and len(lines) >= 2 and
- classify_desc(lines[-1]) == "english"):
+ lines = lines[i + 1 :]
+ if (
+ lang_code != "en"
+ and len(lines) >= 2
+ and classify_desc(lines[-1]) == "english"
+ ):
i = len(lines) - 1
- while (i > 1 and
- classify_desc(lines[i - 1])
- == "english"):
+ while (
+ i > 1
+ and classify_desc(lines[i - 1]) == "english"
+ ):
i -= 1
tr = "\n".join(lines[i:])
lines = lines[:i]
@@ -3155,8 +3486,7 @@ def usex_template_fn(name, ht):
roman = lines[-1].strip()
lines = lines[:-1]
- elif (lang_code == "en" and
- re.match(r"^[#*]*:+", lines[1])):
+ elif lang_code == "en" and re.match(r"^[#*]*:+", lines[1]):
ref = lines[0]
lines = lines[1:]
elif lang_code != "en" and len(lines) == 2:
@@ -3168,9 +3498,13 @@ def usex_template_fn(name, ht):
elif cls1 == "english" and cls2 != "english":
tr = lines[0]
lines = [lines[1]]
- elif (re.match(r"^[#*]*:+", lines[1]) and
- classify_desc(re.sub(r"^[#*:]+\s*", "",
- lines[1])) == "english"):
+ elif (
+ re.match(r"^[#*]*:+", lines[1])
+ and classify_desc(
+ re.sub(r"^[#*:]+\s*", "", lines[1])
+ )
+ == "english"
+ ):
tr = re.sub(r"^[#*:]+\s*", "", lines[1])
lines = [lines[0]]
elif cls1 == "english" and cls2 == "english":
@@ -3179,20 +3513,27 @@ def usex_template_fn(name, ht):
# non-English, as that seems more common.
tr = lines[1]
lines = [lines[0]]
- elif (usex_type != "quotation" and
- lang_code != "en" and
- len(lines) == 3):
+ elif (
+ usex_type != "quotation"
+ and lang_code != "en"
+ and len(lines) == 3
+ ):
cls1 = classify_desc(lines[0])
cls2 = classify_desc(lines[1])
cls3 = classify_desc(lines[2])
- if (cls3 == "english" and
- cls2 in ["english", "romanization"] and
- cls1 != "english"):
+ if (
+ cls3 == "english"
+ and cls2 in ["english", "romanization"]
+ and cls1 != "english"
+ ):
tr = lines[2].strip()
roman = lines[1].strip()
lines = [lines[0].strip()]
- elif (usex_type == "quotation" and
- lang_code != "en" and len(lines) > 2):
+ elif (
+ usex_type == "quotation"
+ and lang_code != "en"
+ and len(lines) > 2
+ ):
# for x in lines:
# print(" LINE: {}: {}"
# .format(classify_desc(x), x))
@@ -3202,9 +3543,10 @@ def usex_template_fn(name, ht):
cls1 = classify_desc(lines[-1])
if cls1 == "english":
i = len(lines) - 1
- while (i > 1 and
- classify_desc(lines[i - 1])
- == "english"):
+ while (
+ i > 1
+ and classify_desc(lines[i - 1]) == "english"
+ ):
i -= 1
tr = "\n".join(lines[i:])
lines = lines[:i]
@@ -3215,10 +3557,13 @@ def usex_template_fn(name, ht):
tr = re.sub(r"[ \t\r]+", " ", tr).strip()
tr = re.sub(r"\[\s*…\s*\]", "[…]", tr)
ref = re.sub(r"^[#*:]+\s*", "", ref)
- ref = re.sub(r", (volume |number |page )?“?"
- r"\(please specify ([^)]|\(s\))*\)”?|"
- ", text here$",
- "", ref)
+ ref = re.sub(
+ r", (volume |number |page )?“?"
+ r"\(please specify ([^)]|\(s\))*\)”?|"
+ ", text here$",
+ "",
+ ref,
+ )
ref = re.sub(r"\[\s*…\s*\]", "[…]", ref)
lines = list(re.sub(r"^[#*:]+\s*", "", x) for x in lines)
subtext = "\n".join(x for x in lines if x)
@@ -3226,30 +3571,41 @@ def usex_template_fn(name, ht):
m = re.search(r"([.!?])\s+\(([^)]+)\)\s*$", subtext)
if m and classify_desc(m.group(2)) == "english":
tr = m.group(2)
- subtext = subtext[:m.start()] + m.group(1)
+ subtext = subtext[: m.start()] + m.group(1)
elif lines:
parts = re.split(r"\s*[―—]+\s*", lines[0])
- if (len(parts) == 2 and
- classify_desc(parts[1]) == "english"):
+ if (
+ len(parts) == 2
+ and classify_desc(parts[1]) == "english"
+ ):
subtext = parts[0].strip()
tr = parts[1].strip()
- subtext = re.sub(r'^[“"`]([^“"`”\']*)[”"\']$', r"\1",
- subtext)
- subtext = re.sub(r"(please add an English translation of "
- r"this (quote|usage example))",
- "", subtext)
- subtext = re.sub(r"\s*→New International Version "
- "translation$",
- "", subtext) # e.g. pis/Tok Pisin (Bible)
+ subtext = re.sub(r'^[“"`]([^“"`”\']*)[”"\']$', r"\1", subtext)
+ subtext = re.sub(
+ r"(please add an English translation of "
+ r"this (quote|usage example))",
+ "",
+ subtext,
+ )
+ subtext = re.sub(
+ r"\s*→New International Version " "translation$",
+ "",
+ subtext,
+ ) # e.g. pis/Tok Pisin (Bible)
subtext = re.sub(r"[ \t\r]+", " ", subtext).strip()
subtext = re.sub(r"\[\s*…\s*\]", "[…]", subtext)
note = None
m = re.match(r"^\(([^)]*)\):\s+", subtext)
- if (m is not None and lang_code != "en" and
- (m.group(1).startswith("with ") or
- classify_desc(m.group(1)) == "english")):
+ if (
+ m is not None
+ and lang_code != "en"
+ and (
+ m.group(1).startswith("with ")
+ or classify_desc(m.group(1)) == "english"
+ )
+ ):
note = m.group(1)
- subtext = subtext[m.end():]
+ subtext = subtext[m.end() :]
ref = re.sub(r"\s*\(→ISBN\)", "", ref)
ref = re.sub(r",\s*→ISBN", "", ref)
ref = ref.strip()
@@ -3278,7 +3634,6 @@ def usex_template_fn(name, ht):
return examples
-
# Main code of parse_language()
# Process the section
stack.append(language)
@@ -3358,9 +3713,10 @@ def top_template_fn(name, ht):
if arg.startswith("Q") or arg.startswith("Lexeme:L"):
data_append(data, "wikidata", arg)
return ""
- wxr.wtp.debug("UNIMPLEMENTED top-level template: {} {}"
- .format(name, ht),
- sortid="page/2870")
+ wxr.wtp.debug(
+ "UNIMPLEMENTED top-level template: {} {}".format(name, ht),
+ sortid="page/2870",
+ )
return ""
clean_node(wxr, None, [node], template_fn=top_template_fn)
@@ -3373,9 +3729,9 @@ def fix_subtitle_hierarchy(wxr: WiktextractContext, text: str) -> str:
# Known lowercase PoS names are in part_of_speech_map
# Known lowercase linkage section names are in linkage_map
- old = re.split(r"(?m)^(==+)[ \t]*([^= \t]([^=\n]|=[^=])*?)"
- r"[ \t]*(==+)[ \t]*$",
- text)
+ old = re.split(
+ r"(?m)^(==+)[ \t]*([^= \t]([^=\n]|=[^=])*?)" r"[ \t]*(==+)[ \t]*$", text
+ )
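+    # re.split with capturing groups interleaves the body text with the four
+    # heading groups: [before, "==+", title, inner, "==+", after, ...]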
parts = []
npar = 4 # Number of parentheses in above expression
@@ -3389,22 +3745,29 @@ def fix_subtitle_hierarchy(wxr: WiktextractContext, text: str) -> str:
level = len(left)
part = old[i + npar]
if level != len(right):
- wxr.wtp.debug("subtitle has unbalanced levels: "
- "{!r} has {} on the left and {} on the right"
- .format(title, left, right),
- sortid="page/2904")
+ wxr.wtp.debug(
+ "subtitle has unbalanced levels: "
+ "{!r} has {} on the left and {} on the right".format(
+ title, left, right
+ ),
+ sortid="page/2904",
+ )
lc = title.lower()
if name_to_code(title, "en") != "":
if level > 2:
- wxr.wtp.debug("subtitle has language name {} at level {}"
- .format(title, level),
- sortid="page/2911")
+ wxr.wtp.debug(
+ "subtitle has language name {} at level {}".format(
+ title, level
+ ),
+ sortid="page/2911",
+ )
level = 2
elif lc.startswith(tuple(wxr.config.OTHER_SUBTITLES["etymology"])):
if level > 3:
- wxr.wtp.debug("etymology section {} at level {}"
- .format(title, level),
- sortid="page/2917")
+ wxr.wtp.debug(
+ "etymology section {} at level {}".format(title, level),
+ sortid="page/2917",
+ )
level = 3
elif lc.startswith(tuple(wxr.config.OTHER_SUBTITLES["pronunciation"])):
level = 3
@@ -3473,7 +3836,7 @@ def parse_page(
text,
pre_expand=True,
additional_expand=ADDITIONAL_EXPAND_TEMPLATES,
- do_not_pre_expand=DO_NOT_PRE_EXPAND_TEMPLATES
+ do_not_pre_expand=DO_NOT_PRE_EXPAND_TEMPLATES,
)
# from wikitextprocessor.parser import print_tree
# print("PAGE PARSE:", print_tree(tree))
@@ -3521,7 +3884,7 @@ def parse_page(
if "lang" not in data:
wxr.wtp.debug(
"internal error -- no lang in data: {}".format(data),
- sortid="page/3034"
+ sortid="page/3034",
)
continue
for k, v in top_data.items():
@@ -3552,16 +3915,26 @@ def parse_page(
if not conjs:
continue
cpos = dt.get("pos")
- if (pos == cpos or
- (pos, cpos) in (("noun", "adj"),
- ("noun", "name"),
- ("name", "noun"),
- ("name", "adj"),
- ("adj", "noun"),
- ("adj", "name")) or
- (pos == "adj" and cpos == "verb" and
- any("participle" in s.get("tags", ())
- for s in dt.get("senses", ())))):
+ if (
+ pos == cpos
+ or (pos, cpos)
+ in (
+ ("noun", "adj"),
+ ("noun", "name"),
+ ("name", "noun"),
+ ("name", "adj"),
+ ("adj", "noun"),
+ ("adj", "name"),
+ )
+ or (
+ pos == "adj"
+ and cpos == "verb"
+ and any(
+ "participle" in s.get("tags", ())
+ for s in dt.get("senses", ())
+ )
+ )
+ ):
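+                # e.g. a conjugation table captured under the "noun" entry is
+                # reused for the "adj" entry of the same word (illustrative).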
data["conjugation"] = list(conjs) # Copy list!
break
# Add topics from the last sense of a language to its other senses,
@@ -3579,13 +3952,14 @@ def parse_page(
for x in ret:
if x["word"] != word:
if word.startswith("Unsupported titles/"):
- wxr.wtp.debug(f"UNSUPPORTED TITLE: '{word}' -> '{x['word']}'",
- sortid="20231101/3578page.py"
- )
+ wxr.wtp.debug(
+ f"UNSUPPORTED TITLE: '{word}' -> '{x['word']}'",
+ sortid="20231101/3578page.py",
+ )
else:
- wxr.wtp.debug(f"DIFFERENT ORIGINAL TITLE: '{word}' "
- f"-> '{x['word']}'",
- sortid="20231101/3582page.py"
- )
+ wxr.wtp.debug(
+ f"DIFFERENT ORIGINAL TITLE: '{word}' " f"-> '{x['word']}'",
+ sortid="20231101/3582page.py",
+ )
x["original_title"] = word
return ret
diff --git a/src/wiktextract/extractor/ruby.py b/src/wiktextract/extractor/ruby.py
index 43e2ee38f..1a287758c 100644
--- a/src/wiktextract/extractor/ruby.py
+++ b/src/wiktextract/extractor/ruby.py
@@ -1,8 +1,12 @@
from typing import List, Optional, Tuple, Union
from wikitextprocessor import NodeKind, WikiNode
-from wikitextprocessor.parser import HTMLNode, LevelNode, TemplateNode
-
+from wikitextprocessor.parser import (
+ GeneralNode,
+ HTMLNode,
+ LevelNode,
+ TemplateNode,
+)
from wiktextract.page import clean_node
from wiktextract.wxr_context import WiktextractContext
@@ -13,8 +17,9 @@ def parse_ruby(
"""Parse a HTML 'ruby' node for a kanji part and a furigana (ruby) part,
and return a tuple containing those. Discard the rp-element's parentheses,
we don't do anything with them."""
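+    # Illustrative input/output for a well-formed ruby element:
+    #   <ruby>漢字<rp>(</rp><rt>かんじ</rt><rp>)</rp></ruby> -> ("漢字", "かんじ")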
- ruby_nodes = []
- furi_nodes = []
+ ruby_nodes: list[Union[str, WikiNode]] = []
+ furi_nodes: list[Union[str, WikiNode]] = [] # furi_nodes is technically
+ # just list[WikiNode], but this appeases the type-checker for clean_node()
for child in node.children:
if (
not isinstance(child, WikiNode)
@@ -31,14 +36,14 @@ def parse_ruby(
# element with an empty something (apparently, seeing as how this
# works), leaving no trace of the broken ruby element in the final
# HTML source of the page!
- return
+ return None
return ruby_kanji, furigana
def extract_ruby(
wxr: WiktextractContext,
- contents: Union[WikiNode, List[Union[WikiNode, str]]],
-) -> Tuple[List[Tuple[str]], List[Union[WikiNode, str]]]:
+ contents: GeneralNode,
+) -> tuple[list[tuple[str, str]], list[Union[WikiNode, str]]]:
# If contents is a list, process each element separately
extracted = []
new_contents = []
@@ -69,7 +74,7 @@ def extract_ruby(
}:
# Process args and children
if kind != NodeKind.LINK:
- new_node = LevelNode(new_node.loc)
+ new_node = LevelNode(kind, new_node.loc)
new_args = []
for arg in contents.largs:
e1, c1 = extract_ruby(wxr, arg)
diff --git a/src/wiktextract/linkages.py b/src/wiktextract/linkages.py
index 5efbbea42..0de6f427d 100644
--- a/src/wiktextract/linkages.py
+++ b/src/wiktextract/linkages.py
@@ -8,21 +8,33 @@
from wikitextprocessor import Wtp
from typing import Dict, List, Union, Optional
from .datautils import split_at_comma_semi, data_append
-from .form_descriptions import (classify_desc, parse_head_final_tags,
- parse_sense_qualifier,
- head_final_bantu_langs, head_final_bantu_re,
- head_final_other_langs, head_final_other_re,
- head_final_numeric_langs, head_final_re)
+from .form_descriptions import (
+ classify_desc,
+ parse_head_final_tags,
+ parse_sense_qualifier,
+ head_final_bantu_langs,
+ head_final_bantu_re,
+ head_final_other_langs,
+ head_final_other_re,
+ head_final_numeric_langs,
+ head_final_re,
+)
from .tags import linkage_beginning_tags
+from .type_utils import WordData
# Linkage will be ignored if it matches this regexp before splitting
linkage_pre_split_ignore_re = re.compile(
- r"^(" + "|".join(re.escape(x) for x in [
- "For more variations, see ",
- "Signal flag:",
- "Semaphore:",
- ]) +
- r")")
+ r"^("
+ + "|".join(
+ re.escape(x)
+ for x in [
+ "For more variations, see ",
+ "Signal flag:",
+ "Semaphore:",
+ ]
+ )
+ + r")"
+)
# Linkage will be ignored if it has one of these prefixes
linkage_ignore_prefixes = [
@@ -63,31 +75,40 @@
# Linkage will be ignored if it matches this regexp
linkage_ignore_re = re.compile(
- r"^(" + "|".join(re.escape(x) for x in linkage_ignore_whole) +
- r")$|^(" + "|".join(re.escape(x) for x in linkage_ignore_prefixes) +
- r")|(" + "|".join(re.escape(x) for x in linkage_ignore_suffixes) +
- r")$")
+ r"^("
+ + "|".join(re.escape(x) for x in linkage_ignore_whole)
+ + r")$|^("
+ + "|".join(re.escape(x) for x in linkage_ignore_prefixes)
+ + r")|("
+ + "|".join(re.escape(x) for x in linkage_ignore_suffixes)
+ + r")$"
+)
# These prefixes will be removed from linkages, leaving the rest. This is
# considered separately for each linkage in a list.
linkage_remove_prefixes_re = re.compile(
- r"^(" +
- r"|".join(re.escape(x) for x in [
- ":",
- "see Thesaurus:",
- "See Thesaurus:",
- "see also Thesaurus:",
- "See also Thesaurus:",
- "see also ",
- "See also ",
- "see ",
- "See ",
- "from ",
- "abbreviation of ",
- "ISO 639-1 code ",
- "ISO 639-3 code ",
- "Thesaurus:"]) +
- ")")
+ r"^("
+ + r"|".join(
+ re.escape(x)
+ for x in [
+ ":",
+ "see Thesaurus:",
+ "See Thesaurus:",
+ "see also Thesaurus:",
+ "See also Thesaurus:",
+ "see also ",
+ "See also ",
+ "see ",
+ "See ",
+ "from ",
+ "abbreviation of ",
+ "ISO 639-1 code ",
+ "ISO 639-3 code ",
+ "Thesaurus:",
+ ]
+ )
+ + ")"
+)
# When removing prefix from linkage, this dictionary can be used to map
# the removed prefix to a space-separated list of tags to add
@@ -101,17 +122,22 @@
r"(\s+on (Wikispecies|Wikimedia Commons|"
r"[A-Z]\w+ Wiktionary|[A-Z]\w+ Wikipedia)\.?|"
r"\s*[-–] Pre-reform orthography.*)"
- r"$")
+ r"$"
+)
# Ignore linkage parenthesized sections that contain one of these strings
linkage_paren_ignore_contains_re = re.compile(
- r"\b(" +
- "|".join(re.escape(x) for x in [
- "from Etymology",
- "used as",
- "usage notes",
- ]) +
- ")([, ]|$)")
+ r"\b("
+ + "|".join(
+ re.escape(x)
+ for x in [
+ "from Etymology",
+ "used as",
+ "usage notes",
+ ]
+ )
+ + ")([, ]|$)"
+)
taxonomic_ending_map = {
"superkingdoms": "superkingdom",
@@ -133,7 +159,9 @@
taxonomic_ending_map[v] = v # Also add singular -> singular
taxonomic_ending_re = re.compile(
r"\s+[-‐‑‒–—]\s+({})$".format(
- "|".join(re.escape(x) for x in taxonomic_ending_map)))
+ "|".join(re.escape(x) for x in taxonomic_ending_map)
+ )
+)
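+# e.g. a linkage item ending in " - superkingdoms" (illustrative) matches here,
+# and taxonomic_ending_map maps that ending back to the singular "superkingdom".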
# Exceptional splits for linkages. This can be used to fix particular linkages
# that are not handled correctly by the default code. This can also be used
@@ -146,10 +174,14 @@
# Truncate linkage word if it matches any of these strings
linkage_truncate_re = re.compile(
- "|".join(re.escape(x) for x in [
- " and its derived terms",
- " UTF-16 0x214C",
- ]))
+ "|".join(
+ re.escape(x)
+ for x in [
+ " and its derived terms",
+ " UTF-16 0x214C",
+ ]
+ )
+)
# Regexp for identifying special linkages containing lists of letters, digits,
# or characters
@@ -161,39 +193,47 @@
r" digits)(;|$)|"
r"(^|; )(Letters using |Letters of the |"
r"Variations of letter )|"
- r"^(Hiragana|Katakana)$")
+ r"^(Hiragana|Katakana)$"
+)
# Matches a Unicode character including any combining diacritics (even if
# separate characters)
-unicode_dc_re = re.compile(r"\w[{}]|.".format(
- "".join(chr(x) for x in range(0, 0x110000)
- if unicodedata.category(chr(x)) == "Mn")))
-
-
-def parse_linkage_item_text(wxr: Wtp,
- word: str,
- data: Dict[str, Union[list, str, dict]],
- field: str,
- item: str,
- sense: Optional[str],
- ruby: list,
- pos_datas: list,
- is_reconstruction: bool,
- urls: Optional[List[str]] = None
- ) -> Optional[str]:
+unicode_dc_re = re.compile(
+ r"\w[{}]|.".format(
+ "".join(
+ chr(x)
+ for x in range(0, 0x110000)
+ if unicodedata.category(chr(x)) == "Mn"
+ )
+ )
+)
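+# e.g. "e" followed by U+0301 (combining acute) is matched as a single unit by
+# the "\w[...]" branch; any other character falls through to the "." branch.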
+
+
+def parse_linkage_item_text(
+ wxr: WiktextractContext,
+ word: str,
+ data: WordData,
+ field: str,
+ item: str,
+ sense: Optional[str],
+ ruby: list,
+ pos_datas: list,
+ is_reconstruction: bool,
+ urls: Optional[List[str]] = None,
+) -> Optional[str]:
"""Parses a linkage item once it has been converted to a string. This
may add one or more linkages to ``data`` under ``field``. This
    returns None or a string that contains tags that should be applied
to additional linkages (commonly used in tables for Asian characters)."""
assert isinstance(wxr, WiktextractContext)
- assert isinstance(word, str) # Main word (derived from page title)
+ assert isinstance(word, str) # Main word (derived from page title)
assert isinstance(data, dict) # Parsed linkages are stored here under field
assert isinstance(field, str) # The field under which to store linkage
- assert isinstance(item, str) # The string to parse
+ assert isinstance(item, str) # The string to parse
assert sense is None or isinstance(sense, str)
- assert isinstance(ruby, list) # Captured ruby (hiragana/katakana) or ""
+ assert isinstance(ruby, list) # Captured ruby (hiragana/katakana) or ""
assert isinstance(pos_datas, list) # List of senses (containing "glosses")
- assert urls is None or isinstance(urls, list) # Captured urls
+ assert urls is None or isinstance(urls, list) # Captured urls
assert is_reconstruction in (True, False)
item = item.replace("()", "")
@@ -229,7 +269,7 @@ def parse_linkage_item_text(wxr: Wtp,
# Replace occurrences of ~ in the item by the page title
safetitle = wxr.wtp.title.replace("\\", "\\\\")
- item = item.replace(" ~ ", " " + safetitle + " ")
+ item = item.replace(" ~ ", " " + safetitle + " ")
item = re.sub(r"^~ ", safetitle + " ", item)
item = re.sub(r" ~$", " " + safetitle, item)
@@ -239,7 +279,7 @@ def parse_linkage_item_text(wxr: Wtp,
m = re.search(taxonomic_ending_re, item)
if m:
base_english = taxonomic_ending_map[m.group(1)]
- item = item[:m.start()]
+ item = item[: m.start()]
# Some Korean and Japanese words use "word (romanized): english" pattern
# Sometimes the parenthesized part contains comma-separated alt and roman.
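+    # e.g. "개 (gae): dog" (illustrative item: word, romanization, English).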
@@ -248,13 +288,17 @@ def parse_linkage_item_text(wxr: Wtp,
rom = m.group(2)
eng = m.group(3)
rest = m.group(1)
- if (classify_desc(rest, no_unknown_starts=True) == "other" and
- classify_desc(eng, no_unknown_starts=True) == "english"):
+ if (
+ classify_desc(rest, no_unknown_starts=True) == "other"
+ and classify_desc(eng, no_unknown_starts=True) == "english"
+ ):
item = rest
base_roman = rom
lst = base_roman.split(", ")
- if (len(lst) == 2 and
- classify_desc(lst[0], no_unknown_starts=True) == "other"):
+ if (
+ len(lst) == 2
+ and classify_desc(lst[0], no_unknown_starts=True) == "other"
+ ):
base_alt = lst[0]
base_roman = lst[1]
if base_english:
@@ -265,9 +309,10 @@ def parse_linkage_item_text(wxr: Wtp,
# Many words have tags or similar descriptions in the beginning
# followed by a colon and one or more linkages (e.g.,
# panetella/Finnish)
- m = (re.match(r"^\((([^():]|\([^()]*\))+)\): ([^:]*)$", item) or
- re.match(r"^([a-zA-Z][-'a-zA-Z0-9 ]*"
- r"(\([^()]+\)[-'a-zA-Z0-9 ]*)*): ([^:]*)$", item))
+ m = re.match(r"^\((([^():]|\([^()]*\))+)\): ([^:]*)$", item) or re.match(
+ r"^([a-zA-Z][-'a-zA-Z0-9 ]*" r"(\([^()]+\)[-'a-zA-Z0-9 ]*)*): ([^:]*)$",
+ item,
+ )
if m:
desc = m.group(1)
rest = m.group(len(m.groups()))
@@ -326,12 +371,22 @@ def parse_linkage_item_text(wxr: Wtp,
e1 = wxr.wtp.page_exists(desc)
e2 = wxr.wtp.page_exists(rest)
if cls != "tags":
- if (cls2 == "tags" or
- (e1 and not e1) or
- (e1 and e2 and cls2 == "english" and
- cls in ("other", "romanization")) or
- (not e1 and not e2 and cls2 == "english" and
- cls in ("other", "romanization"))):
+ if (
+ cls2 == "tags"
+ or (e1 and not e1)
+ or (
+ e1
+ and e2
+ and cls2 == "english"
+ and cls in ("other", "romanization")
+ )
+ or (
+ not e1
+ and not e2
+ and cls2 == "english"
+ and cls in ("other", "romanization")
+ )
+ ):
desc, rest = rest, desc # Looks like swapped syntax
cls = cls2
if re.search(linkage_paren_ignore_contains_re, desc):
@@ -364,48 +419,56 @@ def parse_linkage_item_text(wxr: Wtp,
d = pos_datas[idx]
gl = "; ".join(d.get("glosses", ()))
if not gl:
- wxr.wtp.debug("parenthesized numeric linkage prefix, "
- "but the referenced sense has no gloss: "
- "{}".format(desc),
- sortid="linkages/355")
+ wxr.wtp.debug(
+ "parenthesized numeric linkage prefix, "
+ "but the referenced sense has no gloss: "
+ "{}".format(desc),
+ sortid="linkages/355",
+ )
elif sense:
sense += "; " + gl
else:
sense = gl
item = rest
else:
- wxr.wtp.debug("parenthesized numeric linkage prefix, "
- "but there is no sense with such index: {}"
- .format(desc),
- sortid="linkages/365")
+ wxr.wtp.debug(
+ "parenthesized numeric linkage prefix, "
+ "but there is no sense with such index: {}".format(desc),
+ sortid="linkages/365",
+ )
item = rest
else:
- wxr.wtp.debug("unrecognized linkage prefix: {} desc={} rest={} "
- "cls={} cls2={} e1={} e2={}"
- .format(item, desc, rest, cls, cls2, e1, e2),
- sortid="linkages/371")
+ wxr.wtp.debug(
+ "unrecognized linkage prefix: {} desc={} rest={} "
+ "cls={} cls2={} e1={} e2={}".format(
+ item, desc, rest, cls, cls2, e1, e2
+ ),
+ sortid="linkages/371",
+ )
item = rest
base_sense = sense
# Check for certain plural tag forms at end of items list, and apply
# them to all items if found
- m = re.search(r" [-‐‑‒–—―] (diminutives|Diminutives|letters|digits|"
- r"characters|symbols|tetragrams|letter names|names|"
- r"female names|male names|proper nouns|contractions|"
- r"nonstandard spellings|verbs|prepositions|postpositions|"
- r"interjections|Abbreviations|abbreviations|variants|"
- r"ordinals|nouns|phrases|adjectives|adverbs|"
- r"augmentatives|pejoratives|compound words|numerals|"
- r"Tally marks|surnames|modern nonstandard spellings)$",
- item)
+ m = re.search(
+ r" [-‐‑‒–—―] (diminutives|Diminutives|letters|digits|"
+ r"characters|symbols|tetragrams|letter names|names|"
+ r"female names|male names|proper nouns|contractions|"
+ r"nonstandard spellings|verbs|prepositions|postpositions|"
+ r"interjections|Abbreviations|abbreviations|variants|"
+ r"ordinals|nouns|phrases|adjectives|adverbs|"
+ r"augmentatives|pejoratives|compound words|numerals|"
+ r"Tally marks|surnames|modern nonstandard spellings)$",
+ item,
+ )
if m:
suffix = m.group(1)
if base_qualifier:
base_qualifier += ", " + suffix
else:
base_qualifier = suffix
- item = item[:m.start()]
+ item = item[: m.start()]
    # Certain linkage items have space-separated values. These are
# generated by, e.g., certain templates
@@ -443,17 +506,29 @@ def parse_linkage_item_text(wxr: Wtp,
# Item1 contains " or "
item2 = re.sub(r"\s*\([^)]*\)", "", item1)
item2 = re.sub(r"\s+", " ", item2)
- if ((lang not in head_final_bantu_langs or
- not re.search(head_final_bantu_re, item2)) and
- (lang not in head_final_other_langs or
- not re.search(head_final_other_re, item2)) and
- (not re.search(head_final_re, item2) or
- (item2[-1].isdigit() and
- lang not in head_final_numeric_langs)) and
- not re.search(r"\bor\b", wxr.wtp.title) and
- all(wxr.wtp.title not in x.split(" or ")
+ if (
+ (
+ lang not in head_final_bantu_langs
+ or not re.search(head_final_bantu_re, item2)
+ )
+ and (
+ lang not in head_final_other_langs
+ or not re.search(head_final_other_re, item2)
+ )
+ and (
+ not re.search(head_final_re, item2)
+ or (
+ item2[-1].isdigit()
+ and lang not in head_final_numeric_langs
+ )
+ )
+ and not re.search(r"\bor\b", wxr.wtp.title)
+ and all(
+ wxr.wtp.title not in x.split(" or ")
for x in split_at_comma_semi(item2)
- if " or " in x)):
+ if " or " in x
+ )
+ ):
# We can split this item. Split the non-cleaned version
# that still has any intervening parenthesized parts.
subitems.extend(split_at_comma_semi(item1, extra=[" or "]))
@@ -482,7 +557,7 @@ def parse_linkage_item_text(wxr: Wtp,
m = re.search(r"\s*\(“([^”]+)”\)", item1)
if m:
t = m.group(1)
- item1 = (item1[:m.start()] + item1[m.end():]).strip()
+ item1 = (item1[: m.start()] + item1[m.end() :]).strip()
cls = classify_desc(t)
if cls == "tags":
if qualifier:
@@ -494,20 +569,27 @@ def parse_linkage_item_text(wxr: Wtp,
        # Some Korean words use "word (alt, roman, “english”)" pattern
# See 滿/Korean
- m = re.match(r'([^(),;:]+) \(([^(),;:]+), ([^(),;:]+), '
- r'[“”"]([^”“"]+)[“”"]\)$', item1)
- if (m and
- classify_desc(m.group(1), no_unknown_starts=True) == "other" and
- classify_desc(m.group(2), no_unknown_starts=True) == "other"):
+ m = re.match(
+ r"([^(),;:]+) \(([^(),;:]+), ([^(),;:]+), "
+ r'[“”"]([^”“"]+)[“”"]\)$',
+ item1,
+ )
+ if (
+ m
+ and classify_desc(m.group(1), no_unknown_starts=True) == "other"
+ and classify_desc(m.group(2), no_unknown_starts=True) == "other"
+ ):
alt = m.group(2)
roman = m.group(3)
english = m.group(4)
item1 = m.group(1)
words = item1.split(" ")
- if (len(words) > 1 and
- words[0] in linkage_beginning_tags and
- words[0] != wxr.wtp.title):
+ if (
+ len(words) > 1
+ and words[0] in linkage_beginning_tags
+ and words[0] != wxr.wtp.title
+ ):
t = linkage_beginning_tags[words[0]]
item1 = " ".join(words[1:])
if qualifier:
@@ -543,8 +625,9 @@ def english_repl(m):
# sometimes both at the beginning and at the end.
# And sometimes even in the middle, as in e.g.
# wife/English/Translations/Yiddish
- while (not script_chars and
- (not sense or not re.search(script_chars_re, sense))):
+ while not script_chars and (
+ not sense or not re.search(script_chars_re, sense)
+ ):
par = None
nonfirst_par = False
if par is None:
@@ -552,16 +635,17 @@ def english_repl(m):
m = re.match(r"\((([^()]|\([^()]*\))*)\):?\s*", item1)
if m:
par = m.group(1)
- item1 = item1[m.end():]
+ item1 = item1[m.end() :]
else:
# Try to find a parenthesized part at the end or from the
# middle.
- m = re.search(r"\s+\((\d|\d\d|[^\d]([^()]|\([^()]*\))*)\)"
- r"(\.$)?",
- item1)
+ m = re.search(
+                r"\s+\((\d|\d\d|[^\d]([^()]|\([^()]*\))*)\)(\.$)?",
+ item1,
+ )
if m:
par = m.group(1)
- item1 = item1[:m.start()] + item1[m.end():]
+ item1 = item1[: m.start()] + item1[m.end() :]
nonfirst_par = True
if not par:
break
@@ -588,7 +672,7 @@ def english_repl(m):
qualifier = par[:idx]
else:
break
- par = par[idx + 1:].strip()
+ par = par[idx + 1 :].strip()
# Check for certain comma-separated tags combined
# with English text at the beginning or end of a
@@ -676,19 +760,22 @@ def english_repl(m):
d = pos_datas[idx]
gl = "; ".join(d.get("glosses", ()))
if not gl:
- wxr.wtp.debug("parenthesized number "
- "but the referenced sense has no "
- "gloss: {}".format(par),
- sortid="linkages/665")
+ wxr.wtp.debug(
+ "parenthesized number "
+ "but the referenced sense has no "
+ "gloss: {}".format(par),
+ sortid="linkages/665",
+ )
elif sense:
sense += "; " + gl
else:
sense = gl
else:
- wxr.wtp.debug("parenthesized number but there is "
- "no sense with such index: {}"
- .format(par),
- sortid="linkages/674")
+ wxr.wtp.debug(
+ "parenthesized number but there is "
+ "no sense with such index: {}".format(par),
+ sortid="linkages/674",
+ )
else:
if alt:
alt += "; " + par
@@ -706,8 +793,8 @@ def english_repl(m):
# Remove certain prefixes from linkages
m = re.match(linkage_remove_prefixes_re, item1)
if m:
- prefix = item1[:m.end()]
- item1 = item1[m.end():]
+ prefix = item1[: m.end()]
+ item1 = item1[m.end() :]
if prefix in linkage_remove_prefixes_tags:
if qualifier:
qualifier += ", " + linkage_remove_prefixes_tags[prefix]
@@ -720,13 +807,13 @@ def english_repl(m):
# Remove certain suffixes from linkages
m = re.search(linkage_remove_suffixes_re, item1)
if m:
- item1 = item1[:m.start()]
+ item1 = item1[: m.start()]
# Parse linkages with "value = english" syntax (e.g.,
# väittää/Finnish)
idx = item1.find(" = ")
if idx >= 0:
- eng = item1[idx + 3:]
+ eng = item1[idx + 3 :]
if classify_desc(eng, no_unknown_starts=True) == "english":
english = eng
item1 = item1[:idx]
@@ -736,25 +823,25 @@ def english_repl(m):
eng = item1[:idx]
if classify_desc(eng, no_unknown_starts=True) == "english":
english = eng
- item1 = item1[idx + 3:]
+ item1 = item1[idx + 3 :]
# Parse linkages with "value - english" syntax (e.g.,
# man/Faroese)
m = re.search(r" [-‐‑‒–—―] ", item1)
if m and "(" not in item1:
- suffix = item1[m.end():]
+ suffix = item1[m.end() :]
cls = classify_desc(suffix, no_unknown_starts=True)
if cls == "english":
# This case intentionally ignores old values from english
# (otherwise taxonomic lists fail)
english = suffix
- item1 = item1[:m.start()]
+ item1 = item1[: m.start()]
elif cls == "tags":
if qualifier:
qualifier += ", " + suffix
else:
qualifier = suffix
- item1 = item1[:m.start()]
+ item1 = item1[: m.start()]
# Parse certain tags at the end of the linked term (unless
# we are in a letters list)
@@ -768,7 +855,7 @@ def english_repl(m):
m = re.search(linkage_truncate_re, item1)
if m:
# suffix = item1[m.start():] # Currently ignored
- item1 = item1[:m.start()]
+ item1 = item1[: m.start()]
if not item1:
continue # Ignore empty link targets
if item1 == word:
@@ -794,9 +881,11 @@ def add(w, r):
# split as this is used when we have a different number
# of romanizations than written forms, and don't know
# which is which.
- if ((not w or "," not in w) and
- (not r or "," not in r) and
- not wxr.wtp.page_exists(w)):
+ if (
+ (not w or "," not in w)
+ and (not r or "," not in r)
+ and not wxr.wtp.page_exists(w)
+ ):
lst = w.split("/") if len(w) > 1 else [w]
if len(lst) == 1:
lst = w.split(" / ")
@@ -811,9 +900,15 @@ def add(w, r):
# Heuristically remove "." at the end of most linkages
# (some linkage lists end in a period, but we also have
# abbreviations that end with a period that should be kept)
- if (w.endswith(".") and not wxr.wtp.page_exists(w) and
- (wxr.wtp.page_exists(w[:-1]) or
- (len(w) >= 5) and "." not in w[:-1])):
+ if (
+ w.endswith(".")
+ and not wxr.wtp.page_exists(w)
+ and (
+ wxr.wtp.page_exists(w[:-1])
+ or (len(w) >= 5)
+ and "." not in w[:-1]
+ )
+ ):
w = w[:-1]
# If we have roman but not alt and the word is ASCII,
@@ -847,8 +942,9 @@ def add(w, r):
if alt and alt.strip() != w:
dt["alt"] = alt.strip()
if urls:
- dt["urls"] = [url.strip() for url in urls
- if url and isinstance(url, str)]
+ dt["urls"] = [
+ url.strip() for url in urls if url and isinstance(url, str)
+ ]
dt["word"] = w
for old in data.get(field, ()):
if dt == old:
@@ -870,9 +966,11 @@ def add(w, r):
# print("lang={} v={} script_chars={} item1={!r}"
# .format(wxr.wtp.section, v, script_chars, item1))
if v and script_chars:
- if (len(item1.split()) > 1 or
- len(list(re.finditer(unicode_dc_re, item1))) == 2 or
- (len(subitems) > 10 and v in ("Hiragana", "Katakana"))):
+ if (
+ len(item1.split()) > 1
+ or len(list(re.finditer(unicode_dc_re, item1))) == 2
+ or (len(subitems) > 10 and v in ("Hiragana", "Katakana"))
+ ):
if v == qualifier:
# if sense:
# sense += "; " + qualifier
@@ -881,9 +979,12 @@ def add(w, r):
qualifier = None
if re.search(r" (letters|digits|script)$", v):
qualifier = v # Also parse as qualifier
- elif re.search(r"Variations of letter |"
- r"Letters using |"
- r"Letters of the ", v):
+ elif re.search(
+ r"Variations of letter |"
+ r"Letters using |"
+ r"Letters of the ",
+ v,
+ ):
qualifier = "letter"
parts = item1.split(". ")
extra = ()
@@ -892,23 +993,28 @@ def add(w, r):
item1 = parts[0]
# Handle multi-character names for chars in language's
# alphabet, e.g., "Ny ny" in P/Hungarian.
- if (len(subitems) > 20 and len(item1.split()) == 2 and
- all(len(x) <= 3 for x in item1.split())):
- parts = list(m.group(0) for m in
- re.finditer(r"(\w[\u0300-\u036f]?)+|.",
- item1)
- if not m.group(0).isspace() and
- m.group(0) not in ("(", ")"))
+ if (
+ len(subitems) > 20
+ and len(item1.split()) == 2
+ and all(len(x) <= 3 for x in item1.split())
+ ):
+ parts = list(
+ m.group(0)
+ for m in re.finditer(r"(\w[\u0300-\u036f]?)+|.", item1)
+ if not m.group(0).isspace()
+ and m.group(0) not in ("(", ")")
+ )
else:
- parts = list(m.group(0) for m in
- re.finditer(r".[\u0300-\u036f]?",
- item1)
- if not m.group(0).isspace() and
- m.group(0) not in ("(", ")"))
+ parts = list(
+ m.group(0)
+ for m in re.finditer(r".[\u0300-\u036f]?", item1)
+ if not m.group(0).isspace()
+ and m.group(0) not in ("(", ")")
+ )
for e in extra:
idx = e.find(":")
if idx >= 0:
- e = e[idx + 1:].strip()
+ e = e[idx + 1 :].strip()
if e.endswith("."):
e = e[:-1]
parts.extend(e.split())
@@ -920,10 +1026,11 @@ def add(w, r):
rparts = None
if roman:
- rparts = list(m.group(0) for m in
- re.finditer(r".[\u0300-\u036f]",
- roman)
- if not m.group(0).isspace())
+ rparts = list(
+ m.group(0)
+ for m in re.finditer(r".[\u0300-\u036f]", roman)
+ if not m.group(0).isspace()
+ )
if len(rparts) != len(parts):
rparts = None
if not rparts:
diff --git a/src/wiktextract/page.py b/src/wiktextract/page.py
index bf3a7733a..f39e197a7 100644
--- a/src/wiktextract/page.py
+++ b/src/wiktextract/page.py
@@ -8,7 +8,18 @@
from typing import Any, Callable, Optional, Union
from mediawiki_langcodes import get_all_names, name_to_code
-from wikitextprocessor import NodeKind, WikiNode
+from wikitextprocessor import (
+ NodeKind,
+ WikiNode,
+)
+from wikitextprocessor.core import (
+ TemplateArgs,
+ TemplateFnCallable,
+ PostTemplateFnCallable,
+)
+from wikitextprocessor.parser import (
+ GeneralNode,
+)
from wiktextract.wxr_context import WiktextractContext
@@ -56,9 +67,9 @@ def is_panel_template(wxr: WiktextractContext, template_name: str) -> bool:
def recursively_extract(
- contents: Union[WikiNode, list[WikiNode]],
+ contents: Union[WikiNode, str, list[Union[str, WikiNode]]],
fn: Callable[[Union[WikiNode, list[WikiNode]]], bool],
-) -> tuple[list[WikiNode], list[WikiNode]]:
+) -> tuple[list[Union[str, WikiNode]], list[Union[str, WikiNode]]]:
"""Recursively extracts elements from contents for which ``fn`` returns
True. This returns two lists, the extracted elements and the remaining
content (with the extracted elements removed at each level). Only
@@ -311,9 +322,9 @@ def remove_duplicate_data(page_data: dict) -> None:
def clean_node(
wxr: WiktextractContext,
sense_data: Optional[Any],
- wikinode: Union[str, WikiNode, list[Union[str, WikiNode]]],
- template_fn: Optional[Callable[[str, dict], str]] = None,
- post_template_fn: Optional[Callable[[str, dict, str], str]] = None,
+ wikinode: GeneralNode,
+ template_fn: Optional[TemplateFnCallable] = None,
+ post_template_fn: Optional[PostTemplateFnCallable] = None,
collect_links: bool = False,
) -> str:
"""
diff --git a/src/wiktextract/type_utils.py b/src/wiktextract/type_utils.py
index 389b541e9..81a26f911 100644
--- a/src/wiktextract/type_utils.py
+++ b/src/wiktextract/type_utils.py
@@ -1,14 +1,170 @@
from typing import (
- Union,
+ Sequence,
+ TypedDict,
)
-WordData = dict[str, Union[
- str,
- int,
- list[str],
- list[list[str]],
- "WordData",
- list["WordData"]
- ]
- ]
+class AltOf(TypedDict, total=False):
+ word: str
+ extra: str
+
+
+class LinkageData(TypedDict, total=False):
+ alt: str
+ english: str
+ extra: str
+ qualifier: str
+ roman: str
+ ruby: list[Sequence[str]]
+ sense: str
+ source: str
+ tags: list[str]
+ taxonomic: str
+ topics: list[str]
+ urls: list[str]
+ word: str
+
+
+class ExampleData(TypedDict, total=False):
+ english: str
+ note: str
+ ref: str
+ roman: str
+ ruby: list[Sequence[str]]
+ text: str
+ type: str
+
+
+class FormOf(TypedDict, total=False):
+ word: str
+ extra: str
+ roman: str
+
+
+LinkData = list[Sequence[str]]
+
+
+class TemplateData(TypedDict, total=False):
+ args: dict[str, str]
+ expansion: str
+ name: str
+
+
+class DescendantData(TypedDict, total=False):
+ depth: int
+ tags: list[str]
+ templates: TemplateData
+ text: str
+
+
+class FormData(TypedDict, total=False):
+ form: str
+ head_nr: int
+ ipa: str
+ roman: str
+ ruby: list[Sequence[str]]
+ source: str
+ tags: list[str]
+ topics: list[str]
+
+
+SoundData = TypedDict(
+ "SoundData",
+ {
+ "audio": str,
+ "audio-ipa": str,
+ "enpr": str,
+ "form": str,
+ "homophone": str,
+ "ipa": str,
+ "mp3_url": str,
+ "note": str,
+ "ogg_url": str,
+ "other": str,
+ "rhymes": str,
+ "tags": list[str],
+ "text": str,
+ "topics": list[str],
+ "zh-pron": str,
+ },
+ total=False,
+)
+
+
+class TranslationData(TypedDict, total=False):
+ alt: str
+ code: str
+ english: str
+ lang: str
+ note: str
+ roman: str
+ sense: str
+ tags: list[str]
+ taxonomic: str
+ topics: list[str]
+ word: str
+
+
+class SenseData(TypedDict, total=False):
+ alt_of: list[AltOf]
+ antonyms: list[LinkageData]
+ categories: list[str]
+ compound_of: list[AltOf]
+ coordinate_terms: list[LinkageData]
+ examples: list[ExampleData]
+ form_of: list[FormOf]
+ glosses: list[str]
+ head_nr: int
+ holonyms: list[LinkageData]
+ hypernyms: list[LinkageData]
+ hyponyms: list[LinkageData]
+ instances: list[LinkageData]
+ links: list[LinkData]
+ meronyms: list[LinkageData]
+ qualifier: str
+ raw_glosses: list[str]
+ related: list[LinkageData]
+ senseid: list[str]
+ synonyms: list[LinkageData]
+ tags: list[str]
+ topics: list[str]
+ wikidata: list[str]
+ wikipedia: list[str]
+
+
+class WordData(TypedDict, total=False):
+ abbreviations: list[LinkageData]
+ alt_of: list[AltOf]
+ antonyms: list[LinkageData]
+ categories: list[str]
+ coordinate_terms: list[LinkageData]
+ derived: list[LinkageData]
+ descendants: list[DescendantData]
+ etymology_number: int
+ etymology_templates: list[TemplateData]
+ etymology_text: str
+ form_of: list[FormOf]
+ forms: list[FormData]
+ head_templates: list[TemplateData]
+ holonyms: list[LinkageData]
+ hyphenation: list[str]
+ hypernyms: list[LinkageData]
+ hyponyms: list[LinkageData]
+ inflection_templates: list[TemplateData]
+ instances: list[LinkageData]
+ lang: str
+ lang_code: str
+ meronyms: list[LinkageData]
+ original_title: str
+ pos: str
+ proverbs: list[LinkageData]
+ redirects: list[str]
+ related: list[LinkageData]
+ senses: list[SenseData]
+ sounds: list[SoundData]
+ synonyms: list[LinkageData]
+ translations: list[TranslationData]
+ troponyms: list[LinkageData]
+ wikidata: list[str]
+ wikipedia: list[str]
+ word: str
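
A minimal usage sketch of the new TypedDicts above (the concrete field
values here are illustrative assumptions, not taken from this patch):

    from wiktextract.type_utils import LinkageData, SenseData, WordData

    # Build a sense and a linkage record using only keys declared above;
    # total=False means every key is optional.
    sense: SenseData = {"glosses": ["an example gloss"], "tags": ["informal"]}
    synonym: LinkageData = {"word": "sample", "sense": "an example gloss"}

    # Assemble a word entry that references the sense and the synonym.
    entry: WordData = {
        "word": "example",
        "lang": "English",
        "lang_code": "en",
        "pos": "noun",
        "senses": [sense],
        "synonyms": [synonym],
    }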