diff --git a/pyproject.toml b/pyproject.toml index 9b0bbe936..ef42ff744 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,3 +82,11 @@ select = [ "I", # isort "W", # pycodestyle warning ] + +[tool.mypy] +mypy_path = "typestubs" +python_version = "3.9" + +[[tool.mypy.overrides]] +module = "importlib_resources.*" +ignore_missing_imports = true diff --git a/src/wiktextract/categories.py b/src/wiktextract/categories.py index 52658d0cc..1cc9efe56 100644 --- a/src/wiktextract/categories.py +++ b/src/wiktextract/categories.py @@ -2,6 +2,13 @@ # # Copyright (c) 2021 Tatu Ylonen. See file LICENSE and https://ylonen.org +from wikitextprocessor.core import NamespaceDataEntry +from typing import ( + Any, + Optional, + TypedDict, + Union, +) from wiktextract.wxr_context import WiktextractContext from .page import clean_node @@ -65,16 +72,39 @@ return export """ -def extract_categories(wxr: WiktextractContext): +CategoryEntry = TypedDict( + "CategoryEntry", + { + "name": str, + "desc": str, + "clean_desc": str, + "children": list[str], + "sort": list[str], + }, + total=False, +) + +CategoryReturn = TypedDict( + "CategoryReturn", + { + "roots": list[str], + "nodes": dict[str, CategoryEntry], + }, + total=False, +) + +def extract_categories(wxr: WiktextractContext) -> CategoryReturn: """Extracts the category tree from Wiktionary.""" - module_ns = wxr.wtp.NAMESPACE_DATA.get("Module", {}) + module_ns: Optional[NamespaceDataEntry] = wxr.wtp.NAMESPACE_DATA.get( + "Module", None) + assert module_ns is not None module_ns_local_name = module_ns.get("name") module_ns_id = module_ns.get("id") wxr.wtp.add_page(f"{module_ns_local_name}:wiktextract cat tree", module_ns_id, LUA_CODE, model="Scribunto") wxr.wtp.start_page("Wiktextract category tree extraction") rawdata = wxr.wtp.expand("{{#invoke:wiktextract cat tree|main}}") - ht = {} + ht: dict[str, CategoryEntry] = {} for line in rawdata.split("\n"): if not line: continue @@ -97,7 +127,7 @@ def extract_categories(wxr: WiktextractContext): 
parent_name_lc = parent_name.lower() parent_sort = parts[i + 1] if parent_name_lc not in ht: - p = {"name": parent_name} + p: CategoryEntry = {"name": parent_name} ht[parent_name_lc] = p else: p = ht[parent_name_lc] @@ -109,10 +139,10 @@ def extract_categories(wxr: WiktextractContext): p["sort"] = [] p["sort"].append(parent_sort) - seen = set() - is_child = set() + seen: set[str] = set() + is_child: set[str] = set() - def recurse(name): + def recurse(name: str) -> None: if name in seen: return seen.add(name) @@ -125,8 +155,8 @@ def recurse(name): for child in v.get("children", ()): is_child.add(child.lower()) - notseen = set(x.lower() for x in ht.keys()) - seen - is_child - notseen = list(ht[x]["name"] for x in sorted(notseen)) + notseen_set = set(x.lower() for x in ht.keys()) - seen - is_child + notseen = list(ht[x]["name"] for x in sorted(notseen_set)) #if notseen: # print("NOT SEEN:", "; ".join(notseen)) @@ -137,7 +167,7 @@ def recurse(name): roots = ["Fundamental"] roots.extend(notseen) - ret = {"roots": roots, "nodes": ht} + ret: CategoryReturn = {"roots": roots, "nodes": ht} # import json # print(json.dumps(ret, sort_keys=True, indent=2)) return ret diff --git a/src/wiktextract/clean.py b/src/wiktextract/clean.py index b5c37ff62..6ba82173e 100644 --- a/src/wiktextract/clean.py +++ b/src/wiktextract/clean.py @@ -9,14 +9,20 @@ import re import html import unicodedata +from typing import ( + Callable, + Optional, + Union +) from wikitextprocessor.common import MAGIC_FIRST, MAGIC_LAST +from wikitextprocessor.core import NamespaceDataEntry from .wxr_context import WiktextractContext ###################################################################### # Cleaning values into plain text. 
###################################################################### -superscript_ht = { +superscript_ht: dict[str, str] = { "0": "⁰", "1": "¹", "2": "²", @@ -91,7 +97,7 @@ "∞": "\u2002᪲" # This is a KLUDGE } -subscript_ht = { +subscript_ht: dict[str, str] = { "0": "₀", "1": "₁", "2": "₂", @@ -131,7 +137,7 @@ "χ": "ᵪ", } -def to_superscript(text): +def to_superscript(text: str) -> str: "Converts text to superscript." if not text: return "" @@ -141,7 +147,7 @@ def to_superscript(text): return "^" + text return "^({})".format(text) -def to_subscript(text): +def to_subscript(text: str) -> str: """Converts text to subscript.""" if not text: return "" @@ -151,14 +157,14 @@ def to_subscript(text): return "_" + text return "_({})".format(text) -def to_chem(text): +def to_chem(text: str) -> str: """Converts text to chemical formula, making digits subscript.""" return "".join(to_subscript(x) if x.isdigit() else x for x in text) # Mapping from Latex names to Unicode characters/strings. This is the # default mapping (some cases are handled specially in the code). -math_map = { +math_map: dict[str, str] = { # XXX should probably change greek characters to non-slanted ones? 
"AC": "∿", "APLcomment": "⍝", @@ -912,7 +918,7 @@ def to_chem(text): "mathrm": "", } -mathcal_map = { +mathcal_map: dict[str, str] = { "A": "𝒜", "B": "ℬ", "C": "𝒞", @@ -967,7 +973,7 @@ def to_chem(text): "z": "𝓏", } -mathfrak_map = { +mathfrak_map: dict[str, str]= { "A": "𝔄", "B": "𝔅", "C": "ℭ", @@ -994,7 +1000,7 @@ def to_chem(text): "Z": "ℨ", } -mathbb_map = { +mathbb_map: dict[str, str] = { "A": "𝔸", "B": "𝔹", "C": "ℂ", @@ -1064,23 +1070,24 @@ def to_chem(text): "9": "𝟡", } -def mathcal_fn(text): +def mathcal_fn(text: str) -> str: return "".join(mathcal_map.get(x, x) for x in text) -def mathfrak_fn(text): +def mathfrak_fn(text: str) -> str: return "".join(mathfrak_map.get(x, x) for x in text) -def mathbb_fn(text): +def mathbb_fn(text: str) -> str: return "".join(mathbb_map.get(x, x) for x in text) -def to_math(text): +def to_math(text: str) -> str: """Converts a mathematical formula to ASCII.""" # print("to_math: {!r}".format(text)) - magic_vec = [] + magic_vec: list[str] = [] - def expand(text): + def expand(text: str) -> str: while True: orig = text + # formatting with {:c} converts input into character text = re.sub(r"[{:c}-{:c}]".format(MAGIC_FIRST, MAGIC_LAST), lambda m: magic_vec[ord(m.group(0)) - MAGIC_FIRST], text) @@ -1088,14 +1095,18 @@ def expand(text): break return text - def recurse(text): - def math_magic(text, left, right, fn): - regexp = r"{}([^{}{}]+){}".format( + def recurse(text: str) -> str: + def math_magic(text: str, + left: str, + right: str, + fn: Callable[[str], str] + ) -> str: + regexp_str = r"{}([^{}{}]+){}".format( re.escape(left), re.escape(left), re.escape(right), re.escape(right)) - regexp = re.compile(regexp) + regexp = re.compile(regexp_str) - def repl(m): + def repl(m: re.Match) -> str: magic = chr(MAGIC_FIRST + len(magic_vec)) t = fn(m.group(1)).strip() magic_vec.append(t) @@ -1108,8 +1119,8 @@ def repl(m): break return text - def expand_group(v): - fn = None + def expand_group(v: str) -> str: + fn: Optional[Callable[[str], 
str]] = None if re.match(r"\\mathcal\b", v): fn = mathcal_fn v = v[8:].strip() @@ -1181,7 +1192,7 @@ def expand_group(v): v = expand(v) return v - parts = [] + parts: list[str] = [] while True: orig = text text = math_magic(text, "{", "}", recurse) @@ -1223,7 +1234,7 @@ def expand_group(v): return text -def bold_follows(parts, i): +def bold_follows(parts: list[str], i: int) -> bool: """Checks if there is a bold (''') in parts after parts[i]. We allow intervening italics ('').""" parts = parts[i + 1:] @@ -1235,7 +1246,7 @@ def bold_follows(parts, i): return False -def remove_italic_and_bold(text): +def remove_italic_and_bold(text: str) -> str: """Based on token_iter in wikitextprocessor""" assert isinstance(text, str) lines = re.split(r"(\n+)", text) # Lines and separators @@ -1300,7 +1311,11 @@ def remove_italic_and_bold(text): new_text_parts = new_text_parts[:-1] # remove last \n return "".join(new_text_parts) -def clean_value(wxr, title, no_strip=False, no_html_strip=False): +def clean_value(wxr: WiktextractContext, + title: str, + no_strip=False, + no_html_strip=False +) -> str: """Cleans a title or value into a normal string. This should basically remove any Wikimedia formatting from it: HTML tags, templates, links, emphasis, etc. 
This will also merge multiple whitespaces into one @@ -1308,9 +1323,10 @@ def clean_value(wxr, title, no_strip=False, no_html_strip=False): assert isinstance(wxr, WiktextractContext) assert isinstance(title, str) - def repl_1(m): + def repl_1(m: re.Match) -> str: return clean_value(wxr, m.group(1), no_strip=True) - def repl_exturl(m): + + def repl_exturl(m: re.Match) -> str: args = re.split(r"\s+", m.group(1)) i = 0 while i < len(args) - 1: @@ -1318,33 +1334,33 @@ def repl_exturl(m): break i += 1 return " ".join(args[i:]) - def repl_link(m): + def repl_link(m: re.Match) -> str: if m.group(2) and m.group(2).lower() in ("file", "image"): return "" v = m.group(3).split("|") return clean_value(wxr, v[0], no_strip=True) - def repl_link_bars(m): + def repl_link_bars(m: re.Match) -> str: lnk = m.group(1) if re.match(r"(?si)(File|Image)\s*:", lnk): return "" return clean_value(wxr, m.group(4) or m.group(2) or "", no_strip=True) - def repl_1_sup(m): + def repl_1_sup(m: re.Match) -> str: return to_superscript(clean_value(wxr, m.group(1))) - def repl_1_sub(m): + def repl_1_sub(m: re.Match) -> str: return to_subscript(clean_value(wxr, m.group(1))) - def repl_1_chem(m): + def repl_1_chem(m: re.Match) -> str: return to_chem(clean_value(wxr, m.group(1))) - def repl_1_math(m): + def repl_1_math(m: re.Match) -> str: v = to_math(m.group(1)) # print("to_math:", ascii(v)) return v - def repl_1_syntaxhighlight(m): + def repl_1_syntaxhighlight(m: re.Match) -> str: # Content is preformatted return "\n" + m.group(1).strip() + "\n" @@ -1423,9 +1439,12 @@ def repl_1_syntaxhighlight(m): title = re.sub(r"\[//[^]\s]+\s+edit\s*\]", "", title) # Replace links by their text - category_ns_data = wxr.wtp.NAMESPACE_DATA.get("Category", {}) - category_ns_names = {category_ns_data.get("name")} | set( - category_ns_data.get("aliases") + category_ns_data: Optional[NamespaceDataEntry] + # XXX "Category" -> config variable for portability + category_ns_data = wxr.wtp.NAMESPACE_DATA.get("Category", None) + 
assert category_ns_data is not None + category_ns_names = {category_ns_data["name"]} | set( + category_ns_data["aliases"] ) category_names_pattern = rf"(?:{'|'.join(category_ns_names)})" while True: @@ -1489,7 +1508,10 @@ def repl_1_syntaxhighlight(m): return title -def clean_template_args(wxr, ht, no_strip=False): +def clean_template_args(wxr: WiktextractContext, + ht: dict[Union[int, str], str], # XXX -> "TemplateArgs" + no_strip=False +) -> dict[str, str]: """Cleans all values in a template argument dictionary and returns the cleaned dictionary.""" assert isinstance(wxr, WiktextractContext) diff --git a/src/wiktextract/config.py b/src/wiktextract/config.py index 801df8bf6..8652ac78e 100644 --- a/src/wiktextract/config.py +++ b/src/wiktextract/config.py @@ -6,15 +6,33 @@ import collections import json import sys -from typing import Callable, Optional +from typing import ( + Callable, + Iterable, + Optional, + TypedDict, + Union, +) -from wikitextprocessor.core import CollatedErrorReturnData +from wikitextprocessor.core import ErrorMessageData, CollatedErrorReturnData if sys.version_info < (3, 10): from importlib_resources import files else: from importlib.resources import files +SoundFileRedirects = dict[str, str] + +POSSubtitleData = TypedDict( + "POSSubtitleData", + { + "pos": str, + "debug": str, + "tags": list[str], + }, + total=False, +) + class WiktionaryConfig: """This class holds configuration data for Wiktionary parsing.""" @@ -54,19 +72,19 @@ class WiktionaryConfig: def __init__( self, - dump_file_lang_code="en", - capture_language_codes={"en", "mul"}, - capture_translations=True, - capture_pronunciation=True, - capture_linkages=True, - capture_compounds=True, - capture_redirects=True, - capture_examples=True, - capture_etymologies=True, - capture_inflections=True, - capture_descendants=True, - verbose=False, - expand_tables=False, + dump_file_lang_code: str = "en", + capture_language_codes: Optional[Iterable[str]] = {"en", "mul"}, + 
capture_translations = True, + capture_pronunciation = True, + capture_linkages = True, + capture_compounds = True, + capture_redirects = True, + capture_examples = True, + capture_etymologies = True, + capture_inflections = True, + capture_descendants = True, + verbose = False, + expand_tables = False, ): if capture_language_codes is not None: assert isinstance(capture_language_codes, (list, tuple, set)) @@ -101,13 +119,19 @@ def __init__( self.section_counts: dict[str, int] = collections.defaultdict(int) # Some fields related to errors # The word currently being processed. - self.word = None - self.errors = [] - self.warnings = [] - self.debugs = [] - self.redirects = {} + self.word: Optional[str] = None + self.errors: list[ErrorMessageData] = [] + self.warnings: list[ErrorMessageData] = [] + self.debugs: list[ErrorMessageData] = [] + self.redirects: SoundFileRedirects = {} self.data_folder = files("wiktextract") / "data" / dump_file_lang_code + self.POS_SUBTITLES: Optional[dict[str, POSSubtitleData]] = None + self.POS_TYPES: Optional[set[str]] = None + self.LINKAGE_SUBTITLES: Optional[dict[str, str]] = None + self.OTHER_SUBTITLES: Optional[dict[str, Union[str, list[str]]]] = None + # set the above four in the function below self.init_subtitles() + self.ZH_PRON_TAGS: Optional[list[str]] = None self.set_attr_from_json("ZH_PRON_TAGS", "zh_pron_tags.json") self.analyze_templates = True # find templates that need pre-expand self.extract_thesaurus_pages = True @@ -149,13 +173,14 @@ def set_attr_from_json( def init_subtitles(self) -> None: self.set_attr_from_json("LINKAGE_SUBTITLES", "linkage_subtitles.json") self.set_attr_from_json("POS_SUBTITLES", "pos_subtitles.json") - self.POS_TYPES = set(x["pos"] for x in self.POS_SUBTITLES.values()) - for k, v in self.POS_SUBTITLES.items(): - if "tags" in v: - assert isinstance(v["tags"], (list, tuple)) + if self.POS_SUBTITLES is not None: + self.POS_TYPES = set(x["pos"] for x in self.POS_SUBTITLES.values()) + for k, v in 
self.POS_SUBTITLES.items(): + if "tags" in v: + assert isinstance(v["tags"], (list, tuple)) self.set_attr_from_json("OTHER_SUBTITLES", "other_subtitles.json") - def load_edition_settings(self): + def load_edition_settings(self) -> None: file_path = self.data_folder / "config.json" if file_path.exists(): with file_path.open(encoding="utf-8") as f: