diff --git a/json_schema/fr.json b/json_schema/fr.json deleted file mode 100644 index 80567ca9b..000000000 --- a/json_schema/fr.json +++ /dev/null @@ -1,338 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://kaikki.org/fr.json", - "title": "French Wiktionary", - "description": "JSON schema of the French Wiktionary extractor", - "type": "object", - "properties": { - "lang_name": { - "description": "Localized language name of the word", - "type": "string" - }, - "lang_code": { - "description": "Wiktionary language code", - "type": "string" - }, - "word": { - "description": "word string", - "type": "string" - }, - "pos": { - "description": "Part of speech type", - "type": "string" - }, - "pos_title": { - "description": "Original POS title for matching etymology texts", - "type": "string" - }, - "etymology_texts": { - "description": "Etymology list", - "type": "array", - "items": { - "type": "string" - } - }, - "senses": { - "description": "Sense list", - "type": "array", - "items": { - "$ref": "#/$defs/sense" - } - }, - "forms": { - "description": "Inflection forms list", - "type": "array", - "items": { - "$ref": "#/$defs/form" - } - }, - "sounds": { - "type": "array", - "items": { - "$ref": "#/$defs/sound" - } - }, - "translations": { - "type": "array", - "items": { - "$ref": "#/$defs/translation" - } - }, - "synonyms": { - "type": "array", - "items": { - "$ref": "#/$defs/linkage" - } - }, - "hyponyms": { - "type": "array", - "items": { - "$ref": "#/$defs/linkage" - } - }, - "hypernyms": { - "type": "array", - "items": { - "$ref": "#/$defs/linkage" - } - }, - "holonyms": { - "type": "array", - "items": { - "$ref": "#/$defs/linkage" - } - }, - "meronyms": { - "type": "array", - "items": { - "$ref": "#/$defs/linkage" - } - }, - "derived": { - "type": "array", - "items": { - "$ref": "#/$defs/linkage" - } - }, - "troponyms": { - "type": "array", - "items": { - "$ref": "#/$defs/linkage" - } - }, - "paronyms": { - "type": "array", - "items": { - "$ref": "#/$defs/linkage" - } - }, - "related": { - "type": "array", - "items": { - "$ref": "#/$defs/linkage" - } - }, - "abbreviation": { - "type": "array", - "items": { - "$ref": "#/$defs/linkage" - } - }, - "proverbs": { - "type": "array", - "items": { - "$ref": "#/$defs/linkage" - } - }, - "title": { - "description": "Redirect page source title", - "type": "string" - }, - "redirect": { - "description": "Redirect page target title", - "type": "string" - }, - "categories": { - "type": "array", - "items": { - "type": "string" - } - }, - "notes": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "$defs": { - "sense": { - "type": "object", - "properties": { - "glosses": { - "type": "array", - "items": { - "type": "string" - } - }, - "tags": { - "type": "array", - "items": { - "type": "string" - } - }, - "categories": { - "type": "array", - "items": { - "type": "string" - } - }, - "examples": { - "type": "array", - "items": { - "$ref": "#/$defs/example" - } - } - } - }, - "example": { - "type": "object", - "properties": { - "text": { - "description": "Example usage sentence", - "type": "string" - }, - "translation": { - "description": "French translation of the example sentence", - "type": "string" - }, - "roman": { - "description": "Romanization of the example sentence", - "type": "string" - }, - "ref": { - "description": "Source of the sentence, like book title and page number", - "type": "string" - }, - "type": { - "description": "This value is 'quotation' if 'source' exists", - "type": "string", - 
"enum": [ - "example", - "quotation" - ] - } - } - }, - "form": { - "type": "object", - "properties": { - "form": { - "type": "string" - }, - "tags": { - "type": "array", - "items": { - "type": "string" - } - }, - "ipas": { - "description": "has more than one ipa", - "type": "array", - "items": { - "type": "string" - } - }, - "ipa": { - "description": "only has one ipa", - "type": "string" - }, - "source": { - "description": "form line template name", - "type": "string" - } - } - }, - "sound": { - "type": "object", - "properties": { - "zh-pron": { - "description": "Chinese word pronunciation", - "type": "string" - }, - "ipa": { - "description": "International Phonetic Alphabet", - "type": "string" - }, - "audio": { - "description": "Audio file name", - "type": "string" - }, - "wav_url": { - "type": "string" - }, - "ogg_url": { - "type": "string" - }, - "mp3_url": { - "type": "string" - } - } - }, - "translation": { - "type": "object", - "properties": { - "lang_code": { - "description": "Wiktionary language code of the translation term", - "type": "string" - }, - "lang_name": { - "description": "Translation language name", - "type": "string" - }, - "word": { - "description": "Translation term", - "type": "string" - }, - "sense": { - "description": "Translation gloss", - "type": "string" - }, - "tags": { - "type": "array", - "items": { - "type": "string" - } - }, - "roman": { - "type": "string" - }, - "traditional_writing": { - "description": "Alternative writting for Chinese, Korean and Mongolian", - "type": "string" - } - } - }, - "linkage": { - "type": "object", - "properties": { - "word": { - "type": "string" - }, - "tags": { - "type": "array", - "items": { - "type": "string" - } - }, - "roman": { - "type": "string" - }, - "alt": { - "description": "ALternative form", - "type": "string" - }, - "translation": { - "description": "French translation", - "type": "string" - }, - "sense": { - "description": "Definition of the word", - "type": "string" - }, - "sense_index": { - "description": "Number of the definition, start from 1", - "type": "integer" - }, - "lang_name": { - "description": "Localized language name of the word, for the 'Dérivés dans d’autres langues' section", - "type": "string" - }, - "lang_code": { - "description": "Wiktionary language code, for the 'Dérivés dans d’autres langues' section", - "type": "string" - } - } - } - } -} diff --git a/src/wiktextract/extractor/fr/etymology.py b/src/wiktextract/extractor/fr/etymology.py index 09b979ecb..26ca0afd8 100644 --- a/src/wiktextract/extractor/fr/etymology.py +++ b/src/wiktextract/extractor/fr/etymology.py @@ -1,17 +1,19 @@ from collections import defaultdict -from typing import Dict, List, Optional, Tuple, Union +from typing import Optional, Union from wikitextprocessor import NodeKind, WikiNode from wikitextprocessor.parser import TemplateNode from wiktextract.page import LEVEL_KINDS, clean_node from wiktextract.wxr_context import WiktextractContext -EtymologyData = Dict[str, List[str]] +from .models import WordEntry + +EtymologyData = dict[str, list[str]] def extract_etymology( wxr: WiktextractContext, - nodes: List[Union[WikiNode, str]], + nodes: list[Union[WikiNode, str]], ) -> Optional[EtymologyData]: etymology_dict: EtymologyData = defaultdict(list) level_node_index = len(nodes) @@ -62,7 +64,7 @@ def extract_etymology( def find_pos_in_etymology_list( wxr: WiktextractContext, list_item_node: WikiNode -) -> Optional[Tuple[str, str]]: +) -> Optional[tuple[str, str]]: """ Return tuple of POS title and etymology text if the 
passed list item node starts with italic POS node or POS template, otherwise return None. @@ -96,26 +98,27 @@ def find_pos_in_etymology_list( def insert_etymology_data( - lang_code: str, page_data: List[Dict], etymology_data: EtymologyData + lang_code: str, page_data: list[WordEntry], etymology_data: EtymologyData ) -> None: """ Insert list of etymology data extracted from the level 3 node to each sense dictionary that matches the language and POS. """ - sense_dict = {} # group by pos title + sense_dict = defaultdict(list) # group by pos title for sense_data in page_data: - if sense_data.get("lang_code") == lang_code: - sense_dict[sense_data.get("pos_title")] = sense_data + if sense_data.lang_code == lang_code: + sense_dict[sense_data.pos_title].append(sense_data) for pos_title, etymology_texts in etymology_data.items(): if pos_title is None: # add to all sense dictionaries - for sense_data in sense_dict.values(): - sense_data["etymology_texts"] = etymology_texts + for sense_data_list in sense_dict.values(): + for sense_data in sense_data_list: + sense_data.etymology_texts = etymology_texts elif pos_title in sense_dict: - sense_dict[pos_title]["etymology_texts"] = etymology_texts + for sense_data in sense_dict[pos_title]: + sense_data.etymology_texts = etymology_texts elif pos_title.removesuffix(" 1") in sense_dict: # an index number is added in the etymology section but not added in # POS title - sense_dict[pos_title.removesuffix(" 1")][ - "etymology_texts" - ] = etymology_texts + for sense_data in sense_dict[pos_title.removesuffix(" 1")]: + sense_data.etymology_texts = etymology_texts diff --git a/src/wiktextract/extractor/fr/form_line.py b/src/wiktextract/extractor/fr/form_line.py index 097d4039e..e315e0007 100644 --- a/src/wiktextract/extractor/fr/form_line.py +++ b/src/wiktextract/extractor/fr/form_line.py @@ -1,18 +1,18 @@ -from collections import defaultdict -from typing import Dict, List, Union +from typing import Union from wikitextprocessor import NodeKind, WikiNode from wikitextprocessor.parser import HTMLNode, TemplateNode from wiktextract.page import clean_node from wiktextract.wxr_context import WiktextractContext +from .models import Form, Sound, WordEntry from .pronunciation import PRON_TEMPLATES, process_pron_template def extract_form_line( wxr: WiktextractContext, - page_data: List[Dict], - nodes: List[Union[WikiNode, str]], + page_data: list[WordEntry], + nodes: list[Union[WikiNode, str]], ) -> None: """ Ligne de forme @@ -27,9 +27,7 @@ def extract_form_line( if node.template_name in PRON_TEMPLATES: ipa_text = process_pron_template(wxr, node) if len(ipa_text) > 0: - page_data[-1]["sounds"].append( - defaultdict(list, {"ipa": ipa_text}) - ) + page_data[-1].sounds.append(Sound(ipa=ipa_text)) elif node.template_name == "équiv-pour": process_equiv_pour_template(wxr, node, page_data) elif node.template_name.startswith("zh-mot"): @@ -42,18 +40,18 @@ def extract_form_line( tag.startswith("(") and tag.endswith(")") and pre_template_name in PRON_TEMPLATES - and len(page_data[-1].get("sounds", [])) > 0 + and len(page_data[-1].sounds) > 0 ): # it's the location of the previous IPA template - page_data[-1]["sounds"][-1]["tags"].append(tag.strip("()")) + page_data[-1].sounds[-1].tags.append(tag.strip("()")) elif len(tag.strip("()")) > 0: - page_data[-1]["tags"].append(tag.strip("()")) + page_data[-1].tags.append(tag.strip("()")) pre_template_name = node.template_name def process_equiv_pour_template( - wxr: WiktextractContext, node: TemplateNode, page_data: List[Dict] + wxr:
WiktextractContext, node: TemplateNode, page_data: list[WordEntry] ) -> None: # equivalent form: https://fr.wiktionary.org/wiki/Modèle:équiv-pour expanded_node = wxr.wtp.parse( @@ -64,20 +62,20 @@ def process_equiv_pour_template( if child.kind == NodeKind.ITALIC: form_tag = clean_node(wxr, None, child).strip("() ") elif isinstance(child, HTMLNode) and child.tag == "bdi": - form_data = { - "form": clean_node(wxr, None, child), - "source": "form line template 'équiv-pour'", - } + form_data = Form( + form=clean_node(wxr, None, child), + source="form line template 'équiv-pour'", + ) if len(form_tag) > 0: - form_data["tags"] = [form_tag] - if len(form_data["form"]) > 0: - page_data[-1]["forms"].append(form_data) + form_data.tags = [form_tag] + if len(form_data.form) > 0: + page_data[-1].forms.append(form_data) def process_zh_mot_template( wxr: WiktextractContext, node: TemplateNode, - page_data: List[Dict], + page_data: list[WordEntry], ) -> None: # Chinese form line template: zh-mot, zh-mot-s, zh-mot-t # https://fr.wiktionary.org/wiki/Modèle:zh-mot @@ -88,30 +86,29 @@ def process_zh_mot_template( ) for template_node in node.find_child(NodeKind.TEMPLATE): if template_node.template_name.lower() == "lang": - page_data[-1]["sounds"].append( - { - "zh-pron": clean_node(wxr, None, template_node), - "tags": ["Pinyin"], - } + page_data[-1].sounds.append( + Sound( + zh_pron=clean_node(wxr, None, template_node), + tags=["Pinyin"], + ) ) elif template_node.template_name in ("pron", "prononciation"): - page_data[-1]["sounds"].append( - {"ipa": clean_node(wxr, None, template_node)} + page_data[-1].sounds.append( + Sound(ipa=clean_node(wxr, None, template_node)) ) def process_ja_mot_template( wxr: WiktextractContext, template_node: TemplateNode, - page_data: List[Dict], + page_data: list[WordEntry], ) -> None: # Japanese form line template: https://fr.wiktionary.org/wiki/Modèle:ja-mot expanded_node = wxr.wtp.parse( wxr.wtp.node_to_wikitext(template_node), expand_all=True ) existing_forms = { - existing_form.get("form") - for existing_form in page_data[-1].get("forms", []) + existing_form.form for existing_form in page_data[-1].forms } for index, node in expanded_node.find_html("span", with_index=True): # the first span tag is the word, the second is Hepburn romanization @@ -120,7 +117,7 @@ def process_ja_mot_template( if form_text not in existing_forms: # avoid adding duplicated form data extracted from # inflection table before the form line - page_data[-1]["forms"].append( - {"form": form_text, "tags": ["romanization"]} + page_data[-1].forms.append( + Form(form=form_text, tags=["romanization"]) ) break diff --git a/src/wiktextract/extractor/fr/gloss.py b/src/wiktextract/extractor/fr/gloss.py index c63a1abbd..8b69d5b97 100644 --- a/src/wiktextract/extractor/fr/gloss.py +++ b/src/wiktextract/extractor/fr/gloss.py @@ -1,17 +1,18 @@ from collections import defaultdict -from typing import Dict, List from wikitextprocessor import NodeKind, WikiNode from wikitextprocessor.parser import TemplateNode from wiktextract.page import clean_node from wiktextract.wxr_context import WiktextractContext +from .models import Example, Sense, WordEntry + def extract_gloss( wxr: WiktextractContext, - page_data: List[Dict], + page_data: list[WordEntry], list_node: WikiNode, - parent_glosses: List[str] = [], + parent_glosses: list[str] = [], ) -> None: for list_item_node in list_node.find_child(NodeKind.LIST_ITEM): gloss_nodes = list( @@ -19,7 +20,7 @@ def extract_gloss( NodeKind.LIST, include_empty_str=True ) ) - gloss_data = 
defaultdict(list) + gloss_data = Sense() gloss_start = 0 # process modifier, theme templates before gloss text # https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_de_tous_les_modèles/Précisions_de_sens @@ -33,9 +34,9 @@ gloss_start = index + 1 tag = expanded_text.strip("() \n") if len(tag) > 0: - gloss_data["tags"].append(tag) + gloss_data.tags.append(tag) if "categories" in categories_data: - gloss_data["categories"].extend( + gloss_data.categories.extend( categories_data["categories"] ) @@ -54,7 +55,7 @@ and isinstance(gloss_nodes[index + 1], str) and gloss_nodes[index + 1].strip() == ")" ): - gloss_data["tags"].append(clean_node(wxr, None, node)) + gloss_data.tags.append(clean_node(wxr, None, node)) tag_indexes |= {index - 1, index, index + 1} continue @@ -64,12 +65,12 @@ if index not in tag_indexes ] gloss_text = clean_node(wxr, gloss_data, gloss_only_nodes) - gloss_data["glosses"] = parent_glosses + [gloss_text] - page_data[-1]["senses"].append(gloss_data) + gloss_data.glosses = parent_glosses + [gloss_text] + page_data[-1].senses.append(gloss_data) for nest_gloss_list in list_item_node.find_child(NodeKind.LIST): if nest_gloss_list.sarg.endswith("#"): extract_gloss( - wxr, page_data, nest_gloss_list, gloss_data["glosses"] + wxr, page_data, nest_gloss_list, gloss_data.glosses ) elif nest_gloss_list.sarg.endswith("*"): extract_examples(wxr, gloss_data, nest_gloss_list) @@ -77,7 +78,7 @@ def extract_examples( wxr: WiktextractContext, - gloss_data: Dict, + gloss_data: Sense, example_list_node: WikiNode, ) -> None: for example_node in example_list_node.find_child(NodeKind.LIST_ITEM): @@ -102,18 +103,17 @@ for node in example_node_children if node != source_template ] - example_data = {"type": "example"} - example_data["text"] = clean_node(wxr, None, example_nodes) + example_data = Example() + example_data.text = clean_node(wxr, None, example_nodes) if source_template is not None: - example_data["ref"] = clean_node( - wxr, None, source_template - ).strip("— ()") - example_data["type"] = "quotation" - gloss_data["examples"].append(example_data) + example_data.ref = clean_node(wxr, None, source_template).strip( + "— ()" + ) + gloss_data.examples.append(example_data) def process_exemple_template( - wxr: WiktextractContext, node: TemplateNode, gloss_data: Dict + wxr: WiktextractContext, node: TemplateNode, gloss_data: Sense ) -> None: # https://fr.wiktionary.org/wiki/Modèle:exemple # https://fr.wiktionary.org/wiki/Modèle:ja-exemple @@ -132,15 +132,11 @@ node.template_parameters.get(3, node.template_parameters.get("tr", "")), ) source = clean_node(wxr, None, node.template_parameters.get("source", "")) - example_data = {"type": "example"} - if len(text) > 0: - example_data["text"] = clean_node(wxr, None, text) - if len(translation) > 0: - example_data["translation"] = clean_node(wxr, None, translation) - if len(transcription) > 0: - example_data["roman"] = clean_node(wxr, None, transcription) - if len(source) > 0: - example_data["ref"] = clean_node(wxr, None, source) - example_data["type"] = "quotation" - if "text" in example_data: - gloss_data["examples"].append(example_data) + example_data = Example( + text=clean_node(wxr, None, text), + translation=clean_node(wxr, None, translation), + roman=clean_node(wxr, None, transcription), + ref=clean_node(wxr, None, source), + ) + if len(example_data.text) > 0: + gloss_data.examples.append(example_data) diff --git
a/src/wiktextract/extractor/fr/inflection.py b/src/wiktextract/extractor/fr/inflection.py index a7817fc5e..f21d81f75 100644 --- a/src/wiktextract/extractor/fr/inflection.py +++ b/src/wiktextract/extractor/fr/inflection.py @@ -1,19 +1,17 @@ -from collections import defaultdict -from copy import deepcopy from dataclasses import dataclass -from typing import Dict, List from wikitextprocessor import NodeKind, WikiNode from wikitextprocessor.parser import TemplateNode from wiktextract.page import clean_node from wiktextract.wxr_context import WiktextractContext -from .pronunciation import insert_ipa, is_ipa_text +from .models import Form, WordEntry +from .pronunciation import is_ipa_text def extract_inflection( wxr: WiktextractContext, - page_data: List[Dict], + page_data: list[WordEntry], template_node: TemplateNode, ) -> None: # inflection templates @@ -47,7 +45,7 @@ class ColspanHeader: def process_inflection_table( wxr: WiktextractContext, - page_data: List[Dict], + page_data: list[WordEntry], node: WikiNode, ) -> None: expanded_node = wxr.wtp.parse( @@ -92,7 +90,7 @@ column_cell_index = 0 for column_num, table_cell in enumerate(table_row_nodes): - form_data = defaultdict(list) + form_data = Form() if isinstance(table_cell, WikiNode): if table_cell.kind == NodeKind.TABLE_HEADER_CELL: if any( @@ -140,38 +138,57 @@ if is_ipa_text(table_cell_line): insert_ipa(form_data, table_cell_line) elif ( - table_cell_line != page_data[-1].get("word") + table_cell_line != page_data[-1].word and table_cell_line not in IGNORE_TABLE_CELL ): - if "form" not in form_data: - form_data["form"] = table_cell_line + if "form" not in form_data.model_fields_set: + form_data.form = table_cell_line else: - form_data["form"] += " " + table_cell_line + form_data.form += " " + table_cell_line for colspan_header in colspan_headers: if ( column_cell_index >= colspan_header.index and column_cell_index < colspan_header.index + colspan_header.span ): - form_data["tags"].append(colspan_header.text) + form_data.tags.append(colspan_header.text) if ( "colspan" not in table_cell.attrs and len(column_headers) > column_cell_index and column_headers[column_cell_index].lower() not in IGNORE_TABLE_HEADERS ): - form_data["tags"].append( - column_headers[column_cell_index] - ) + form_data.tags.append(column_headers[column_cell_index]) if len(row_headers) > 0: - form_data["tags"].extend(row_headers) - if "form" in form_data: - for form in form_data["form"].split(" ou "): - new_form_data = deepcopy(form_data) - new_form_data["form"] = form - page_data[-1]["forms"].append(new_form_data) + form_data.tags.extend(row_headers) + if "form" in form_data.model_fields_set: + for form in form_data.form.split(" ou "): + new_form_data = form_data.model_copy(deep=True) + new_form_data.form = form + page_data[-1].forms.append(new_form_data) colspan_text = table_cell.attrs.get("colspan", "1") if colspan_text.isdigit(): column_cell_index += int(colspan_text) + + +def split_ipa(text: str) -> list[str]: + # break IPA text if it contains "ou" (or) + if " ou " in text: + # two ipa texts in the same line: "en-conj-rég" template + return text.split(" ou ") + if text.startswith("ou "): + return [text.removeprefix("ou ")] + if text.endswith(" Prononciation ?\\"): + # inflection table templates use an edit link when the ipa data is + # missing, and the link usually ends with " Prononciation ?"
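+ # in that case there is no usable IPA, so return an empty list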
+ return "" + return [text] + + +def insert_ipa(form: Form, ipa_text: str) -> None: + ipa_data = split_ipa(ipa_text) + if len(ipa_data) == 0: + return + form.ipas.extend(ipa_data) diff --git a/src/wiktextract/extractor/fr/linkage.py b/src/wiktextract/extractor/fr/linkage.py index c1b7a3fd6..81fdd981a 100644 --- a/src/wiktextract/extractor/fr/linkage.py +++ b/src/wiktextract/extractor/fr/linkage.py @@ -1,17 +1,15 @@ -from collections import defaultdict -from typing import Union - from wikitextprocessor import NodeKind, WikiNode from wikitextprocessor.parser import TemplateNode from wiktextract.page import clean_node from wiktextract.wxr_context import WiktextractContext from ..share import split_tag_text +from .models import Linkage, WordEntry def extract_linkage( wxr: WiktextractContext, - page_data: list[dict], + page_data: list[WordEntry], level_node: WikiNode, section_type: str, ) -> None: @@ -28,7 +26,7 @@ def extract_linkage( def process_derives_autres_list( wxr: WiktextractContext, - page_data: list[dict], + page_data: list[WordEntry], level_node: WikiNode, ): # drrive to other languages list @@ -41,18 +39,14 @@ def process_derives_autres_list( lang_name = clean_node(wxr, None, template_node) elif template_node.template_name == "lien": word = clean_node(wxr, None, template_node) - page_data[-1]["derived"].append( - { - "lang_code": lang_code, - "lang_name": lang_name, - "word": word, - } + page_data[-1].derived.append( + Linkage(lang_code=lang_code, lang_name=lang_name, word=word) ) def process_linkage_list( wxr: WiktextractContext, - page_data: list[dict], + page_data: list[WordEntry], level_node: WikiNode, linkage_type: str, ) -> None: @@ -76,20 +70,20 @@ def process_linkage_list( sense_index = int(sense_index_text) continue - linkage_data = defaultdict(list) + linkage_data = Linkage() if len(sense_text) > 0: - linkage_data["sense"] = sense_text + linkage_data.sense = sense_text if sense_index != 0: - linkage_data["sense_index"] = sense_index + linkage_data.sense_index = sense_index pending_tag = "" for index, child_node in enumerate( # remove nested lists template_or_list_node.invert_find_child(NodeKind.LIST) ): - if index == 0 or "word" not in linkage_data: + if index == 0 or "word" not in linkage_data.model_fields_set: if isinstance(child_node, TemplateNode): process_linkage_template(wxr, child_node, linkage_data) else: - linkage_data["word"] = clean_node(wxr, None, child_node) + linkage_data.word = clean_node(wxr, None, child_node) else: tag_text = ( child_node @@ -108,8 +102,9 @@ def process_linkage_list( pending_tag = "" elif tag_text.strip() in {",", "/"}: # list item has more than one word - page_data[-1][linkage_type].append(linkage_data) - linkage_data = defaultdict(list) + pre_data = getattr(page_data[-1], linkage_type) + pre_data.append(linkage_data) + linkage_data = Linkage() continue elif len(pending_tag) > 0: pending_tag += tag_text @@ -117,18 +112,19 @@ def process_linkage_list( for tag in split_tag_text(tag_text): if tag.startswith("— "): - linkage_data["translation"] = tag.removeprefix("— ") + linkage_data.translation = tag.removeprefix("— ") elif len(tag) > 0: - linkage_data["tags"].append(tag) + linkage_data.tags.append(tag) - if "word" in linkage_data: - page_data[-1][linkage_type].append(linkage_data) + if "word" in linkage_data.model_fields_set: + pre_data = getattr(page_data[-1], linkage_type) + pre_data.append(linkage_data) def process_linkage_template( wxr: WiktextractContext, node: TemplateNode, - linkage_data: dict[str, Union[str, list[str]]], + 
linkage_data: Linkage, ) -> None: if node.template_name == "lien": process_lien_template(wxr, node, linkage_data) @@ -139,7 +135,7 @@ def process_lien_template( wxr: WiktextractContext, node: TemplateNode, - linkage_data: dict[str, Union[str, list[str]]], + linkage_data: Linkage, ) -> None: # link word template: https://fr.wiktionary.org/wiki/Modèle:lien word = clean_node( wxr, None, node.template_parameters.get("dif", node.template_parameters.get(1)), ) - linkage_data["word"] = word + linkage_data.word = word if "tr" in node.template_parameters: - linkage_data["roman"] = clean_node( + linkage_data.roman = clean_node( wxr, None, node.template_parameters.get("tr") ) if "sens" in node.template_parameters: - linkage_data["translation"] = clean_node( + linkage_data.translation = clean_node( wxr, None, node.template_parameters.get("sens") ) @@ -161,17 +157,15 @@ def process_zh_lien_template( wxr: WiktextractContext, node: TemplateNode, - linkage_data: dict[str, Union[str, list[str]]], + linkage_data: Linkage, ) -> None: # https://fr.wiktionary.org/wiki/Modèle:zh-lien - linkage_data["word"] = clean_node( - wxr, None, node.template_parameters.get(1) - ) - linkage_data["roman"] = clean_node( + linkage_data.word = clean_node(wxr, None, node.template_parameters.get(1)) + linkage_data.roman = clean_node( wxr, None, node.template_parameters.get(2) ) # pinyin traditional_form = clean_node( wxr, None, node.template_parameters.get(3, "") ) if len(traditional_form) > 0: - linkage_data["alt"] = traditional_form + linkage_data.alt = traditional_form diff --git a/src/wiktextract/extractor/fr/models.py b/src/wiktextract/extractor/fr/models.py new file mode 100644 index 000000000..7a25d4b0d --- /dev/null +++ b/src/wiktextract/extractor/fr/models.py @@ -0,0 +1,109 @@ +from pydantic import BaseModel, ConfigDict, Field + + +class FrenchBaseModel(BaseModel): + model_config = ConfigDict( + extra="ignore", + strict=True, + validate_assignment=True, + validate_default=True, + ) + + +class Example(FrenchBaseModel): + text: str = Field("", description="Example usage sentence") + translation: str = Field( + "", description="French translation of the example sentence" + ) + roman: str = Field("", description="Romanization of the example sentence") + ref: str = Field( + "", + description="Source of the sentence, like book title and page number", + ) + + +class Form(FrenchBaseModel): + form: str = "" + tags: list[str] = [] + ipas: list[str] = [] + source: str = Field("", description="Form line template name") + + +class Sound(FrenchBaseModel): + zh_pron: str = Field("", description="Chinese word pronunciation") + ipa: str = Field("", description="International Phonetic Alphabet") + audio: str = Field("", description="Audio file name") + wav_url: str = "" + oga_url: str = "" + ogg_url: str = "" + mp3_url: str = "" + opus_url: str = "" + tags: list[str] = [] + + +class Translation(FrenchBaseModel): + lang_code: str = Field( + "", description="Wiktionary language code of the translation term" + ) + lang_name: str = Field("", description="Translation language name") + word: str = Field("", description="Translation term") + sense: str = Field("", description="Translation gloss") + tags: list[str] = [] + roman: str = "" + traditional_writing: str = Field( + "", description="Alternative writing for Chinese, Korean and Mongolian" + ) + + +class Linkage(FrenchBaseModel): + word: str = "" + tags: list[str] = [] + roman: str = "" +
alt: str = Field("", description="Alternative form") + translation: str = Field("", description="French translation") + sense: str = Field("", description="Definition of the word") + sense_index: int = Field( + 0, ge=0, description="Number of the definition, starting from 1" + ) + lang_name: str = Field("", description="Localized language name") + lang_code: str = Field("", description="Wiktionary language code") + + +class Sense(FrenchBaseModel): + glosses: list[str] = [] + tags: list[str] = [] + categories: list[str] = [] + examples: list[Example] = [] + + +class WordEntry(FrenchBaseModel): + model_config = ConfigDict(title="French Wiktionary") + + word: str = Field(description="Word string") + lang_code: str = Field(description="Wiktionary language code") + lang_name: str = Field(description="Localized language name") + pos: str = Field("", description="Part of speech type") + pos_title: str = Field( + "", description="Original POS title for matching etymology texts" + ) + etymology_texts: list[str] = Field([], description="Etymology list") + senses: list[Sense] = Field([], description="Sense list") + forms: list[Form] = Field([], description="Inflection forms list") + sounds: list[Sound] = [] + translations: list[Translation] = [] + synonyms: list[Linkage] = [] + hyponyms: list[Linkage] = [] + hypernyms: list[Linkage] = [] + holonyms: list[Linkage] = [] + meronyms: list[Linkage] = [] + derived: list[Linkage] = [] + troponyms: list[Linkage] = [] + paronyms: list[Linkage] = [] + related: list[Linkage] = [] + abbreviation: list[Linkage] = [] + proverbs: list[Linkage] = [] + title: str = Field("", description="Redirect page source title") + redirect: str = Field("", description="Redirect page target title") + categories: list[str] = [] + notes: list[str] = [] + tags: list[str] = [] diff --git a/src/wiktextract/extractor/fr/note.py b/src/wiktextract/extractor/fr/note.py index b6c559712..e2b4a1633 100644 --- a/src/wiktextract/extractor/fr/note.py +++ b/src/wiktextract/extractor/fr/note.py @@ -1,14 +1,14 @@ -from typing import Any, Dict, List - from wikitextprocessor import NodeKind, WikiNode from wikitextprocessor.parser import TemplateNode from wiktextract.page import clean_node from wiktextract.wxr_context import WiktextractContext +from .models import WordEntry + def extract_note( wxr: WiktextractContext, - page_data: List[Dict[str, Any]], + page_data: list[WordEntry], level_node: WikiNode, ) -> None: # Save paragraph and list item texts to a list of strings.
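+ # list items become separate notes; paragraph nodes are buffered until a newline-terminated string ends the paragraph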
@@ -25,20 +25,20 @@ wxr, page_data[-1], list_item_node.children ) if len(note_text) > 0: - page_data[-1]["notes"].append(note_text) + page_data[-1].notes.append(note_text) continue note_paragraph_nodes.append(child) if isinstance(child, str) and child.endswith("\n"): note_text = clean_node(wxr, page_data[-1], note_paragraph_nodes) if len(note_text) > 0: - page_data[-1]["notes"].append(note_text) + page_data[-1].notes.append(note_text) note_paragraph_nodes.clear() def process_note_template( wxr: WiktextractContext, - page_data: List[Dict[str, Any]], + page_data: list[WordEntry], template_node: TemplateNode, ) -> None: expaned_template = wxr.wtp.parse( diff --git a/src/wiktextract/extractor/fr/page.py b/src/wiktextract/extractor/fr/page.py index 9c22d47e2..27704a00d 100644 --- a/src/wiktextract/extractor/fr/page.py +++ b/src/wiktextract/extractor/fr/page.py @@ -1,7 +1,5 @@ -import copy import logging -from collections import defaultdict -from typing import Dict, List, Optional +from typing import Any, Optional from wikitextprocessor import NodeKind, WikiNode from wikitextprocessor.parser import TemplateNode @@ -13,6 +11,7 @@ from .gloss import extract_gloss, process_exemple_template from .inflection import extract_inflection from .linkage import extract_linkage +from .models import WordEntry from .note import extract_note from .pronunciation import extract_pronunciation from .translation import extract_translation @@ -32,10 +31,10 @@ def parse_section( wxr: WiktextractContext, - page_data: List[Dict], - base_data: Dict, + page_data: list[WordEntry], + base_data: WordEntry, level_node: WikiNode, -) -> Optional[List[EtymologyData]]: +) -> Optional[list[EtymologyData]]: # Page structure: https://fr.wiktionary.org/wiki/Wiktionnaire:Structure_des_pages for level_node_template in level_node.find_content(NodeKind.TEMPLATE): if level_node_template.template_name == "S": @@ -78,14 +77,14 @@ and section_type in wxr.config.LINKAGE_SUBTITLES ): if len(page_data) == 0: - page_data.append(copy.deepcopy(base_data)) + page_data.append(base_data.model_copy(deep=True)) extract_linkage( wxr, page_data, level_node, section_type, ) - if page_data[-1].keys() == base_data.keys(): + if page_data[-1] == base_data: page_data.pop() # no data was added elif ( wxr.config.capture_translations @@ -100,41 +99,41 @@ pass elif section_type in wxr.config.OTHER_SUBTITLES["notes"]: if len(page_data) == 0: - page_data.append(copy.deepcopy(base_data)) + page_data.append(base_data.model_copy(deep=True)) extract_note(wxr, page_data, level_node) - if page_data[-1].keys() == base_data.keys(): + if page_data[-1] == base_data: page_data.pop() # no data was added def process_pos_block( wxr: WiktextractContext, - page_data: List[Dict], - base_data: Dict, + page_data: list[WordEntry], + base_data: WordEntry, pos_title_node: TemplateNode, pos_argument: str, pos_title: str, ): pos_type = wxr.config.POS_SUBTITLES[pos_argument]["pos"] - if len(page_data) == 0 or "pos" in page_data[-1]: - page_data.append(copy.deepcopy(base_data)) - page_data[-1]["pos"] = pos_type - page_data[-1]["pos_title"] = pos_title + if len(page_data) == 0 or "pos" in page_data[-1].model_fields_set: + page_data.append(base_data.model_copy(deep=True)) + page_data[-1].pos = pos_type + page_data[-1].pos_title = pos_title child_nodes = list(pos_title_node.filter_empty_str_child()) form_line_start = 0 # Ligne de forme gloss_start = len(child_nodes) - lang_code = page_data[-1].get("lang_code") + lang_code =
page_data[-1].lang_code for index, child in enumerate(child_nodes): if isinstance(child, WikiNode): if child.kind == NodeKind.TEMPLATE: template_name = child.template_name if ( template_name.endswith("-exemple") - and len(page_data[-1].get("senses", [])) > 0 + and len(page_data[-1].senses) > 0 ): # zh-exemple and ja-exemple expand to list thus are not the # child of gloss list item. process_exemple_template( - wxr, child, page_data[-1]["senses"][-1] + wxr, child, page_data[-1].senses[-1] ) elif template_name.startswith(("zh-mot", "ja-mot")): # skip form line templates @@ -155,7 +154,7 @@ def process_pos_block( def parse_page( wxr: WiktextractContext, page_title: str, page_text: str -) -> List[Dict[str, str]]: +) -> list[dict[str, Any]]: if wxr.config.verbose: logging.info(f"Parsing page: {page_title}") @@ -170,23 +169,27 @@ def parse_page( additional_expand=ADDITIONAL_EXPAND_TEMPLATES, ) - page_data = [] + page_data: list[WordEntry] = [] for level2_node in tree.find_child(NodeKind.LEVEL2): for subtitle_template in level2_node.find_content(NodeKind.TEMPLATE): # https://fr.wiktionary.org/wiki/Modèle:langue # https://fr.wiktionary.org/wiki/Wiktionnaire:Liste_des_langues if subtitle_template.template_name == "langue": - base_data = defaultdict(list, {"word": wxr.wtp.title}) + categories = {} lang_code = subtitle_template.template_parameters.get(1) if ( wxr.config.capture_language_codes is not None and lang_code not in wxr.config.capture_language_codes ): continue - lang_name = clean_node(wxr, base_data, subtitle_template) + lang_name = clean_node(wxr, categories, subtitle_template) wxr.wtp.start_section(lang_name) - base_data["lang_name"] = lang_name - base_data["lang_code"] = lang_code + base_data = WordEntry( + word=wxr.wtp.title, + lang_code=lang_code, + lang_name=lang_name, + categories=categories.get("categories", []), + ) etymology_data: Optional[EtymologyData] = None for level3_node in level2_node.find_child(NodeKind.LEVEL3): new_etymology_data = parse_section( @@ -198,4 +201,4 @@ def parse_page( if etymology_data is not None: insert_etymology_data(lang_code, page_data, etymology_data) - return page_data + return [m.model_dump(exclude_defaults=True) for m in page_data] diff --git a/src/wiktextract/extractor/fr/pronunciation.py b/src/wiktextract/extractor/fr/pronunciation.py index 319ef5aed..61e934d28 100644 --- a/src/wiktextract/extractor/fr/pronunciation.py +++ b/src/wiktextract/extractor/fr/pronunciation.py @@ -1,45 +1,40 @@ -from collections import defaultdict -from copy import deepcopy -from typing import Dict, List, Union - from wikitextprocessor import NodeKind, WikiNode from wikitextprocessor.parser import TemplateNode from wiktextract.extractor.share import create_audio_url_dict from wiktextract.page import clean_node from wiktextract.wxr_context import WiktextractContext +from .models import Sound, WordEntry + def extract_pronunciation( wxr: WiktextractContext, - page_data: List[Dict], + page_data: list[WordEntry], level_node: WikiNode, - base_data: Dict[str, str], + base_data: WordEntry, ) -> None: sound_data = [] - lang_code = base_data.get("lang_code") + lang_code = base_data.lang_code for list_node in level_node.find_child(NodeKind.LIST): for list_item_node in list_node.find_child(NodeKind.LIST_ITEM): sound_data.extend( - process_pron_list_item( - wxr, list_item_node, defaultdict(list), lang_code - ) + process_pron_list_item(wxr, list_item_node, Sound(), lang_code) ) if len(sound_data) == 0: return if len(page_data) == 0: - page_data.append(deepcopy(base_data)) + 
page_data.append(base_data.model_copy(deep=True)) if level_node.kind == NodeKind.LEVEL3: # Add extracted sound data to all sense dictionaries that have the same # language code when the prononciation subtitle is a level 3 title node. # Otherwise only add to the last one. - lang_code = page_data[-1].get("lang_code") for sense_data in page_data: - if sense_data.get("lang_code") == lang_code: - sense_data["sounds"].extend(sound_data) + if sense_data.lang_code == lang_code: + sense_data.sounds.extend(sound_data) else: - page_data[-1]["sounds"].extend(sound_data) + page_data[-1].sounds.extend(sound_data) PRON_TEMPLATES = frozenset( @@ -59,56 +54,57 @@ def extract_pronunciation( def process_pron_list_item( wxr: WiktextractContext, list_item_node: WikiNode, - sound_data: Dict[str, Union[str, List[str]]], + sound_data: Sound, lang_code: str, -) -> List[Dict[str, Union[str, List[str]]]]: - pron_key = "zh-pron" if lang_code == "zh" else "ipa" +) -> list[Sound]: + pron_key = "zh_pron" if lang_code == "zh" else "ipa" for template_node in list_item_node.find_child(NodeKind.TEMPLATE): if template_node.template_name in PRON_TEMPLATES: pron_text = process_pron_template(wxr, template_node) if len(pron_text) > 0: - sound_data[pron_key] = pron_text + setattr(sound_data, pron_key, pron_text) elif template_node.template_name in {"écouter", "audio", "pron-rég"}: process_ecouter_template(wxr, template_node, sound_data) else: sound_tag = clean_node(wxr, None, template_node) if sound_tag.startswith("(") and sound_tag.endswith(")"): sound_tag = sound_tag.strip("()") - sound_data["tags"].append(sound_tag) + sound_data.tags.append(sound_tag) if list_item_node.contain_node(NodeKind.LIST): returned_data = [] for bold_node in list_item_node.find_child(NodeKind.BOLD): - sound_data["tags"].append(clean_node(wxr, None, bold_node)) + sound_data.tags.append(clean_node(wxr, None, bold_node)) for nest_list_item in list_item_node.find_child_recursively( NodeKind.LIST_ITEM ): - new_sound_data = deepcopy(sound_data) + new_sound_data = sound_data.model_copy(deep=True) process_pron_list_item( wxr, nest_list_item, new_sound_data, lang_code ) - if pron_key in new_sound_data: + if pron_key in new_sound_data.model_fields_set: returned_data.append(new_sound_data) return returned_data - elif len(sound_data) > 0: - if pron_key not in sound_data: + elif len(sound_data.model_dump(exclude_defaults=True)) > 0: + if pron_key not in sound_data.model_fields_set: for child in list_item_node.filter_empty_str_child(): if isinstance(child, str): if child.strip().startswith(": "): # IPA text after "language : " - sound_data[pron_key] = ( - child.strip().removeprefix(": ").strip() + setattr( + sound_data, + pron_key, + child.strip().removeprefix(": ").strip(), ) elif len(child.strip()) > 0 and child.strip() != ":": # language text before ":" - sound_data["tags"].append(child.strip()) + sound_data.tags.append(child.strip()) - if pron_key in sound_data or "audio" in sound_data: + if len({pron_key, "audio"} & sound_data.model_fields_set) > 0: return [sound_data] - return [] @@ -129,7 +125,7 @@ def process_pron_template( def process_ecouter_template( wxr: WiktextractContext, template_node: TemplateNode, - sound_data: Dict[str, Union[str, List[str]]], + sound_data: Sound, ) -> None: # sound file template: https://fr.wiktionary.org/wiki/Modèle:écouter location = clean_node( @@ -148,11 +144,19 @@ def process_ecouter_template( wxr, None, template_node.template_parameters.get("audio", "") ) if len(location) > 0: - sound_data["tags"].append(location) + 
sound_data.tags.append(location) if len(ipa) > 0: - sound_data["ipa"] = ipa + sound_data.ipa = ipa if len(audio_file) > 0: - sound_data.update(create_audio_url_dict(audio_file)) + audio_data = create_audio_url_dict(audio_file) + for key, value in audio_data.items(): + if key in sound_data.model_fields: + setattr(sound_data, key, value) + else: + wxr.wtp.debug( + f"{key=} not defined in Sound", + sortid="fr.pronunciation/156", + ) def is_ipa_text(text: str) -> bool: @@ -164,43 +168,3 @@ def is_ipa_text(text: str) -> bool: # ipa text in a new line return True return False - - -def split_ipa(text: str) -> Union[List[str], str]: - # break IPA text if it contains "ou"(or) - if " ou " in text: - # two ipa texts in the same line: "en-conj-rég" template - return text.split(" ou ") - if text.startswith("ou "): - return text.removeprefix("ou ") - if text.endswith(" Prononciation ?\\"): - # inflection table templates use a edit link when the ipa data is - # missing, and the link usually ends with " Prononciation ?" - return "" - return text - - -def insert_ipa( - target_dict: Dict[str, Union[str, List[str]]], ipa_text: str -) -> None: - # insert IPA text to a dictionary, and merge values of the key "ipa" and - # "ipas", `target_dict` is created by `defaultdict(list)`. - ipa_data = split_ipa(ipa_text) - if len(ipa_data) == 0: - return - - if isinstance(ipa_data, str): - if "ipas" in target_dict: - target_dict["ipas"].append(ipa_data) - elif "ipa" in target_dict: - target_dict["ipas"].append(target_dict["ipa"]) - target_dict["ipas"].append(ipa_data) - del target_dict["ipa"] - else: - target_dict["ipa"] = ipa_data - elif isinstance(ipa_data, list): - if "ipa" in target_dict: - target_dict["ipas"].append(target_dict["ipa"]) - del target_dict["ipa"] - - target_dict["ipas"].extend(ipa_data) diff --git a/src/wiktextract/extractor/fr/translation.py b/src/wiktextract/extractor/fr/translation.py index c5dd112f9..484b8fa07 100644 --- a/src/wiktextract/extractor/fr/translation.py +++ b/src/wiktextract/extractor/fr/translation.py @@ -1,16 +1,17 @@ -from collections import defaultdict -from typing import Dict, List, Union +from typing import Optional from wikitextprocessor import NodeKind, WikiNode from wikitextprocessor.parser import TemplateNode from wiktextract.page import clean_node from wiktextract.wxr_context import WiktextractContext +from .models import Translation, WordEntry + def extract_translation( - wxr: WiktextractContext, page_data: List[Dict], level_node: WikiNode + wxr: WiktextractContext, page_data: list[WordEntry], level_node: WikiNode ) -> None: - base_translation_data = defaultdict(list) + base_translation_data = Translation() for level_node_child in level_node.filter_empty_str_child(): if isinstance(level_node_child, WikiNode): if level_node_child.kind == NodeKind.TEMPLATE: @@ -42,8 +43,8 @@ def extract_translation( def process_italic_node( wxr: WiktextractContext, italic_node: WikiNode, - previous_node: Union[WikiNode, None], - page_data: List[Dict], + previous_node: Optional[WikiNode], + page_data: list[WordEntry], ) -> None: # add italic text after a "trad" template as a tag tag = clean_node(wxr, None, italic_node) @@ -53,16 +54,16 @@ def process_italic_node( and previous_node is not None and previous_node.kind == NodeKind.TEMPLATE and previous_node.template_name.startswith("trad") - and len(page_data[-1].get("translations", [])) > 0 + and len(page_data[-1].translations) > 0 ): - page_data[-1]["translations"][-1]["tags"].append(tag.strip("()")) + 
page_data[-1].translations[-1].tags.append(tag.strip("()")) def process_translation_templates( wxr: WiktextractContext, template_node: TemplateNode, - page_data: List[Dict], - base_translation_data: Dict[str, str], + page_data: list[WordEntry], + base_translation_data: Translation, ) -> None: if template_node.template_name == "trad-fin": # ignore translation end template @@ -73,13 +74,13 @@ def process_translation_templates( if sense_parameter is not None: sense_text = clean_node(wxr, None, sense_parameter) if len(sense_text) > 0: - base_translation_data["sense"] = sense_text + base_translation_data.sense = sense_text elif template_node.template_name == "T": # Translation language: https://fr.wiktionary.org/wiki/Modèle:T - base_translation_data[ - "lang_code" - ] = template_node.template_parameters.get(1) - base_translation_data["lang_name"] = clean_node( + base_translation_data.lang_code = template_node.template_parameters.get( + 1 + ) + base_translation_data.lang_name = clean_node( wxr, page_data[-1], template_node ) elif template_node.template_name.startswith("trad"): @@ -104,22 +105,22 @@ def process_translation_templates( translation_traditional_writing = clean_node( wxr, None, template_node.template_parameters.get("tradi", "") ) - translation_data = base_translation_data.copy() - translation_data["word"] = translation_term + translation_data = base_translation_data.model_copy(deep=True) + translation_data.word = translation_term if len(translation_roman) > 0: - translation_data["roman"] = translation_roman + translation_data.roman = translation_roman if len(translation_traditional_writing) > 0: - translation_data[ - "traditional_writing" - ] = translation_traditional_writing + translation_data.traditional_writing = ( + translation_traditional_writing + ) if 3 in template_node.template_parameters: expaned_node = wxr.wtp.parse( wxr.wtp.node_to_wikitext(template_node), expand_all=True ) for gender_node in expaned_node.find_child(NodeKind.ITALIC): - translation_data["tags"] = [clean_node(wxr, None, gender_node)] + translation_data.tags = [clean_node(wxr, None, gender_node)] break - page_data[-1]["translations"].append(translation_data) - elif len(page_data[-1].get("translations", [])) > 0: + page_data[-1].translations.append(translation_data) + elif len(page_data[-1].translations) > 0: tag = clean_node(wxr, None, template_node).strip("()") - page_data[-1]["translations"][-1]["tags"].append(tag) + page_data[-1].translations[-1].tags.append(tag) diff --git a/tests/test_fr_etymology.py b/tests/test_fr_etymology.py index 3b49b1ce4..a31e8b7b5 100644 --- a/tests/test_fr_etymology.py +++ b/tests/test_fr_etymology.py @@ -1,5 +1,4 @@ -import unittest -from collections import defaultdict +from unittest import TestCase from wikitextprocessor import Wtp from wiktextract.config import WiktionaryConfig @@ -7,10 +6,11 @@ extract_etymology, insert_etymology_data, ) +from wiktextract.extractor.fr.models import WordEntry from wiktextract.wxr_context import WiktextractContext -class TestEtymology(unittest.TestCase): +class TestEtymology(TestCase): def setUp(self) -> None: self.wxr = WiktextractContext( Wtp(lang_code="fr"), WiktionaryConfig(dump_file_lang_code="fr") @@ -51,21 +51,29 @@ def test_list_etymologies(self): }, ) page_data = [ - defaultdict( - list, - {"lang_code": "fr", "pos": "noun", "pos_title": "Nom commun 1"}, + WordEntry( + word="test", + lang_code="fr", + lang_name="Français", + pos="noun", + pos_title="Nom commun 1", ), - defaultdict( - list, - {"lang_code": "fr", "pos": "noun", 
"pos_title": "Nom commun 2"}, + WordEntry( + word="test", + lang_code="fr", + lang_name="Français", + pos="noun", + pos_title="Nom commun 2", ), ] insert_etymology_data("fr", page_data, etymology_data) self.assertEqual( - page_data, + [d.model_dump(exclude_defaults=True) for d in page_data], [ { + "word": "test", "lang_code": "fr", + "lang_name": "Français", "pos": "noun", "pos_title": "Nom commun 1", "etymology_texts": [ @@ -74,7 +82,9 @@ def test_list_etymologies(self): ], }, { + "word": "test", "lang_code": "fr", + "lang_name": "Français", "pos": "noun", "pos_title": "Nom commun 2", "etymology_texts": [ @@ -106,25 +116,36 @@ def test_indent_etymology_with_pos_template(self): }, ) page_data = [ - defaultdict( - list, - {"lang_code": "fr", "pos": "noun", "pos_title": "Nom commun 1"}, + WordEntry( + word="test", + lang_code="fr", + lang_name="Français", + pos="noun", + pos_title="Nom commun 1", ), - defaultdict( - list, - {"lang_code": "fr", "pos": "noun", "pos_title": "Nom commun 2"}, + WordEntry( + word="test", + lang_code="fr", + lang_name="Français", + pos="noun", + pos_title="Nom commun 2", ), - defaultdict( - list, - {"lang_code": "fr", "pos": "intj", "pos_title": "Interjection"}, + WordEntry( + word="test", + lang_code="fr", + lang_name="Français", + pos="intj", + pos_title="Interjection", ), ] insert_etymology_data("fr", page_data, etymology_data) self.assertEqual( - page_data, + [d.model_dump(exclude_defaults=True) for d in page_data], [ { + "word": "test", "lang_code": "fr", + "lang_name": "Français", "pos": "noun", "pos_title": "Nom commun 1", "etymology_texts": [ @@ -132,7 +153,9 @@ def test_indent_etymology_with_pos_template(self): ], }, { + "word": "test", "lang_code": "fr", + "lang_name": "Français", "pos": "noun", "pos_title": "Nom commun 2", "etymology_texts": [ @@ -140,7 +163,9 @@ def test_indent_etymology_with_pos_template(self): ], }, { + "word": "test", "lang_code": "fr", + "lang_name": "Français", "pos": "intj", "pos_title": "Interjection", "etymology_texts": [ diff --git a/tests/test_fr_form_line.py b/tests/test_fr_form_line.py index 5f2c59b75..0f702e4f8 100644 --- a/tests/test_fr_form_line.py +++ b/tests/test_fr_form_line.py @@ -1,5 +1,4 @@ -import unittest -from collections import defaultdict +from unittest import TestCase from unittest.mock import patch from wikitextprocessor import Wtp @@ -8,10 +7,11 @@ extract_form_line, process_zh_mot_template, ) +from wiktextract.extractor.fr.models import WordEntry from wiktextract.wxr_context import WiktextractContext -class TestFormLine(unittest.TestCase): +class TestFormLine(TestCase): def setUp(self) -> None: self.wxr = WiktextractContext( Wtp(lang_code="fr"), WiktionaryConfig(dump_file_lang_code="fr") @@ -20,50 +20,50 @@ def setUp(self) -> None: def tearDown(self) -> None: self.wxr.wtp.close_db_conn() - @patch( - "wiktextract.extractor.fr.pronunciation.clean_node", - return_value="/lə nɔ̃/", - ) - def test_ipa(self, mock_clean_node): - self.wxr.wtp.start_page("") - root = self.wxr.wtp.parse("'''le nom''' {{pron|lə nɔ̃|fr}}") - page_data = [defaultdict(list)] + def test_ipa(self): + self.wxr.wtp.start_page("bonjour") + self.wxr.wtp.add_page("Modèle:pron", 10, "\\bɔ̃.ʒuʁ\\") + root = self.wxr.wtp.parse("'''bonjour''' {{pron|bɔ̃.ʒuʁ|fr}}") + page_data = [ + WordEntry(word="bonjour", lang_code="fr", lang_name="Français") + ] extract_form_line(self.wxr, page_data, root.children) - self.assertEqual(page_data, [{"sounds": [{"ipa": "/lə nɔ̃/"}]}]) + self.assertEqual( + [d.model_dump(exclude_defaults=True) for d in 
            page_data[-1].sounds],
+            [{"ipa": "\\bɔ̃.ʒuʁ\\"}],
+        )

-    @patch(
-        "wiktextract.extractor.fr.form_line.clean_node", return_value="masculin"
-    )
-    def test_gender(self, mock_clean_node):
-        self.wxr.wtp.start_page("")
-        root = self.wxr.wtp.parse("'''le nom''' {{m}}")
-        page_data = [defaultdict(list)]
+    def test_gender(self):
+        self.wxr.wtp.start_page("bonjour")
+        self.wxr.wtp.add_page("Modèle:m", 10, "masculin")
+        root = self.wxr.wtp.parse("'''bonjour''' {{m}}")
+        page_data = [
+            WordEntry(word="bonjour", lang_code="fr", lang_name="Français")
+        ]
         extract_form_line(self.wxr, page_data, root.children)
-        self.assertEqual(page_data, [{"tags": ["masculin"]}])
+        self.assertEqual(page_data[-1].tags, ["masculin"])

     def test_zh_mot(self):
-        self.wxr.wtp.start_page("")
+        self.wxr.wtp.start_page("马")
         self.wxr.wtp.add_page("Modèle:zh-mot", 10, body="{{lang}} {{pron}}")
         self.wxr.wtp.add_page("Modèle:lang", 10, body="mǎ")
         self.wxr.wtp.add_page("Modèle:pron", 10, body="\\ma̠˨˩˦\\")
         root = self.wxr.wtp.parse("{{zh-mot|马|mǎ}}")
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         process_zh_mot_template(self.wxr, root.children[0], page_data)
         self.assertEqual(
-            page_data,
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].sounds],
             [
-                {
-                    "sounds": [
-                        {"tags": ["Pinyin"], "zh-pron": "mǎ"},
-                        {"ipa": "\\ma̠˨˩˦\\"},
-                    ]
-                }
+                {"tags": ["Pinyin"], "zh_pron": "mǎ"},
+                {"ipa": "\\ma̠˨˩˦\\"},
             ],
         )

     def test_ipa_location_tag(self):
         # https://fr.wiktionary.org/wiki/basket-ball
-        self.wxr.wtp.start_page("")
+        self.wxr.wtp.start_page("basket-ball")
         self.wxr.wtp.add_page("Modèle:pron", 10, body="{{{1}}}")
         self.wxr.wtp.add_page("Modèle:FR", 10, body="(France)")
         self.wxr.wtp.add_page("Modèle:CA", 10, body="(Canada)")
@@ -71,35 +71,40 @@ def test_ipa_location_tag(self):
         root = self.wxr.wtp.parse(
             "{{pron|bas.kɛt.bol|fr}} {{FR|nocat=1}} ''ou'' {{pron|bas.kɛt.bɔl|fr}} {{FR|nocat=1}} ''ou'' {{pron|bas.kɛt.bɑl|fr}} {{CA|nocat=1}} {{m}}"
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="basket-ball", lang_code="fr", lang_name="Français")
+        ]
         extract_form_line(self.wxr, page_data, root.children)
         self.assertEqual(
-            page_data,
-            [
-                {
-                    "tags": ["masculin"],
-                    "sounds": [
-                        {"ipa": "bas.kɛt.bol", "tags": ["France"]},
-                        {"ipa": "bas.kɛt.bɔl", "tags": ["France"]},
-                        {"ipa": "bas.kɛt.bɑl", "tags": ["Canada"]},
-                    ],
-                }
-            ],
+            page_data[-1].model_dump(exclude_defaults=True),
+            {
+                "word": "basket-ball",
+                "lang_code": "fr",
+                "lang_name": "Français",
+                "tags": ["masculin"],
+                "sounds": [
+                    {"ipa": "bas.kɛt.bol", "tags": ["France"]},
+                    {"ipa": "bas.kɛt.bɔl", "tags": ["France"]},
+                    {"ipa": "bas.kɛt.bɑl", "tags": ["Canada"]},
+                ],
+            },
         )

     def test_template_in_pron_argument(self):
-        # https://fr.wiktionary.org/wiki/minéral argileux
+        # https://fr.wiktionary.org/wiki/minéral_argileux
         self.wxr.wtp.start_page("")
         self.wxr.wtp.add_page("Modèle:pron", 10, body="{{{1}}}")
         self.wxr.wtp.add_page("Modèle:liaison", 10, body="‿")
         root = self.wxr.wtp.parse(
             "'''minéral argileux''' {{pron|mi.ne.ʁa.l{{liaison|fr}}aʁ.ʒi.lø|fr}}"
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         extract_form_line(self.wxr, page_data, root.children)
         self.assertEqual(
-            page_data,
-            [{"sounds": [{"ipa": "mi.ne.ʁa.l‿aʁ.ʒi.lø"}]}],
+            page_data[-1].sounds[0].model_dump(exclude_defaults=True),
+            {"ipa": "mi.ne.ʁa.l‿aʁ.ʒi.lø"},
         )

     @patch(
@@ -112,40 +117,37 @@ def test_equiv_pour_template(self, mock_node_to_wikitext):
         root = self.wxr.wtp.parse(
             "{{équiv-pour|un homme|auteur|2egenre=une personne non-binaire|2egenre1=autaire|2egenre2=auteurice|2egenre3=auteur·ice|lang=fr}}"
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="autrice", lang_code="fr", lang_name="Français")
+        ]
         extract_form_line(self.wxr, page_data, root.children)
         self.assertEqual(
-            page_data,
-            [
-                {
-                    "forms": [
-                        {
-                            "form": "auteur",
-                            "tags": ["pour un homme, on dit"],
-                            "source": "form line template 'équiv-pour'",
-                        },
-                        {
-                            "form": "autaire",
-                            "tags": [
-                                "pour une personne non-binaire, on peut dire"
-                            ],
-                            "source": "form line template 'équiv-pour'",
-                        },
-                        {
-                            "form": "auteurice",
-                            "tags": [
-                                "pour une personne non-binaire, on peut dire"
-                            ],
-                            "source": "form line template 'équiv-pour'",
-                        },
-                        {
-                            "form": "auteur·ice",
-                            "tags": [
-                                "pour une personne non-binaire, on peut dire"
-                            ],
-                            "source": "form line template 'équiv-pour'",
-                        },
-                    ]
-                }
-            ],
+            page_data[-1].model_dump(exclude_defaults=True),
+            {
+                "word": "autrice",
+                "lang_code": "fr",
+                "lang_name": "Français",
+                "forms": [
+                    {
+                        "form": "auteur",
+                        "tags": ["pour un homme, on dit"],
+                        "source": "form line template 'équiv-pour'",
+                    },
+                    {
+                        "form": "autaire",
+                        "tags": ["pour une personne non-binaire, on peut dire"],
+                        "source": "form line template 'équiv-pour'",
+                    },
+                    {
+                        "form": "auteurice",
+                        "tags": ["pour une personne non-binaire, on peut dire"],
+                        "source": "form line template 'équiv-pour'",
+                    },
+                    {
+                        "form": "auteur·ice",
+                        "tags": ["pour une personne non-binaire, on peut dire"],
+                        "source": "form line template 'équiv-pour'",
+                    },
+                ],
+            },
         )
diff --git a/tests/test_fr_gloss.py b/tests/test_fr_gloss.py
index dbcfa7c96..573f20f0c 100644
--- a/tests/test_fr_gloss.py
+++ b/tests/test_fr_gloss.py
@@ -1,15 +1,15 @@
-import unittest
-from collections import defaultdict
+from unittest import TestCase
 from unittest.mock import patch

 from wikitextprocessor import Page, Wtp

 from wiktextract.config import WiktionaryConfig
 from wiktextract.extractor.fr.gloss import extract_gloss
+from wiktextract.extractor.fr.models import WordEntry
 from wiktextract.extractor.fr.page import process_pos_block
 from wiktextract.wxr_context import WiktextractContext


-class TestFrGloss(unittest.TestCase):
+class TestFrGloss(TestCase):
     def setUp(self) -> None:
         self.wxr = WiktextractContext(
             Wtp(lang_code="fr"), WiktionaryConfig(dump_file_lang_code="fr")
@@ -29,22 +29,18 @@ def tearDown(self) -> None:
     def test_theme_templates(self, mock_get_page):
         self.wxr.wtp.start_page("")
         root = self.wxr.wtp.parse("# {{sportifs|fr}} gloss.\n#* example")
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         extract_gloss(self.wxr, page_data, root.children[0])
         self.assertEqual(
-            page_data,
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].senses],
             [
                 {
-                    "senses": [
-                        {
-                            "glosses": ["gloss."],
-                            "tags": ["Sport"],
-                            "categories": ["Sportifs en français"],
-                            "examples": [
-                                {"text": "example", "type": "example"}
-                            ],
-                        }
-                    ]
+                    "glosses": ["gloss."],
+                    "tags": ["Sport"],
+                    "categories": ["Sportifs en français"],
+                    "examples": [{"text": "example"}],
                 }
             ],
         )
@@ -54,26 +50,23 @@ def test_example_template(self):
         root = self.wxr.wtp.parse(
             "# gloss.\n#* {{exemple|text|translation|roman|source=source}}"
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         extract_gloss(self.wxr, page_data, root.children[0])
         self.assertEqual(
-            page_data,
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].senses],
             [
                 {
-                    "senses": [
+                    "glosses": ["gloss."],
+                    "examples": [
                         {
-                            "glosses": ["gloss."],
-                            "examples": [
-                                {
-                                    "text": "text",
-                                    "translation": "translation",
-                                    "roman": "roman",
-                                    "ref": "source",
-                                    "type": "quotation",
-                                }
-                            ],
+                            "text": "text",
+                            "translation": "translation",
+                            "roman": "roman",
+                            "ref": "source",
                         }
-                    ]
+                    ],
                 }
             ],
         )
@@ -87,31 +80,28 @@ def test_example_source_template(self, mock_node_to_html):
         root = self.wxr.wtp.parse(
             "# gloss.\n#* example {{source|source_title}}"
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         extract_gloss(self.wxr, page_data, root.children[0])
         self.assertEqual(
-            page_data,
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].senses],
             [
                 {
-                    "senses": [
+                    "glosses": ["gloss."],
+                    "examples": [
                         {
-                            "glosses": ["gloss."],
-                            "examples": [
-                                {
-                                    "text": "example",
-                                    "ref": "source_title",
-                                    "type": "quotation",
-                                }
-                            ],
+                            "text": "example",
+                            "ref": "source_title",
                         }
-                    ]
+                    ],
                 }
             ],
         )

     def test_zh_exemple_template(self):
         # https://fr.wiktionary.org/wiki/马
-        self.wxr.wtp.start_page("")
+        self.wxr.wtp.start_page("马")
         root = self.wxr.wtp.parse(
             "=== {{S|nom|zh}} ===\n# Cheval.\n{{zh-exemple|这匹'''马'''很大。|Ce cheval est grand.|Zhè pǐ '''mǎ''' hěn dà.
⠌⠢⠆ ⠏⠊⠄ ⠍⠔⠄ ⠓⠴⠄ ⠙⠔⠆⠐⠆}}"
         )
@@ -119,32 +109,32 @@ def test_zh_exemple_template(self):
         process_pos_block(
             self.wxr,
             page_data,
-            defaultdict(list),
+            WordEntry(word="马", lang_code="zh", lang_name="Chinois"),
             root.children[0],
             "nom",
             "Nom commun",
         )
         self.assertEqual(
-            page_data,
-            [
-                {
-                    "pos": "noun",
-                    "pos_title": "Nom commun",
-                    "senses": [
-                        {
-                            "glosses": ["Cheval."],
-                            "examples": [
-                                {
-                                    "text": "这匹马很大。",
-                                    "translation": "Ce cheval est grand.",
-                                    "roman": "Zhè pǐ mǎ hěn dà.\n⠌⠢⠆ ⠏⠊⠄ ⠍⠔⠄ ⠓⠴⠄ ⠙⠔⠆⠐⠆",
-                                    "type": "example",
-                                }
-                            ],
-                        }
-                    ],
-                }
-            ],
+            page_data[-1].model_dump(exclude_defaults=True),
+            {
+                "word": "马",
+                "lang_code": "zh",
+                "lang_name": "Chinois",
+                "pos": "noun",
+                "pos_title": "Nom commun",
+                "senses": [
+                    {
+                        "glosses": ["Cheval."],
+                        "examples": [
+                            {
+                                "text": "这匹马很大。",
+                                "translation": "Ce cheval est grand.",
+                                "roman": "Zhè pǐ mǎ hěn dà.\n⠌⠢⠆ ⠏⠊⠄ ⠍⠔⠄ ⠓⠴⠄ ⠙⠔⠆⠐⠆",
+                            }
+                        ],
+                    }
+                ],
+            },
         )

     def test_variante_de(self):
@@ -161,18 +151,16 @@
         root = self.wxr.wtp.parse(
             "# {{désuet|en}} {{sports|en}} {{indénombrable|en}} {{variante de|basketball|en}}."
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         extract_gloss(self.wxr, page_data, root.children[0])
         self.assertEqual(
-            page_data,
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].senses],
             [
                 {
-                    "senses": [
-                        {
-                            "glosses": ["Variante de basketball."],
-                            "tags": ["Désuet", "Sport", "Indénombrable"],
-                        }
-                    ]
+                    "glosses": ["Variante de basketball."],
+                    "tags": ["Désuet", "Sport", "Indénombrable"],
                 }
             ],
         )
@@ -183,17 +171,13 @@ def test_italic_tag(self):
         root = self.wxr.wtp.parse(
             "# (''localement'') [[bassin#Nom_commun|Bassin]], [[lavoir#Nom_commun|lavoir]]."
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         extract_gloss(self.wxr, page_data, root.children[0])
         self.assertEqual(
-            page_data,
-            [
-                {
-                    "senses": [
-                        {"glosses": ["Bassin, lavoir."], "tags": ["localement"]}
-                    ]
-                }
-            ],
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].senses],
+            [{"glosses": ["Bassin, lavoir."], "tags": ["localement"]}],
         )

     def test_not_italic_tag(self):
@@ -202,18 +186,16 @@
         root = self.wxr.wtp.parse(
             "# [[oiseau|Oiseau]] aquatique de taille moyenne du genre ''[[Rhynchops]]''."
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         extract_gloss(self.wxr, page_data, root.children[0])
         self.assertEqual(
-            page_data,
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].senses],
             [
                 {
-                    "senses": [
-                        {
-                            "glosses": [
-                                "Oiseau aquatique de taille moyenne du genre Rhynchops."
-                            ]
-                        }
+                    "glosses": [
+                        "Oiseau aquatique de taille moyenne du genre Rhynchops."
                     ]
                 }
             ],
@@ -224,11 +206,13 @@ def test_preserve_space_between_tags(self):
         # the space between italic node and the link node should be preserved
         self.wxr.wtp.start_page("becs-en-ciseaux")
         root = self.wxr.wtp.parse("# ''Pluriel de'' [[bec-en-ciseaux]].")
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         extract_gloss(self.wxr, page_data, root.children[0])
         self.assertEqual(
-            page_data,
-            [{"senses": [{"glosses": ["Pluriel de bec-en-ciseaux."]}]}],
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].senses],
+            [{"glosses": ["Pluriel de bec-en-ciseaux."]}],
         )

     @patch(
@@ -241,18 +225,16 @@ def test_template_is_not_tag(self, mock_get_page):
         root = self.wxr.wtp.parse(
             "# {{lien|autrice|fr|dif=Autrice}}, [[celle]] qui est à l’[[origine]] de [[quelque chose]]."
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         extract_gloss(self.wxr, page_data, root.children[0])
         self.assertEqual(
-            page_data,
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].senses],
             [
                 {
-                    "senses": [
-                        {
-                            "glosses": [
-                                "Autrice, celle qui est à l’origine de quelque chose."
-                            ]
-                        }
+                    "glosses": [
+                        "Autrice, celle qui est à l’origine de quelque chose."
                     ]
                 }
             ],
@@ -268,16 +250,17 @@ def test_nest_gloss(self):
 ##* nest example
 """
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         extract_gloss(self.wxr, page_data, root.children[0])
         self.assertEqual(
-            page_data[-1]["senses"],
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].senses],
             [
                 {
                     "examples": [
                         {
                             "text": "example 1",
-                            "type": "example",
                         }
                     ],
                     "glosses": [
@@ -288,7 +271,6 @@
                     "examples": [
                         {
                             "text": "nest example",
-                            "type": "example",
                         }
                     ],
                     "glosses": [
diff --git a/tests/test_fr_inflection.py b/tests/test_fr_inflection.py
index 3a1ed2bc7..20e66eb8a 100644
--- a/tests/test_fr_inflection.py
+++ b/tests/test_fr_inflection.py
@@ -1,15 +1,15 @@
-import unittest
-from collections import defaultdict
+from unittest import TestCase
 from unittest.mock import patch

 from wikitextprocessor import Wtp
 from wikitextprocessor.parser import TemplateNode

 from wiktextract.config import WiktionaryConfig
 from wiktextract.extractor.fr.inflection import extract_inflection
+from wiktextract.extractor.fr.models import WordEntry
 from wiktextract.wxr_context import WiktextractContext


-class TestInflection(unittest.TestCase):
+class TestInflection(TestCase):
     def setUp(self) -> None:
         self.wxr = WiktextractContext(
             Wtp(lang_code="fr"), WiktionaryConfig(dump_file_lang_code="fr")
@@ -32,12 +32,14 @@ def tearDown(self) -> None:
 """,
     )
     def test_fr_reg(self, mock_node_to_wikitext):
-        page_data = [defaultdict(list, {"word": "productrice"})]
+        page_data = [
+            WordEntry(word="productrice", lang_code="fr", lang_name="Français")
+        ]
         node = TemplateNode(0)
         self.wxr.wtp.start_page("productrice")
         extract_inflection(self.wxr, page_data, node)
         self.assertEqual(
-            page_data[-1].get("forms"),
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
             [{"form": "productrices", "tags": ["Pluriel"]}],
         )

@@ -59,25 +61,27 @@ def test_fr_reg(self, mock_node_to_wikitext):
     )
     def test_fr_accord_al(self, mock_node_to_wikitext):
         # https://fr.wiktionary.org/wiki/animal#Adjectif
-        page_data = [defaultdict(list, {"word": "animal", "lang_code": "fr"})]
+        page_data = [
+            WordEntry(word="animal", lang_code="fr", lang_name="Français")
+        ]
         node = TemplateNode(0)
         self.wxr.wtp.start_page("animal")
         extract_inflection(self.wxr, page_data, node)
         self.assertEqual(
-            page_data[-1].get("forms"),
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
             [
                 {
-                    "ipa": "\\a.ni.mo\\",
+                    "ipas": ["\\a.ni.mo\\"],
                     "tags": ["Pluriel", "Masculin"],
                     "form": "animaux",
                 },
                 {
-                    "ipa": "\\a.ni.mal\\",
+                    "ipas": ["\\a.ni.mal\\"],
                     "tags": ["Singulier", "Féminin"],
                     "form": "animale",
                 },
                 {
-                    "ipa": "\\a.ni.mal\\",
+                    "ipas": ["\\a.ni.mal\\"],
                     "tags": ["Pluriel", "Féminin"],
                     "form": "animales",
                 },
@@ -96,12 +100,14 @@ def test_fr_accord_al(self, mock_node_to_wikitext):
     def test_multiple_lines_ipa(self, mock_node_to_wikitext):
         # https://fr.wiktionary.org/wiki/ration#Nom_commun_2
         # template "en-nom-rég"
-        page_data = [defaultdict(list, {"lang_code": "en", "word": "ration"})]
+        page_data = [
+            WordEntry(word="ration", lang_code="en", lang_name="Anglais")
+        ]
         node = TemplateNode(0)
         self.wxr.wtp.start_page("ration")
         extract_inflection(self.wxr, page_data, node)
         self.assertEqual(
-            page_data[-1].get("forms"),
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
             [
                 {
                     "ipas": ["\\ˈɹæʃ.ənz\\", "\\ˈɹeɪʃ.ənz\\"],
@@ -124,12 +130,14 @@ def test_multiple_lines_ipa(self, mock_node_to_wikitext):
     def test_single_line_multiple_ipa(self, mock_node_to_wikitext):
         # https://fr.wiktionary.org/wiki/ration#Verbe
         # template "en-conj-rég"
-        page_data = [defaultdict(list, {"lang_code": "en", "word": "ration"})]
+        page_data = [
+            WordEntry(word="ration", lang_code="en", lang_name="Anglais")
+        ]
         node = TemplateNode(0)
         self.wxr.wtp.start_page("ration")
         extract_inflection(self.wxr, page_data, node)
         self.assertEqual(
-            page_data[-1].get("forms"),
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
             [
                 {
                     "ipas": ["\\ˈɹæʃ.ən\\", "\\ˈɹeɪʃ.ən\\"],
@@ -152,12 +160,14 @@ def test_single_line_multiple_ipa(self, mock_node_to_wikitext):
     def test_invalid_ipa(self, mock_node_to_wikitext):
         # https://fr.wiktionary.org/wiki/animal#Nom_commun_3
         # template "ast-accord-mf"
-        page_data = [defaultdict(list, {"lang_code": "en", "word": "animal"})]
+        page_data = [
+            WordEntry(word="animal", lang_code="en", lang_name="Français")
+        ]
         node = TemplateNode(0)
         self.wxr.wtp.start_page("animal")
         extract_inflection(self.wxr, page_data, node)
         self.assertEqual(
-            page_data[-1].get("forms"),
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
             [{"tags": ["Pluriel"], "form": "animales"}],
         )

@@ -175,12 +185,12 @@ def test_invalid_ipa(self, mock_node_to_wikitext):
     def test_no_column_headers(self, mock_node_to_wikitext):
         # https://fr.wiktionary.org/wiki/一万#Nom_commun
         # template "zh-formes"
-        page_data = [defaultdict(list, {"lang_code": "zh", "word": "一万"})]
+        page_data = [WordEntry(word="一万", lang_code="zh", lang_name="Chinois")]
         node = TemplateNode(0)
         self.wxr.wtp.start_page("一万")
         extract_inflection(self.wxr, page_data, node)
         self.assertEqual(
-            page_data[-1].get("forms"),
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
             [{"tags": ["Traditionnel"], "form": "一萬"}],
         )

@@ -198,12 +208,14 @@ def test_no_column_headers(self, mock_node_to_wikitext):
     )
     def test_lt_décl_as(self, mock_node_to_wikitext):
         # empty table cells should be ignored
-        page_data = [defaultdict(list, {"lang_code": "lt", "word": "abadai"})]
+        page_data = [
+            WordEntry(word="abadai", lang_code="lt", lang_name="Lituanien")
+        ]
         node = TemplateNode(0)
         self.wxr.wtp.start_page("abadai")
         extract_inflection(self.wxr, page_data, node)
         self.assertEqual(
-            page_data[-1].get("forms"),
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
             [{"tags": ["Singulier", "Nominatif"], "form": "abadas"}],
         )

@@ -229,23 +241,24 @@
 |}""",
     )
     def test_fr_accord_s(self, mock_node_to_wikitext):
-        # https://fr.wiktionary.org/wiki/
-        page_data = [defaultdict(list, {"lang_code": "fr", "word": "aastais"})]
+        page_data = [
+            WordEntry(word="aastais", lang_code="fr", lang_name="Français")
+        ]
         node = TemplateNode(0)
         self.wxr.wtp.start_page("aastais")
         extract_inflection(self.wxr, page_data, node)
         self.assertEqual(
-            page_data[-1].get("forms"),
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
             [
                 {
                     "tags": ["Singulier", "Féminin"],
                     "form": "aastaise",
-                    "ipa": "\\a.a.stɛz\\",
+                    "ipas": ["\\a.a.stɛz\\"],
                 },
                 {
                     "tags": ["Pluriel", "Féminin"],
                     "form": "aastaises",
-                    "ipa": "\\a.a.stɛz\\",
+                    "ipas": ["\\a.a.stɛz\\"],
                 },
             ],
         )
@@ -268,31 +281,35 @@ def test_fr_accord_s(self, mock_node_to_wikitext):
     )
     def test_fr_accord_personne(self, mock_node_to_wikitext):
         # https://fr.wiktionary.org/wiki/enculé_de_ta_race
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(
+                word="enculé de ta race", lang_code="fr", lang_name="Français"
+            )
+        ]
         node = TemplateNode(0)
         self.wxr.wtp.start_page("enculé de ta race")
         extract_inflection(self.wxr, page_data, node)
         self.assertEqual(
-            page_data[-1].get("forms"),
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
             [
                 {
                     "form": "enculé de ma race",
-                    "ipa": "\\ɑ̃.ky.ˌle.də.ma.ˈʁas\\",
+                    "ipas": ["\\ɑ̃.ky.ˌle.də.ma.ˈʁas\\"],
                     "tags": ["Singulier", "1ᵉ personne", "Masculin"],
                 },
                 {
                     "form": "enculés de notre race",
-                    "ipa": "\\ɑ̃.ky.ˌle.də.nɔ.tʁə.ˈʁas\\",
+                    "ipas": ["\\ɑ̃.ky.ˌle.də.nɔ.tʁə.ˈʁas\\"],
                     "tags": ["Pluriel", "1ᵉ personne", "Masculin"],
                 },
                 {
                     "form": "enculée de ma race",
-                    "ipa": "\\ɑ̃.ky.ˌle.də.ma.ˈʁas\\",
+                    "ipas": ["\\ɑ̃.ky.ˌle.də.ma.ˈʁas\\"],
                     "tags": ["Singulier", "1ᵉ personne", "Féminin"],
                 },
                 {
                     "form": "enculées de notre race",
-                    "ipa": "\\ɑ̃.ky.ˌle.də.ma.ˈʁas\\",
+                    "ipas": ["\\ɑ̃.ky.ˌle.də.ma.ˈʁas\\"],
                     "tags": ["Pluriel", "1ᵉ personne", "Féminin"],
                 },
             ],
@@ -320,12 +337,14 @@ def test_fr_accord_personne(self, mock_node_to_wikitext):
     )
     def test_ro_nom_tab(self, mock_node_to_wikitext):
         # https://fr.wiktionary.org/wiki/fenil#Nom_commun_4
-        page_data = [defaultdict(list, {"word": "fenil"})]
+        page_data = [
+            WordEntry(word="fenil", lang_code="fr", lang_name="Français")
+        ]
         node = TemplateNode(0)
         self.wxr.wtp.start_page("fenil")
         extract_inflection(self.wxr, page_data, node)
         self.assertEqual(
-            page_data[-1].get("forms"),
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
             [
                 {
                     "form": "fenilul",
@@ -364,12 +383,14 @@ def test_ro_nom_tab(self, mock_node_to_wikitext):
     )
     def test_sv_nom_c_ar(self, mock_node_to_wikitext):
         # https://fr.wiktionary.org/wiki/robot#Nom_commun_7
-        page_data = [defaultdict(list, {"word": "robot"})]
+        page_data = [
+            WordEntry(word="robot", lang_code="fr", lang_name="Français")
+        ]
         node = TemplateNode(0)
         self.wxr.wtp.start_page("robot")
         extract_inflection(self.wxr, page_data, node)
         self.assertEqual(
-            page_data[-1].get("forms"),
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
             [
                 {"form": "roboten", "tags": ["Défini", "Singulier"]},
                 {"form": "robotar", "tags": ["Indéfini", "Pluriel"]},
@@ -392,12 +413,14 @@ def test_sv_nom_c_ar(self, mock_node_to_wikitext):
     )
     def test_cs_decl_nom_ma_dur(self, mock_node_to_wikitext):
         # https://fr.wiktionary.org/wiki/robot#Nom_commun_1_2
-        page_data = [defaultdict(list, {"word": "robot"})]
+        page_data = [
+            WordEntry(word="robot", lang_code="fr", lang_name="Français")
+        ]
         node = TemplateNode(0)
         self.wxr.wtp.start_page("robot")
         extract_inflection(self.wxr, page_data, node)
         self.assertEqual(
-            page_data[-1].get("forms"),
+            [d.model_dump(exclude_defaults=True) for d in page_data[-1].forms],
             [
                 {"form": "roboti", "tags": ["Pluriel", "Nominatif"]},
                 {"form": "robotové", "tags": ["Pluriel", "Nominatif"]},
diff --git a/tests/test_fr_linkage.py b/tests/test_fr_linkage.py
index 498faf3ff..da2aadb2e 100644
--- a/tests/test_fr_linkage.py
+++ b/tests/test_fr_linkage.py
@@ -1,13 +1,13 @@
-import unittest
-from collections import defaultdict
+from unittest import TestCase

 from wikitextprocessor import Wtp

 from wiktextract.config import WiktionaryConfig
 from wiktextract.extractor.fr.linkage import extract_linkage
+from wiktextract.extractor.fr.models import WordEntry
 from wiktextract.wxr_context import WiktextractContext


-class TestLinkage(unittest.TestCase):
+class TestLinkage(TestCase):
     def setUp(self) -> None:
         self.wxr = WiktextractContext(
             Wtp(lang_code="fr"), WiktionaryConfig(dump_file_lang_code="fr")
@@ -17,7 +17,9 @@ def tearDown(self) -> None:
         self.wxr.wtp.close_db_conn()

     def test_tags(self):
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         self.wxr.wtp.start_page("bonjour")
         self.wxr.wtp.add_page("Modèle:Canada", 10, body="(Canada)")
         self.wxr.wtp.add_page("Modèle:Louisiane", 10, body="(Louisiane)")
@@ -26,41 +28,33 @@ def test_tags(self):
         )
         extract_linkage(self.wxr, page_data, root, "synonymes")
         self.assertEqual(
-            page_data,
-            [
-                {
-                    "synonyms": [
-                        {"word": "bon matin", "tags": ["Canada", "Louisiane"]}
-                    ]
-                }
-            ],
+            page_data[-1].synonyms[0].model_dump(exclude_defaults=True),
+            {"word": "bon matin", "tags": ["Canada", "Louisiane"]},
         )

     def test_zh_synonyms(self):
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         self.wxr.wtp.start_page("你好")
         root = self.wxr.wtp.parse(
             "* {{zh-lien|你们好|nǐmen hǎo|你們好}} — Bonjour (au pluriel)."
         )
         extract_linkage(self.wxr, page_data, root, "synonymes")
         self.assertEqual(
-            page_data,
-            [
-                {
-                    "synonyms": [
-                        {
-                            "word": "你们好",
-                            "roman": "nǐmen hǎo",
-                            "alt": "你們好",
-                            "translation": "Bonjour (au pluriel).",
-                        }
-                    ]
-                }
-            ],
+            page_data[-1].synonyms[0].model_dump(exclude_defaults=True),
+            {
+                "word": "你们好",
+                "roman": "nǐmen hǎo",
+                "alt": "你們好",
+                "translation": "Bonjour (au pluriel).",
+            },
         )

     def test_template_as_partial_tag(self):
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         self.wxr.wtp.start_page("bonjour")
         self.wxr.wtp.add_page("Modèle:lien", 10, body="kwei")
         self.wxr.wtp.add_page("Modèle:Canada", 10, body="(Canada)")
@@ -70,37 +64,34 @@ def test_template_as_partial_tag(self):
         )
         extract_linkage(self.wxr, page_data, root, "synonymes")
         self.assertEqual(
-            page_data,
-            [
-                {
-                    "synonyms": [
-                        {"word": "kwei", "tags": ["Canada", "mot Atikamekw"]}
-                    ]
-                }
-            ],
+            page_data[-1].synonyms[0].model_dump(exclude_defaults=True),
+            {"word": "kwei", "tags": ["Canada", "mot Atikamekw"]},
         )

     def test_list_item_has_two_words(self):
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         self.wxr.wtp.start_page("masse")
         root = self.wxr.wtp.parse(
             "* [[être à la masse]], [[mettre à la masse]]"
         )
         extract_linkage(self.wxr, page_data, root, "dérivés")
         self.assertEqual(
-            page_data,
             [
-                {
-                    "derived": [
-                        {"word": "être à la masse"},
-                        {"word": "mettre à la masse"},
-                    ]
-                }
+                d.model_dump(exclude_defaults=True)
+                for d in page_data[-1].derived
+            ],
+            [
+                {"word": "être à la masse"},
+                {"word": "mettre à la masse"},
             ],
         )

     def test_sub_list(self):
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         self.wxr.wtp.start_page("lézard ocellé")
         root = self.wxr.wtp.parse(
             """* [[saurien]]s (Sauria)
@@ -109,25 +100,26 @@ def test_sub_list(self):
         )
         extract_linkage(self.wxr, page_data, root, "hyper")
         self.assertEqual(
-            page_data,
             [
+                d.model_dump(exclude_defaults=True)
+                for d in page_data[-1].hypernyms
+            ],
+            [
+                {"tags": ["Sauria"], "word": "sauriens"},
                 {
-                    "hypernyms": [
-                        {"tags": ["Sauria"], "word": "sauriens"},
-                        {
-                            "tags": [
-                                "Lacertidae",
-                                "famille des lézards typiques",
-                            ],
-                            "word": "lacertidés",
-                        },
-                    ]
-                }
+                    "tags": [
+                        "Lacertidae",
+                        "famille des lézards typiques",
+                    ],
+                    "word": "lacertidés",
+                },
             ],
         )

     def test_sense(self):
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         self.wxr.wtp.start_page("autrice")
         root = self.wxr.wtp.parse(
             """{{(|Celle qui est à l’origine de quelque chose|1}}
@@ -136,17 +128,16 @@ def test_sense(self):
         )
         extract_linkage(self.wxr, page_data, root, "synonymes")
         self.assertEqual(
-            page_data,
+            [
+                d.model_dump(exclude_defaults=True)
+                for d in page_data[-1].synonyms
+            ],
             [
                 {
-                    "synonyms": [
-                        {
-                            "word": "artisane",
-                            "sense": "Celle qui est à l’origine de quelque chose",
-                            "sense_index": 1,
-                        },
-                    ]
-                }
+                    "word": "artisane",
+                    "sense": "Celle qui est à l’origine de quelque chose",
+                    "sense_index": 1,
                 },
             ],
         )
@@ -154,45 +145,47 @@ def test_derives_autres_langues_section(self):
         # https://fr.wiktionary.org/wiki/eau#Dérivés_dans_d’autres_langues
         self.wxr.wtp.add_page("Modèle:lien", 10, body="{{{1}}}")
         self.wxr.wtp.add_page("Modèle:L", 10, body="Karipúna")
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         self.wxr.wtp.start_page("eau")
         root = self.wxr.wtp.parse(
             "* {{L|kmv}} : {{lien|dlo|kmv}}, {{lien|djilo|kmv}}"
         )
         extract_linkage(self.wxr, page_data, root, "dérivés autres langues")
         self.assertEqual(
-            page_data,
             [
+                d.model_dump(exclude_defaults=True)
+                for d in page_data[-1].derived
+            ],
+            [
+                {
+                    "word": "dlo",
+                    "lang_code": "kmv",
+                    "lang_name": "Karipúna",
+                },
                 {
-                    "derived": [
-                        {
-                            "word": "dlo",
-                            "lang_code": "kmv",
-                            "lang_name": "Karipúna",
-                        },
-                        {
-                            "word": "djilo",
-                            "lang_code": "kmv",
-                            "lang_name": "Karipúna",
-                        },
-                    ]
-                }
+                    "word": "djilo",
+                    "lang_code": "kmv",
+                    "lang_name": "Karipúna",
+                },
             ],
         )

     def test_words_divided_by_slash(self):
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         self.wxr.wtp.start_page("eau")
         root = self.wxr.wtp.parse("* [[benoîte d’eau]] / [[benoite d’eau]]")
         extract_linkage(self.wxr, page_data, root, "dérivés")
         self.assertEqual(
-            page_data,
             [
-                {
-                    "derived": [
-                        {"word": "benoîte d’eau"},
-                        {"word": "benoite d’eau"},
-                    ]
-                }
+                d.model_dump(exclude_defaults=True)
+                for d in page_data[-1].derived
+            ],
+            [
+                {"word": "benoîte d’eau"},
+                {"word": "benoite d’eau"},
             ],
         )
diff --git a/tests/test_fr_note.py b/tests/test_fr_note.py
index 9d08d206a..46105e1e0 100644
--- a/tests/test_fr_note.py
+++ b/tests/test_fr_note.py
@@ -1,13 +1,13 @@
-import unittest
-from collections import defaultdict
+from unittest import TestCase

 from wikitextprocessor import Wtp

 from wiktextract.config import WiktionaryConfig
+from wiktextract.extractor.fr.models import WordEntry
 from wiktextract.extractor.fr.note import extract_note
 from wiktextract.wxr_context import WiktextractContext


-class TestNotes(unittest.TestCase):
+class TestNotes(TestCase):
     def setUp(self) -> None:
         self.wxr = WiktextractContext(
             Wtp(lang_code="fr"), WiktionaryConfig(dump_file_lang_code="fr")
@@ -28,8 +28,11 @@ def test_list_notes(self):
 paragrapy 1
 {{note-féminisation}}"""
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="test", lang_code="fr", lang_name="Français")
+        ]
         extract_note(self.wxr, page_data, nodes.children[0])
         self.assertEqual(
-            page_data, [{"notes": ["paragrapy 1", "list 1", "list 2"]}]
+            page_data[-1].notes,
+            ["paragrapy 1", "list 1", "list 2"],
         )
diff --git a/tests/test_fr_page.py b/tests/test_fr_page.py
index f1c53b98d..d94e6d11a 100644
--- a/tests/test_fr_page.py
+++ b/tests/test_fr_page.py
@@ -2,7 +2,7 @@
 #
 # Copyright (c) 2021 Tatu Ylonen.  See file LICENSE and https://ylonen.org
-import unittest
+from unittest import TestCase

 from wikitextprocessor import Wtp

 from wiktextract.config import WiktionaryConfig
@@ -10,7 +10,7 @@
 from wiktextract.wxr_context import WiktextractContext


-class FrPageTests(unittest.TestCase):
+class TestFrPage(TestCase):
     def setUp(self):
         self.maxDiff = None
         conf1 = WiktionaryConfig(
@@ -25,7 +25,7 @@ def tearDown(self) -> None:
     def test_fr_parse_page(self):
         self.wxr.wtp.add_page("Modèle:langue", 10, "Français")
         self.wxr.wtp.add_page("Modèle:S", 10, "Nom commun")
-        lst = parse_page(
+        page_data = parse_page(
             self.wxr,
             "exemple",
             """
@@ -35,7 +35,7 @@ def test_fr_parse_page(self):
 """,
         )
         self.assertEqual(
-            lst,
+            page_data,
             [
                 {
                     "lang_name": "Français",
diff --git a/tests/test_fr_pronunciation.py b/tests/test_fr_pronunciation.py
index 3473a2b73..ff9063748 100644
--- a/tests/test_fr_pronunciation.py
+++ b/tests/test_fr_pronunciation.py
@@ -1,13 +1,15 @@
-import unittest
-from collections import defaultdict
+from unittest import TestCase

 from wikitextprocessor import Wtp

 from wiktextract.config import WiktionaryConfig
+from wiktextract.extractor.fr.models import WordEntry
 from wiktextract.extractor.fr.pronunciation import extract_pronunciation
 from wiktextract.wxr_context import WiktextractContext


-class TestPronunciation(unittest.TestCase):
+class TestPronunciation(TestCase):
+    maxDiff = None
+
     def setUp(self) -> None:
         self.wxr = WiktextractContext(
             Wtp(lang_code="fr"), WiktionaryConfig(dump_file_lang_code="fr")
@@ -18,22 +20,29 @@ def tearDown(self) -> None:

     def test_pron_list(self):
         page_data = [
-            defaultdict(list, {"lang_code": "en"}),
-            defaultdict(list, {"lang_code": "fr"}),
-            defaultdict(list, {"lang_code": "fr"}),
+            WordEntry(word="bonjour", lang_code="en", lang_name="Anglais"),
+            WordEntry(word="bonjour", lang_code="fr", lang_name="Français"),
+            WordEntry(word="bonjour", lang_code="fr", lang_name="Français"),
         ]
         self.wxr.wtp.add_page("Modèle:pron", 10, body="\\bɔ̃.ʒuʁ\\")
-        self.wxr.wtp.start_page("")
+        self.wxr.wtp.start_page("bonjour")
         root = self.wxr.wtp.parse(
             "=== Prononciation ===\n* {{pron|bɔ̃.ʒuʁ|fr}}\n** {{écouter|France (Paris)|bõ.ʒuːʁ|audio=Fr-bonjour.ogg|lang=fr}}"
         )
-        extract_pronunciation(self.wxr, page_data, root.children[0], {})
-        self.assertEqual(
+        extract_pronunciation(
+            self.wxr,
             page_data,
+            root.children[0],
+            WordEntry(word="bonjour", lang_code="fr", lang_name="Français"),
+        )
+        self.assertEqual(
+            [d.model_dump(exclude_defaults=True) for d in page_data],
             [
-                {"lang_code": "en"},
+                {"word": "bonjour", "lang_code": "en", "lang_name": "Anglais"},
                 {
+                    "word": "bonjour",
                     "lang_code": "fr",
+                    "lang_name": "Français",
                     "sounds": [
                         {
                             "ipa": "bõ.ʒuːʁ",
@@ -45,7 +54,9 @@ def test_pron_list(self):
                     ],
                 },
                 {
+                    "word": "bonjour",
                     "lang_code": "fr",
+                    "lang_name": "Français",
                     "sounds": [
                         {
                             "ipa": "bõ.ʒuːʁ",
@@ -62,7 +73,7 @@ def test_pron_list(self):
     def test_str_pron(self):
         page_data = []
         self.wxr.wtp.add_page("Modèle:Yale-zh", 10, body="Yale")
-        self.wxr.wtp.start_page("")
+        self.wxr.wtp.start_page("你好")
         root = self.wxr.wtp.parse(
             "=== {{S|prononciation}} ===\n* '''cantonais''' {{pron||yue}}\n** {{Yale-zh}} : nei⁵hou²"
         )
@@ -70,11 +81,14 @@ def test_str_pron(self):
             self.wxr,
             page_data,
             root.children[0],
-            defaultdict(list, {"lang_code": "zh"}),
+            WordEntry(word="你好", lang_code="zh", lang_name="Chinois"),
         )
         self.assertEqual(
-            page_data[0].get("sounds"),
-            [{"tags": ["cantonais", "Yale"], "zh-pron": "nei⁵hou²"}],
+            [
+                sound.model_dump(exclude_defaults=True)
+                for sound in page_data[-1].sounds
+            ],
+            [{"tags": ["cantonais", "Yale"], "zh_pron": "nei⁵hou²"}],
         )

     def test_no_ipa(self):
@@ -84,24 +98,25 @@
         """
         Test wikitext from https://fr.wiktionary.org/wiki/mars
         """
         page_data = []
-        self.wxr.wtp.start_page("")
+        self.wxr.wtp.start_page("mars")
         root = self.wxr.wtp.parse(
             """=== {{S|prononciation}} ===
 {{ébauche-pron|sv}}
 * {{écouter|lang=sv|Suède||audio=LL-Q9027 (swe)-Moonhouse-mars.wav}}"""
         )
         extract_pronunciation(
-            self.wxr, page_data, root.children[0], defaultdict(list)
+            self.wxr,
+            page_data,
+            root.children[0],
+            WordEntry(word="mars", lang_code="fr", lang_name="Français"),
         )
         self.assertEqual(
-            page_data[0].get("sounds"),
-            [
-                {
-                    "tags": ["Suède"],
-                    "audio": "LL-Q9027 (swe)-Moonhouse-mars.wav",
-                    "wav_url": "https://commons.wikimedia.org/wiki/Special:FilePath/LL-Q9027 (swe)-Moonhouse-mars.wav",
-                    "ogg_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/3/3f/LL-Q9027_(swe)-Moonhouse-mars.wav/LL-Q9027_(swe)-Moonhouse-mars.wav.ogg",
-                    "mp3_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/3/3f/LL-Q9027_(swe)-Moonhouse-mars.wav/LL-Q9027_(swe)-Moonhouse-mars.wav.mp3",
-                }
-            ],
+            page_data[-1].sounds[0].model_dump(exclude_defaults=True),
+            {
+                "tags": ["Suède"],
+                "audio": "LL-Q9027 (swe)-Moonhouse-mars.wav",
+                "wav_url": "https://commons.wikimedia.org/wiki/Special:FilePath/LL-Q9027 (swe)-Moonhouse-mars.wav",
+                "ogg_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/3/3f/LL-Q9027_(swe)-Moonhouse-mars.wav/LL-Q9027_(swe)-Moonhouse-mars.wav.ogg",
+                "mp3_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/3/3f/LL-Q9027_(swe)-Moonhouse-mars.wav/LL-Q9027_(swe)-Moonhouse-mars.wav.mp3",
+            },
         )
diff --git a/tests/test_fr_translation.py b/tests/test_fr_translation.py
index bfc9f0aac..6feaf6c6f 100644
--- a/tests/test_fr_translation.py
+++ b/tests/test_fr_translation.py
@@ -1,13 +1,13 @@
-import unittest
-from collections import defaultdict
+from unittest import TestCase

 from wikitextprocessor import Wtp

 from wiktextract.config import WiktionaryConfig
+from wiktextract.extractor.fr.models import WordEntry
 from wiktextract.extractor.fr.translation import extract_translation
 from wiktextract.wxr_context import WiktextractContext


-class TestTranslation(unittest.TestCase):
+class TestTranslation(TestCase):
     def setUp(self) -> None:
         self.wxr = WiktextractContext(
             Wtp(lang_code="fr"), WiktionaryConfig(dump_file_lang_code="fr")
@@ -17,111 +17,125 @@ def tearDown(self) -> None:
         self.wxr.wtp.close_db_conn()

     def test_italic_tag(self):
-        self.wxr.wtp.start_page("")
+        # https://fr.wiktionary.org/wiki/bonjour
+        self.wxr.wtp.start_page("bonjour")
         self.wxr.wtp.add_page("Modèle:T", 10, body="Albanais")
         root = self.wxr.wtp.parse(
             "=== Traductions ===\n* {{trad-début|Formule pour saluer}}\n* {{T|sq}} : {{trad+|sq|mirëdita}}, {{trad-|sq|mirë mëngjes}} ''(le matin)''"
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="bonjour", lang_code="fr", lang_name="Français")
+        ]
         extract_translation(self.wxr, page_data, root.children[0])
         self.assertEqual(
-            page_data,
-            [
-                {
-                    "translations": [
-                        {
-                            "lang_code": "sq",
-                            "lang_name": "Albanais",
-                            "word": "mirëdita",
-                            "sense": "Formule pour saluer",
-                        },
-                        {
-                            "lang_code": "sq",
-                            "lang_name": "Albanais",
-                            "word": "mirë mëngjes",
-                            "sense": "Formule pour saluer",
-                            "tags": ["le matin"],
-                        },
-                    ]
-                }
-            ],
+            page_data[-1].model_dump(exclude_defaults=True),
+            {
+                "word": "bonjour",
+                "lang_code": "fr",
+                "lang_name": "Français",
+                "translations": [
+                    {
+                        "lang_code": "sq",
+                        "lang_name": "Albanais",
+                        "word": "mirëdita",
+                        "sense": "Formule pour saluer",
+                    },
+                    {
+                        "lang_code": "sq",
+                        "lang_name": "Albanais",
+                        "word": "mirë mëngjes",
+                        "sense": "Formule pour saluer",
+                        "tags": ["le matin"],
+                    },
+                ],
+            },
         )

     def test_template_tag(self):
-        self.wxr.wtp.start_page("")
+        self.wxr.wtp.start_page("bonjour")
         self.wxr.wtp.add_page("Modèle:T", 10, body="Arabe")
         self.wxr.wtp.add_page("Modèle:transliterator", 10, body="mrḥbā")
         self.wxr.wtp.add_page("Modèle:informel", 10, body="(Informel)")
         root = self.wxr.wtp.parse(
             "=== Traductions ===\n* {{T|ar}} : {{trad+|ar|مرحبا|dif=مرحبًا|tr={{transliterator|ar|مرحبا}}}} {{informel|nocat=1}}"
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="bonjour", lang_code="fr", lang_name="Français")
+        ]
         extract_translation(self.wxr, page_data, root.children[0])
         self.assertEqual(
-            page_data,
-            [
-                {
-                    "translations": [
-                        {
-                            "lang_code": "ar",
-                            "lang_name": "Arabe",
-                            "word": "مرحبًا",
-                            "roman": "mrḥbā",
-                            "tags": ["Informel"],
-                        },
-                    ]
-                }
-            ],
+            page_data[-1].model_dump(exclude_defaults=True),
+            {
+                "word": "bonjour",
+                "lang_code": "fr",
+                "lang_name": "Français",
+                "translations": [
+                    {
+                        "lang_code": "ar",
+                        "lang_name": "Arabe",
+                        "word": "مرحبًا",
+                        "roman": "mrḥbā",
+                        "tags": ["Informel"],
+                    },
+                ],
+            },
         )

     def test_traditional_writing(self):
-        self.wxr.wtp.start_page("")
+        self.wxr.wtp.start_page("bonjour")
         self.wxr.wtp.add_page("Modèle:T", 10, body="Mongol")
         root = self.wxr.wtp.parse(
             "=== Traductions ===\n* {{T|mn}} : {{trad+|mn|сайн байна уу|tr=sain baina uu|tradi=ᠰᠠᠶᠢᠨ ᠪᠠᠶᠢᠨ᠎ᠠ ᠤᠤ}}"
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="bonjour", lang_code="fr", lang_name="Français")
+        ]
         extract_translation(self.wxr, page_data, root.children[0])
         self.assertEqual(
-            page_data,
-            [
-                {
-                    "translations": [
-                        {
-                            "lang_code": "mn",
-                            "lang_name": "Mongol",
-                            "word": "сайн байна уу",
-                            "roman": "sain baina uu",
-                            "traditional_writing": "ᠰᠠᠶᠢᠨ ᠪᠠᠶᠢᠨ᠎ᠠ ᠤᠤ",
-                        },
-                    ]
-                }
-            ],
+            page_data[-1].model_dump(exclude_defaults=True),
+            {
+                "word": "bonjour",
+                "lang_code": "fr",
+                "lang_name": "Français",
+                "translations": [
+                    {
+                        "lang_code": "mn",
+                        "lang_name": "Mongol",
+                        "word": "сайн байна уу",
+                        "roman": "sain baina uu",
+                        "traditional_writing": "ᠰᠠᠶᠢᠨ ᠪᠠᠶᠢᠨ᠎ᠠ ᠤᠤ",
+                    },
+                ],
+            },
         )

     def test_trad_template_gender_parameter(self):
-        self.wxr.wtp.start_page("")
+        # https://fr.wiktionary.org/wiki/cambium
+        self.wxr.wtp.start_page("cambium")
         self.wxr.wtp.add_page("Modèle:T", 10, body="Allemand")
         self.wxr.wtp.add_page("Modèle:trad", 10, body="''neutre''")
         root = self.wxr.wtp.parse(
             "=== Traductions ===\n* {{T|de}} : {{trad|de|Kambium|n}}"
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="cambium", lang_code="fr", lang_name="Français")
+        ]
         extract_translation(self.wxr, page_data, root.children[0])
         self.assertEqual(
-            page_data,
-            [
-                {
-                    "translations": [
-                        {
-                            "lang_code": "de",
-                            "lang_name": "Allemand",
-                            "word": "Kambium",
-                            "tags": ["neutre"],
-                        },
-                    ]
-                }
-            ],
+            page_data[-1].model_dump(exclude_defaults=True),
+            {
+                "word": "cambium",
+                "lang_code": "fr",
+                "lang_name": "Français",
+                "translations": [
+                    {
+                        "lang_code": "de",
+                        "lang_name": "Allemand",
+                        "word": "Kambium",
+                        "tags": ["neutre"],
+                    },
+                ],
+            },
        )

     def test_template_sense_parameter(self):
@@ -134,20 +148,23 @@ def test_template_sense_parameter(self):
 {{trad-début|{{info lex|finance}}|12}}
 * {{T|hr}} : {{trad+|hr|masa}}"""
         )
-        page_data = [defaultdict(list)]
+        page_data = [
+            WordEntry(word="masse", lang_code="fr", lang_name="Français")
+        ]
         extract_translation(self.wxr, page_data, root.children[0])
         self.assertEqual(
-            page_data,
-            [
-                {
-                    "translations": [
-                        {
-                            "lang_code": "hr",
-                            "lang_name": "Croate",
-                            "word": "masa",
-                            "sense": "(Finance)",
-                        },
-                    ]
-                }
-            ],
+            page_data[-1].model_dump(exclude_defaults=True),
+            {
+                "word": "masse",
+                "lang_code": "fr",
+                "lang_name": "Français",
+                "translations": [
+                    {
+                        "lang_code": "hr",
+                        "lang_name": "Croate",
+                        "word": "masa",
+                        "sense": "(Finance)",
+                    },
+                ],
+            },
        )