diff --git a/src/wiktextract/extractor/nl/inflection.py b/src/wiktextract/extractor/nl/inflection.py
index e1ce78a0f..01d8d091e 100644
--- a/src/wiktextract/extractor/nl/inflection.py
+++ b/src/wiktextract/extractor/nl/inflection.py
@@ -22,6 +22,7 @@
"-denoun1-",
"-nlstam-",
"-csadjc-comp-",
+ "-dumstam-",
]
)
@@ -41,6 +42,8 @@ def extract_inflection_template(
extract_nlstam_template(wxr, word_entry, t_node)
elif t_node.template_name.startswith("-csadjc-comp-"):
extract_csadjc_comp_template(wxr, word_entry, t_node)
+ elif t_node.template_name == "-dumstam-":
+ extract_dumstam_template(wxr, word_entry, t_node)
def extract_noun_adj_table(
@@ -122,9 +125,19 @@ def extract_vervoeging_page(
if t_node.template_name in table_templates:
extract_nlverb_template(wxr, word_entry, t_node, "")
sense = ""
- for level_node in root.find_child_recursively(LEVEL_KIND_FLAGS):
- sense = clean_node(wxr, None, level_node.largs)
- for t_node in level_node.find_child(NodeKind.TEMPLATE):
+ for lang_level_node in root.find_child(NodeKind.LEVEL2):
+ lang_name = clean_node(wxr, None, lang_level_node.largs)
+ if lang_name != word_entry.lang:
+ continue
+ for sense_level_node in lang_level_node.find_child_recursively(
+ LEVEL_KIND_FLAGS
+ ):
+ sense = clean_node(wxr, None, sense_level_node.largs)
+ for t_node in sense_level_node.find_child(NodeKind.TEMPLATE):
+ if t_node.template_name in table_templates:
+ extract_nlverb_template(wxr, word_entry, t_node, sense)
+ # templates placed directly under the language heading, without a sub-section
+ for t_node in lang_level_node.find_child(NodeKind.TEMPLATE):
if t_node.template_name in table_templates:
extract_nlverb_template(wxr, word_entry, t_node, sense)
@@ -333,3 +346,23 @@ def extract_csadjc_comp_template(
form.raw_tags.append(row_header)
translate_raw_tags(form)
word_entry.forms.append(form)
+
+
+def extract_dumstam_template(
+    wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
+) -> None:
+    """Add the forms given as arguments 1-4 of `-dumstam-` to the entry."""
+    # https://nl.wiktionary.org/wiki/Sjabloon:-dumstam-
+    stem_tags = (
+        ["infinitive"],
+        ["past", "singular"],
+        ["past", "plural"],
+        ["past", "participle"],
+    )
+    for position, form_tags in enumerate(stem_tags, start=1):
+        raw_value = t_node.template_parameters.get(position, "")
+        stem_form = clean_node(wxr, None, raw_value)
+        # skip empty values and forms identical to the entry word
+        if stem_form != "" and stem_form != word_entry.word:
+            word_entry.forms.append(Form(form=stem_form, tags=form_tags))
+    clean_node(wxr, word_entry, t_node)
diff --git a/src/wiktextract/extractor/nl/pos.py b/src/wiktextract/extractor/nl/pos.py
index c43885a4a..1d9e886a1 100644
--- a/src/wiktextract/extractor/nl/pos.py
+++ b/src/wiktextract/extractor/nl/pos.py
@@ -260,6 +260,37 @@ def extract_l_template(
}
+def extract_oudeschrijfwijze_template_g_arg(
+    wxr: WiktextractContext, g_arg: str, sense: Sense
+) -> bool:
+    """Tag `sense` from a gender/number code; return whether it was known."""
+    # gender codes take precedence over number codes
+    if g_arg in NOUN_FORM_OF_TEMPLATE_GENDER_TAGS:
+        found = NOUN_FORM_OF_TEMPLATE_GENDER_TAGS[g_arg]
+    elif g_arg in NOUN_FORM_OF_TEMPLATE_NUM_TAGS:
+        found = NOUN_FORM_OF_TEMPLATE_NUM_TAGS[g_arg]
+    else:
+        return False
+    # a code maps to either one tag or a list of tags
+    if isinstance(found, (str, list)):
+        sense.tags.extend([found] if isinstance(found, str) else found)
+    return True
+
+
+def extract_oudeschrijfwijze_template(
+    wxr: WiktextractContext, t_node: TemplateNode, sense: Sense
+) -> None:
+    """Tag `sense` from the `g` argument of `oudeschrijfwijze`."""
+    raw_g = t_node.template_parameters.get("g", "")
+    plain_g = clean_node(wxr, None, raw_g)
+    if extract_oudeschrijfwijze_template_g_arg(wxr, plain_g, sense):
+        return
+    # not a plain code: fall back to the names of templates nested in `g`
+    nested = raw_g if isinstance(raw_g, list) else []
+    for tpl in (p for p in nested if isinstance(p, TemplateNode)):
+        extract_oudeschrijfwijze_template_g_arg(wxr, tpl.template_name, sense)
+
+
def extract_noun_form_of_template(
wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
@@ -268,11 +299,15 @@ def extract_noun_form_of_template(
if t_node.template_name.endswith("-pl"):
sense.tags.append("plural")
else:
- num_arg = t_node.template_parameters.get("getal", "")
+ num_arg = clean_node(
+ wxr, None, t_node.template_parameters.get("getal", "")
+ )
if num_arg in NOUN_FORM_OF_TEMPLATE_NUM_TAGS:
sense.tags.append(NOUN_FORM_OF_TEMPLATE_NUM_TAGS[num_arg])
- gender_arg = t_node.template_parameters.get("gesl", "")
+ gender_arg = clean_node(
+ wxr, None, t_node.template_parameters.get("gesl", "")
+ )
if gender_arg in NOUN_FORM_OF_TEMPLATE_GENDER_TAGS:
gender_tag = NOUN_FORM_OF_TEMPLATE_GENDER_TAGS[gender_arg]
if isinstance(gender_tag, str):
@@ -281,17 +316,8 @@ def extract_noun_form_of_template(
sense.tags.extend(gender_tag)
# Sjabloon:oudeschrijfwijze
- g_arg = t_node.template_parameters.get("g", "")
- for tags_dict in [
- NOUN_FORM_OF_TEMPLATE_GENDER_TAGS,
- NOUN_FORM_OF_TEMPLATE_NUM_TAGS,
- ]:
- if g_arg in tags_dict:
- tag = tags_dict[g_arg]
- if isinstance(tag, str):
- sense.tags.append(tag)
- elif isinstance(tag, list):
- sense.tags.extend(tag)
+ if t_node.template_name == "oudeschrijfwijze":
+ extract_oudeschrijfwijze_template(wxr, t_node, sense)
form_of = clean_node(wxr, None, t_node.template_parameters.get(1, ""))
if form_of != "":
diff --git a/src/wiktextract/extractor/nl/spelling_form.py b/src/wiktextract/extractor/nl/spelling_form.py
index 539a3cdee..6d8cd391b 100644
--- a/src/wiktextract/extractor/nl/spelling_form.py
+++ b/src/wiktextract/extractor/nl/spelling_form.py
@@ -18,7 +18,9 @@ def extract_spelling_form_section(
note_str = new_note_str.strip("() ")
else:
form_nodes.append(new_note_str)
- else:
+ elif isinstance(node, str) or (
+ isinstance(node, WikiNode) and node.kind == NodeKind.LINK
+ ):
form_nodes.append(node)
form_str = clean_node(wxr, None, form_nodes)
if len(form_str) > 0:
diff --git a/src/wiktextract/extractor/nl/tags.py b/src/wiktextract/extractor/nl/tags.py
index 4e029a3bb..e1f01749b 100644
--- a/src/wiktextract/extractor/nl/tags.py
+++ b/src/wiktextract/extractor/nl/tags.py
@@ -66,7 +66,7 @@
# "stopwoord": "filled pause",
"straattaal": "slang",
"streektaal": "regiolectal",
- # "taal": "language",
+ "taal": "linguistics",
"toponiem": "toponymic",
"verkorting": "clipping",
"verouderd": "obsolete",
@@ -92,6 +92,8 @@
"alleen meervoud": "plural-only", # Sjabloon:plurt
"geen meervoud": "no-plural", # Sjabloon:singt
"versterkend voorvoegsel": ["intensifier", "prefix"],
+ "in een bijzin": "with-subordinate-clause", # Sjabloon:ovt-mv-bijz
+ "bij inversie": "inversion", # Sjabloon:1ps
}
TABLE_TAGS = {
@@ -99,6 +101,7 @@
"enkelvoud": "singular",
"meervoud": "plural",
"verkleinwoord": "diminutive",
+ "bezitsvorm": "possessive",
# Sjabloon:adjcomp
"stellend": "positive",
"vergrotend": "comparative",
@@ -123,6 +126,8 @@
"derde": "third-person",
"verleden": "past",
"voorwaardelijk": "conditional",
+ "hoofdzin": "main-clause",
+ "bijzin": "subordinate-clause",
# Sjabloon:-nlname-
"nominatief": "nominative",
"genitief": "genitive",
@@ -202,7 +207,7 @@
"ecologie": "ecology",
"economie": "economics",
# "eendvogels": "anseriform",
- # "eenheid": "",
+ "eenheid": "units-of-measure",
"effectenhandel": "trading",
"egyptologie": "Egyptology",
# "toponiem: eiland": "",
@@ -211,7 +216,7 @@
# "element": "element",
"emotie": "emotion",
# "evenhoevigen": "",
- # "familie": "family",
+ "familie": "familiar",
"farmacologie": "pharmacology",
# "feest": "party",
"fietsen": "cycling",
diff --git a/src/wiktextract/extractor/nl/translation.py b/src/wiktextract/extractor/nl/translation.py
index 474b8475f..0c5341c95 100644
--- a/src/wiktextract/extractor/nl/translation.py
+++ b/src/wiktextract/extractor/nl/translation.py
@@ -52,17 +52,19 @@ def extract_translation_list_item(
elif not before_colon:
if brackets == 0 and isinstance(node, TemplateNode):
if node.template_name == "trad":
- word_entry.translations.append(
- Translation(
- lang=lang_name,
- lang_code=node.template_parameters.get(1, ""),
- word=clean_node(
- wxr, None, node.template_parameters.get(2, "")
- ),
- sense=sense,
- sense_index=sense_index,
- )
+ tr_word = clean_node(
+ wxr, None, node.template_parameters.get(2, "")
)
+ if tr_word != "":
+ word_entry.translations.append(
+ Translation(
+ lang=lang_name,
+ lang_code=node.template_parameters.get(1, ""),
+ word=tr_word,
+ sense=sense,
+ sense_index=sense_index,
+ )
+ )
elif (
node.template_name in LIST_ITEM_TAG_TEMPLATES
and len(word_entry.translations) > 0
diff --git a/tests/test_nl_gloss.py b/tests/test_nl_gloss.py
index c73702f76..505d4d489 100644
--- a/tests/test_nl_gloss.py
+++ b/tests/test_nl_gloss.py
@@ -408,3 +408,34 @@ def test_double_colons_list_in_parentheses(self):
}
],
)
+
+    def test_template_arg_in_oudeschrijfwijze(self):
+        """`g=` made of nested gender templates yields every gender tag.
+
+        The argument is `{{f}} / {{m}}, soms ook: {{n}}` rather than a
+        single code, and the sense is expected to carry all three genders.
+        """
+        self.wxr.wtp.add_page(
+            "Sjabloon:oudeschrijfwijze",
+            10,
+            """'''Jura''' [[WikiWoordenboek:Genus|v]] / [[WikiWoordenboek:Genus|m]], soms ook: [[WikiWoordenboek:Genus|o]][[Categorie:WikiWoordenboek:Test/Bijzonder genus]]
+# verouderde spelling of vorm van [[jura#Nederlands|jura]] tot 2006[[Categorie:Oude spelling van het Nederlands van voor 2006]]""",
+        )
+        page_data = parse_page(
+            self.wxr,
+            "Jura",
+            """==Nederlands==
+====Zelfstandig naamwoord====
+{{oudeschrijfwijze|jura|2006|nld|g={{f}} / {{m}}, soms ook: {{n}}}}""",
+        )
+        # one sense carrying the gloss, both categories and all three genders
+        expected_sense = {
+            "glosses": ["verouderde spelling of vorm van jura tot 2006"],
+            "categories": [
+                "WikiWoordenboek:Test/Bijzonder genus",
+                "Oude spelling van het Nederlands van voor 2006",
+            ],
+            "tags": ["form-of", "feminine", "masculine", "neuter"],
+            "form_of": [{"word": "jura"}],
+        }
+        self.assertEqual(page_data[0]["senses"], [expected_sense])