diff --git a/src/wiktextract/extractor/nl/analyze_template.py b/src/wiktextract/extractor/nl/analyze_template.py
index a9f8a40f..727d10a4 100644
--- a/src/wiktextract/extractor/nl/analyze_template.py
+++ b/src/wiktextract/extractor/nl/analyze_template.py
@@ -104,9 +104,16 @@ def analyze_template(wtp: Wtp, page: Page) -> tuple[set[str], bool]:
# pre-expand section templates, like "=nld=", "-pron-"
# don't expand "=="
# don't expand inflection table templates like "-nlnoun-"
- return (
- set(),
+ need_pre_expand = (
re.fullmatch(r"Sjabloon:=.+=", page.title) is not None
or page.title in POS_TEMPLATES
- or page.title in SECTION_TEMPLATES,
+ or page.title in SECTION_TEMPLATES
)
+
+ # magic word breaks level2 node in "=qtu=" template
+ if need_pre_expand and page.body.startswith("__NOEDITSECTION__"):
+ wtp.add_page(
+ page.title, 10, page.body.removeprefix("__NOEDITSECTION__").strip()
+ )
+
+ return set(), need_pre_expand
diff --git a/src/wiktextract/extractor/nl/example.py b/src/wiktextract/extractor/nl/example.py
index 5aaac894..59729700 100644
--- a/src/wiktextract/extractor/nl/example.py
+++ b/src/wiktextract/extractor/nl/example.py
@@ -4,7 +4,7 @@
from ...wxr_context import WiktextractContext
from .models import Example, Sense
-EXAMPLE_TEMPLATES = frozenset({"bijv-1", "bijv-2", "citeer"})
+# Template names treated as example templates; "bijv-e" shares the "bijv-2"
+# branch of extract_example_template (parameter 1 = text, 2 = translation).
+EXAMPLE_TEMPLATES = frozenset({"bijv-1", "bijv-2", "bijv-e", "citeer"})
def extract_example_list_item(
@@ -28,7 +28,7 @@ def extract_example_template(
e_text = clean_node(wxr, None, node.template_parameters.get(1, ""))
if len(e_text) > 0:
sense.examples.append(Example(text=e_text))
- elif node.template_name == "bijv-2":
+ elif node.template_name in ["bijv-2", "bijv-e"]:
e_text = clean_node(wxr, None, node.template_parameters.get(1, ""))
if len(e_text) > 0:
e_trans = clean_node(wxr, None, node.template_parameters.get(2, ""))
diff --git a/src/wiktextract/extractor/nl/inflection.py b/src/wiktextract/extractor/nl/inflection.py
index 00e6b435..9c8804db 100644
--- a/src/wiktextract/extractor/nl/inflection.py
+++ b/src/wiktextract/extractor/nl/inflection.py
@@ -12,7 +12,13 @@
def extract_inflection_template(
wxr: WiktextractContext, word_entry: WordEntry, t_node: TemplateNode
) -> None:
- if t_node.template_name in ["-nlnoun-", "adjcomp"]:
+ if t_node.template_name in [
+ "-nlnoun-",
+ "adjcomp",
+ "-nlname-",
+ "-denoun-",
+ "-denoun1-",
+ ]:
extract_noun_adj_table(wxr, word_entry, t_node)
elif t_node.template_name == "-nlstam-":
extract_nlstam_template(wxr, word_entry, t_node)
diff --git a/src/wiktextract/extractor/nl/pos.py b/src/wiktextract/extractor/nl/pos.py
index 4ce46523..c43885a4 100644
--- a/src/wiktextract/extractor/nl/pos.py
+++ b/src/wiktextract/extractor/nl/pos.py
@@ -40,7 +40,7 @@ def extract_pos_section(
forms_data.forms.clear()
forms_data.categories.clear()
extract_pos_section_nodes(wxr, page_data, base_data, forms_data, level_node)
- if len(page_data[-1].senses) == 0:
+ if len(page_data[-1].senses) == 0 and pos_title in LINKAGE_SECTIONS:
page_data.pop()
@@ -56,9 +56,9 @@ def extract_pos_section_nodes(
if (
isinstance(node, WikiNode)
and node.kind == NodeKind.LIST
- and node.sarg.endswith("#")
+ and node.sarg.endswith(("#", "::"))
):
- if gloss_list_start == 0:
+ if gloss_list_start == 0 and node.sarg.endswith("#"):
gloss_list_start = index
extract_pos_header_line_nodes(
wxr, page_data[-1], level_node.children[:index]
@@ -120,12 +120,33 @@ def extract_pos_section_nodes(
extract_verb_form_of_template(
wxr, page_data, base_data, forms_data, node
)
+ elif isinstance(node, TemplateNode):
+ # tag template after form-of template
+ cats = {}
+ expanded_text = clean_node(wxr, cats, node)
+ if (
+ expanded_text.startswith("(")
+ and expanded_text.endswith(")")
+ and len(page_data[-1].senses) > 0
+ ):
+ page_data[-1].senses[-1].raw_tags.append(
+ expanded_text.strip("() ")
+ )
+ page_data[-1].senses[-1].categories.extend(
+ cats.get("categories", [])
+ )
+ translate_raw_tags(page_data[-1].senses[-1])
def extract_gloss_list_item(
- wxr: WiktextractContext, word_entry: WordEntry, list_item: WikiNode
+ wxr: WiktextractContext,
+ word_entry: WordEntry,
+ list_item: WikiNode,
) -> None:
- sense = Sense()
+ create_new_sense = (
+ False if list_item.sarg == "::" and len(word_entry.senses) > 0 else True
+ )
+ sense = Sense() if create_new_sense else word_entry.senses[-1]
gloss_nodes = []
for child in list_item.children:
if isinstance(child, TemplateNode):
@@ -158,13 +179,28 @@ def extract_gloss_list_item(
while gloss_text.startswith(","): # between qualifier templates
gloss_text = gloss_text.removeprefix(",").strip()
m = re.match(r"\(([^()]+)\)", gloss_text)
- if m is not None: # expanded "verouderd" template in "2ps" template
- gloss_text = gloss_text[m.end() :].strip()
- sense.raw_tags.append(m.group(1))
+ if m is not None:
+ new_gloss_text = gloss_text[m.end() :].strip()
+ if new_gloss_text != "":
+ # expanded "verouderd" template in "2ps" template
+ gloss_text = new_gloss_text
+ sense.raw_tags.append(m.group(1))
+ else: # gloss text after form-of template
+ gloss_text = m.group(1)
+
if len(gloss_text) > 0:
sense.glosses.append(gloss_text)
+ if (
+ len(sense.glosses) > 0
+ or len(sense.tags) > 0
+ or len(sense.raw_tags) > 0
+ or len(sense.examples) > 0
+ ):
translate_raw_tags(sense)
- word_entry.senses.append(sense)
+ if len(sense.glosses) == 0:
+ sense.tags.append("no-gloss")
+ if create_new_sense:
+ word_entry.senses.append(sense)
def extract_pos_header_line_nodes(
diff --git a/src/wiktextract/extractor/nl/tags.py b/src/wiktextract/extractor/nl/tags.py
index e46eea13..ffcc0bce 100644
--- a/src/wiktextract/extractor/nl/tags.py
+++ b/src/wiktextract/extractor/nl/tags.py
@@ -123,6 +123,12 @@
"derde": "third-person",
"verleden": "past",
"voorwaardelijk": "conditional",
+ # Sjabloon:-nlname-
+ "nominatief": "nominative",
+ "genitief": "genitive",
+ # Sjabloon:-denoun-
+ "datief": "dative",
+ "accusatief": "accusative",
}
diff --git a/tests/test_nl_gloss.py b/tests/test_nl_gloss.py
index 28cea48b..c73702f7 100644
--- a/tests/test_nl_gloss.py
+++ b/tests/test_nl_gloss.py
@@ -290,3 +290,121 @@ def test_eng_onv_d(self):
self.assertEqual(
data[1]["categories"], ["Zelfstandig naamwoord in het Engels"]
)
+
+ def test_no_gloss_but_has_tag_example(self):
+ # A "#" list item that holds only a tag template (no gloss text) must
+ # still yield a sense: it is expected to carry the "no-gloss" tag, the
+ # template's category, and the example that follows the list item.
+ # Stub for the "naam-m" template: a parenthesised label plus a category
+ # link.
+ self.wxr.wtp.add_page(
+ "Sjabloon:naam-m",
+ 10,
+ """([[mannelijk]]e [[naam]])[[Categorie:Mannelijke naam_in_het_Engels]]""",
+ )
+ data = parse_page(
+ self.wxr,
+ "Clark",
+ """==Engels==
+====Eigennaam====
+'''Clark'''
+#{{naam-m|eng}}
+{{bijv-2|'''Clark''' Gable was a popular movie star|'''Clark''' Gable was een bekende filmster.}}""",
+ )
+ # Expected: no "glosses" key at all; the label "mannelijke naam" is
+ # expected to surface as the tags "masculine" and "name" (presumably via
+ # translate_raw_tags -- confirm against tags.py).
+ self.assertEqual(
+ data[0]["senses"],
+ [
+ {
+ "categories": ["Mannelijke naam_in_het_Engels"],
+ "tags": ["masculine", "name", "no-gloss"],
+ "examples": [
+ {
+ "text": "Clark Gable was a popular movie star",
+ "translation": "Clark Gable was een bekende filmster.",
+ }
+ ],
+ }
+ ],
+ )
+
+ def test_double_colons_list(self):
+ # A "::" line after a form-of template is expected to be merged into the
+ # sense created by that template instead of starting a new sense: its
+ # text becomes a second gloss, and the label and category of its tag
+ # template are added to the same sense.
+ # Stub for the form-of template: a bold headword line followed by a "#"
+ # gloss list item with a category link.
+ self.wxr.wtp.add_page(
+ "Sjabloon:oudeschrijfwijze",
+ 10,
+ """'''Ehstland'''
+# verouderde spelling of vorm van [[Estland#Duits|Estland]][[Categorie:Oude spelling van het Duits]]""",
+ )
+ # Stub for the tag template used at the start of the "::" line.
+ self.wxr.wtp.add_page(
+ "Sjabloon:verouderd",
+ 10,
+ "([[verouderd]])[[Categorie:Verouderd_in_het_Duits]]",
+ )
+ data = parse_page(
+ self.wxr,
+ "Ehstland",
+ """==Duits==
+====Eigennaam====
+{{oudeschrijfwijze|Estland||deu}}
+::{{verouderd|deu}} nominatief enkelvoud van [[Ehstland#Duits|Ehstland]]""",
+ )
+ # Exactly one sense is expected, holding both glosses and both
+ # categories; "verouderd" is expected to appear as the "obsolete" tag.
+ self.assertEqual(
+ data[0]["senses"],
+ [
+ {
+ "categories": [
+ "Oude spelling van het Duits",
+ "Verouderd_in_het_Duits",
+ ],
+ "glosses": [
+ "verouderde spelling of vorm van Estland",
+ "nominatief enkelvoud van Ehstland",
+ ],
+ "tags": ["form-of", "obsolete"],
+ "form_of": [{"word": "Estland"}],
+ }
+ ],
+ )
+
+ def test_tag_template_after_form_of_template(self):
+ # A template that expands to a parenthesised label and sits on the same
+ # line after the form-of template is expected to be applied to the last
+ # sense: here it should yield the "geology" topic and the template's
+ # category.
+ # NOTE(review): "Sjabloon:oudeschrijfwijze" is not registered in this
+ # test; presumably the sense comes from the extractor's own handling of
+ # that template's arguments -- confirm.
+ self.wxr.wtp.add_page(
+ "Sjabloon:geologie",
+ 10,
+ "([[geologie]])[[Categorie:Geologie_in_het_Nederlands]]",
+ )
+ data = parse_page(
+ self.wxr,
+ "Fanerozoïcum",
+ """==Nederlands==
+====Zelfstandig naamwoord====
+{{oudeschrijfwijze|fanerozoïcum|2006|nld|g=n}} {{geologie|nld}}""",
+ )
+ # Only the topic and category of the first sense are checked.
+ self.assertEqual(data[0]["senses"][0]["topics"], ["geology"])
+ self.assertEqual(
+ data[0]["senses"][0]["categories"], ["Geologie_in_het_Nederlands"]
+ )
+
+ def test_double_colons_list_in_parentheses(self):
+ # A "::" line whose entire text is wrapped in parentheses: the expected
+ # second gloss is that text without the surrounding parentheses, and no
+ # extra tag is expected from it (tags stay ["form-of"]).
+ # Stub for the form-of template: a bold headword line followed by a "#"
+ # gloss list item with a category link.
+ self.wxr.wtp.add_page(
+ "Sjabloon:oudeschrijfwijze",
+ 10,
+ """'''Haafer'''
+# verouderde spelling of vorm van [[Hafer#Duits|Hafer]] tot 1876[[Categorie:Oude spelling van het Duits van voor 1876]]""",
+ )
+ # Stub for the "Q" template: expands to a plain wiki link.
+ self.wxr.wtp.add_page("Sjabloon:Q", 10, "[[Haafer#Duits|Haafer]]")
+ data = parse_page(
+ self.wxr,
+ "Haafer",
+ """==Duits==
+====Zelfstandig naamwoord====
+{{oudeschrijfwijze|Hafer|1876|deu}}
+::(nominatief mannelijk enkelvoud van {{Q|Haafer|deu}})""",
+ )
+ # Exactly one sense is expected, with the parenthesised text merged in
+ # as the second gloss.
+ self.assertEqual(
+ data[0]["senses"],
+ [
+ {
+ "glosses": [
+ "verouderde spelling of vorm van Hafer tot 1876",
+ "nominatief mannelijk enkelvoud van Haafer",
+ ],
+ "categories": ["Oude spelling van het Duits van voor 1876"],
+ "tags": ["form-of"],
+ "form_of": [{"word": "Hafer"}],
+ }
+ ],
+ )