Skip to content

Commit

Permalink
[de] add "pos_title" field
Browse files: browse the repository at this point in the history
  • Loading branch information
xxyzz committed Jan 27, 2025
1 parent: cf77bca; commit: 613a9c7
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/wiktextract/extractor/de/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ class WordEntry(BaseModelWrap):

word: str = Field(description="word string")
pos: str = Field(default="", description="Part of speech type")
pos_title: str = ""
other_pos: list[str] = []
# pos_title: str = Field(default=None, description="Original POS title")
lang_code: str = Field(
Expand Down
4 changes: 4 additions & 0 deletions src/wiktextract/extractor/de/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,12 @@ def process_pos_section(
level_node: LevelNode,
) -> None:
pos_data_list = []
pos_title = ""
for template_node in level_node.find_content(NodeKind.TEMPLATE):
if template_node.template_name == "Wortart":
pos_argument = template_node.template_parameters.get(1, "").strip()
if pos_title == "":
pos_title = pos_argument
if pos_argument in IGNORE_POS:
continue
elif pos_argument in FORM_POS:
Expand Down Expand Up @@ -169,6 +172,7 @@ def process_pos_section(
page_data[-1].tags.append(tag)
if pos_index == 0:
page_data[-1].pos = pos
page_data[-1].pos_title = pos_title
elif pos != page_data[-1].pos and pos not in page_data[-1].other_pos:
page_data[-1].other_pos.append(pos)

Expand Down
6 changes: 6 additions & 0 deletions tests/test_de_gloss.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ def test_form_of(self):
"lang": "Deutsch",
"lang_code": "de",
"pos": "adj",
"pos_title": "Deklinierte Form",
"senses": [
{
"form_of": [{"word": "konjugiert"}],
Expand Down Expand Up @@ -300,6 +301,7 @@ def test_no_bedeutungen_section(self):
"lang": "Litauisch",
"lang_code": "lt",
"pos": "unknown",
"pos_title": "Deklinierte Form",
"senses": [
{
"form_of": [{"word": "abakas"}],
Expand Down Expand Up @@ -330,6 +332,7 @@ def test_grammatische_merkmale_no_form_of_pos_title(self):
"lang": "Latein",
"lang_code": "la",
"pos": "verb",
"pos_title": "Infinitiv",
"senses": [
{
"form_of": [{"word": "abire"}],
Expand Down Expand Up @@ -363,6 +366,7 @@ def test_no_gloss_list(self):
"lang": "Interlingua",
"lang_code": "ia",
"pos": "unknown",
"pos_title": "Konjugierte Form",
"senses": [
{"glosses": ["Indikativ Präsens Aktiv des Verbs amar"]}
],
Expand Down Expand Up @@ -390,6 +394,7 @@ def test_unordered_list(self):
"lang": "Prußisch",
"lang_code": "prg",
"pos": "prep",
"pos_title": "Präposition",
"senses": [{"glosses": ["Nebenform der Präposition esse"]}],
"word": "assa",
}
Expand All @@ -413,6 +418,7 @@ def test_description_list_plus_unordered_list(self):
"lang": "Polnisch",
"lang_code": "pl",
"pos": "noun",
"pos_title": "Deklinierte Form",
"senses": [
{
"form_of": [{"word": "auto"}],
Expand Down
3 changes: 3 additions & 0 deletions tests/test_de_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def test_de_parse_page(self):
"lang_code": "de",
"word": "Beispiel",
"pos": "noun",
"pos_title": "Substantiv",
"senses": [{"tags": ["no-gloss"]}],
}
],
Expand Down Expand Up @@ -69,6 +70,7 @@ def test_de_parse_page_skipping_head_templates(self):
"lang_code": "de",
"word": "Beispiel",
"pos": "noun",
"pos_title": "Substantiv",
"senses": [{"tags": ["no-gloss"]}],
}
],
Expand All @@ -91,6 +93,7 @@ def test_multiple_pos(self):
"lang": "Deutsch",
"lang_code": "de",
"pos": "noun",
"pos_title": "Substantiv",
"other_pos": ["name"],
"senses": [
{
Expand Down

0 comments on commit 613a9c7

Please sign in to comment.