def proces_ja_flx_adj_template(
    wxr: WiktextractContext,
    entry: WordEntry,
    template_node: TemplateNode,
    conj_page_title: str,
) -> None:
    """Extract Japanese adjective inflection forms into ``entry.forms``.

    The template is expanded and each table in the expansion is read row
    by row. A header cell sets the section tag applied to the rows that
    follow it within the same table. In a data row the first cell supplies
    additional tags (one per line) and the second, third and fourth cells
    supply the form, its hiragana reading and its romanisation. A cell
    holding several lines yields one form per line.

    Args:
        wxr: Extraction context; used to expand the template and clean
            node text.
        entry: Word entry whose ``forms`` list is extended in place.
        template_node: The ``ja-flx-adj*`` template node to expand.
        conj_page_title: Title of the conjugation page, stored as the
            ``source`` of every extracted form.
    """
    # https://fr.wiktionary.org/wiki/Modèle:ja-adj
    # https://fr.wiktionary.org/wiki/Modèle:ja-flx-adj-な
    expanded_template = wxr.wtp.parse(
        wxr.wtp.node_to_wikitext(template_node), expand_all=True
    )
    for table_node in expanded_template.find_child(NodeKind.TABLE):
        first_tag = ""
        for row in table_node.find_child(NodeKind.TABLE_ROW):
            forms = []
            # Before any header cell has been seen there is no section tag;
            # leave it out instead of recording an empty string.
            tags = [first_tag] if first_tag else []
            for cell_index, row_child in enumerate(
                row.find_child(NodeKind.TABLE_HEADER_CELL | NodeKind.TABLE_CELL)
            ):
                row_child_text = clean_node(wxr, None, row_child)
                if row_child.kind == NodeKind.TABLE_HEADER_CELL:
                    # Takes effect from the next row: this row's ``tags``
                    # list was already built above.
                    first_tag = row_child_text
                    continue
                for line_index, line in enumerate(row_child_text.splitlines()):
                    if cell_index == 0:
                        tags.append(line)
                        continue
                    # A later column may hold more lines than an earlier
                    # one, so create the form on first use of its index.
                    if line_index + 1 > len(forms):
                        forms.append(Form(tags=tags, source=conj_page_title))
                    if cell_index == 1:
                        forms[line_index].form = line
                    elif cell_index == 2:
                        forms[line_index].hiragana = line
                    elif cell_index == 3:
                        forms[line_index].roman = line

            # Same filter as process_fr_conj_wiki_table: skip forms whose
            # text is empty or the "—" placeholder.
            entry.forms.extend(
                form for form in forms if form.form not in ("", "—")
            )
a/src/wiktextract/extractor/fr/form_line.py +++ b/src/wiktextract/extractor/fr/form_line.py @@ -35,7 +35,10 @@ def extract_form_line( process_zh_mot_template(wxr, node, page_data) elif node.template_name == "ja-mot": process_ja_mot_template(wxr, node, page_data) - elif node.template_name in ("conj", "conjugaison"): + elif node.template_name in ( + "conj", + "conjugaison", + ) or node.template_name.startswith(("ja-adj-", "ja-verb-")): process_conj_template(wxr, node, page_data) else: tag = clean_node(wxr, page_data[-1], node) @@ -162,4 +165,8 @@ def process_conj_template( tag = clean_node(wxr, page_data[-1], expanded_node) if template_node.template_name in ("conj", "conjugaison"): tag = tag.removesuffix("(voir la conjugaison)").strip() + elif template_node.template_name.startswith("ja-"): + tag = ( + tag.removesuffix("(conjugaison)").removesuffix("(flexions)").strip() + ) page_data[-1].tags.append(tag) diff --git a/src/wiktextract/extractor/fr/models.py b/src/wiktextract/extractor/fr/models.py index 6984c498e..461a810ec 100644 --- a/src/wiktextract/extractor/fr/models.py +++ b/src/wiktextract/extractor/fr/models.py @@ -26,7 +26,11 @@ class Form(FrenchBaseModel): form: str = "" tags: list[str] = [] ipas: list[str] = [] - source: str = Field("", description="Form line template name") + source: str = Field( + "", description="Form line template name or Conjugaison page title" + ) + hiragana: str = "" + roman: str = "" class Sound(FrenchBaseModel): diff --git a/tests/test_fr_conj.py b/tests/test_fr_conj.py index 5b009c0b0..ac9f2acc1 100644 --- a/tests/test_fr_conj.py +++ b/tests/test_fr_conj.py @@ -157,3 +157,55 @@ def test_onglets_conjugaison(self): }, ], ) + + def test_ja_flx_adj(self): + # https://fr.wiktionary.org/wiki/Conjugaison:japonais/格好だ + self.wxr.wtp.start_page("格好") + self.wxr.wtp.add_page( + "Conjugaison:japonais/格好だ", + 116, + "{{ja-flx-adj-な|格好|かっこう|kakkou}}", + ) + self.wxr.wtp.add_page( + "Modèle:ja-flx-adj-な", + 10, + """

Flexions

+{| +|- +! colspan=\"4\" | '''Formes de base''' +|- +| '''Imperfectif''' (未然形) || [[格好だろ]] || [[かっこうだろ]] || ''kakkou daro'' +|- +! colspan=\"4\" | '''Clefs de constructions''' +|- +| '''Neutre négatif''' || [[格好ではない]]
[[格好じゃない]]
|| [[かっこうではない]]
[[かっこうじゃない]]
|| ''kakkou dewa nai
kakkou ja nai'' +|}""", + ) + entry = WordEntry(lang_code="ja", lang="Japonais", word="格好") + extract_conjugation(self.wxr, entry, "Conjugaison:japonais/格好だ") + self.assertEqual( + [f.model_dump(exclude_defaults=True) for f in entry.forms], + [ + { + "form": "格好だろ", + "hiragana": "かっこうだろ", + "roman": "kakkou daro", + "source": "Conjugaison:japonais/格好だ", + "tags": ["Formes de base", "Imperfectif (未然形)"], + }, + { + "form": "格好ではない", + "hiragana": "かっこうではない", + "roman": "kakkou dewa nai", + "source": "Conjugaison:japonais/格好だ", + "tags": ["Clefs de constructions", "Neutre négatif"], + }, + { + "form": "格好じゃない", + "hiragana": "かっこうじゃない", + "roman": "kakkou ja nai", + "source": "Conjugaison:japonais/格好だ", + "tags": ["Clefs de constructions", "Neutre négatif"], + }, + ], + )