Skip to content

Commit

Permalink
Rename lang_name to lang
Browse files Browse the repository at this point in the history
  • Loading branch information
empiriker committed Jan 5, 2024
1 parent c9ba3d1 commit b976575
Show file tree
Hide file tree
Showing 24 changed files with 90 additions and 84 deletions.
8 changes: 3 additions & 5 deletions src/wiktextract/extractor/de/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class Translation(BaseModelWrap):
default=None,
description="Wiktionary language code of the translation term",
)
lang_name: Optional[str] = Field(
lang: Optional[str] = Field(
default=None, description="Localized language name"
)
uncertain: Optional[bool] = Field(
Expand Down Expand Up @@ -151,9 +151,7 @@ class Sound(BaseModelWrap):
lang_code: list[str] = Field(
default=[], description="Wiktionary language code"
)
lang_name: list[str] = Field(
default=[], description="Localized language name"
)
lang: list[str] = Field(default=[], description="Localized language name")
# roman: list[str] = Field(
# default=[], description="Transliteration to Roman characters"
# )
Expand All @@ -179,7 +177,7 @@ class WordEntry(BaseModelWrap):
lang_code: str = Field(
description="Wiktionary language code", examples=["es"]
)
lang_name: str = Field(
lang: str = Field(
description="Localized language name of the word", examples=["español"]
)
senses: Optional[list[Sense]] = []
Expand Down
12 changes: 6 additions & 6 deletions src/wiktextract/extractor/de/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,15 +272,15 @@ def parse_page(
for level2_node in tree.find_child(NodeKind.LEVEL2):
for subtitle_template in level2_node.find_content(NodeKind.TEMPLATE):
# The language sections are marked with
# == <title> ({{Sprache|<lang_name>}}) ==
# where <title> is the title of the page and <lang_name> is the
# == <title> ({{Sprache|<lang>}}) ==
# where <title> is the title of the page and <lang> is the
# German name of the language of the section.
if subtitle_template.template_name == "Sprache":
lang_name = subtitle_template.template_parameters.get(1)
lang_code = name_to_code(lang_name, "de")
lang = subtitle_template.template_parameters.get(1)
lang_code = name_to_code(lang, "de")
if lang_code == "":
wxr.wtp.warning(
f"Unknown language: {lang_name}",
f"Unknown language: {lang}",
sortid="extractor/de/page/parse_page/76",
)
if (
Expand All @@ -290,7 +290,7 @@ def parse_page(
continue

base_data = WordEntry(
lang_name=lang_name, lang_code=lang_code, word=wxr.wtp.title
lang=lang, lang_code=lang_code, word=wxr.wtp.title
)
parse_section(wxr, page_data, base_data, level2_node.children)

Expand Down
4 changes: 2 additions & 2 deletions src/wiktextract/extractor/de/pronunciation.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,14 @@ def process_lautschrift_template(

lang_code = template_parameters.get("spr")
if lang_code:
lang_name = code_to_name(lang_code, "de")
lang = code_to_name(lang_code, "de")
add_sound_data_without_appending_to_existing_properties(
wxr,
sound_data,
{
"ipa": [ipa],
"lang_code": lang_code,
"lang_name": lang_name,
"lang": lang,
},
)
else:
Expand Down
6 changes: 3 additions & 3 deletions src/wiktextract/extractor/de/translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,10 @@ def process_translation_list(

lang_code = node.template_parameters.get(1)
translation_data.lang_code = lang_code
translation_data.lang_name = code_to_name(lang_code, "de")
if translation_data.lang_name == "":
translation_data.lang = code_to_name(lang_code, "de")
if translation_data.lang == "":
wxr.wtp.debug(
f"Unknown language code: {translation_data.lang_name}",
f"Unknown language code: {translation_data.lang}",
sortid="extractor/de/translation/process_translation_list/70",
)
if node.template_name[-1] == "?":
Expand Down
2 changes: 1 addition & 1 deletion src/wiktextract/extractor/es/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ class WordEntry(BaseModelWrap):
lang_code: str = Field(
description="Wiktionary language code", examples=["es"]
)
lang_name: str = Field(
lang: str = Field(
description="Localized language name of the word", examples=["español"]
)
senses: Optional[list[Sense]] = []
Expand Down
7 changes: 4 additions & 3 deletions src/wiktextract/extractor/es/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from wikitextprocessor import NodeKind, WikiNode
from wikitextprocessor.parser import WikiNodeChildrenList

from wiktextract.extractor.es.etymology import process_etymology_block
from wiktextract.extractor.es.example import extract_example
from wiktextract.extractor.es.gloss import extract_gloss
Expand Down Expand Up @@ -368,10 +369,10 @@ def parse_page(
):
continue

lang_name = clean_node(wxr, categories, subtitle_template)
wxr.wtp.start_section(lang_name)
lang = clean_node(wxr, categories, subtitle_template)
wxr.wtp.start_section(lang)
base_data = WordEntry(
lang_name=lang_name, lang_code=lang_code, word=wxr.wtp.title
lang=lang, lang_code=lang_code, word=wxr.wtp.title
)
base_data.categories.extend(categories["categories"])
parse_entries(wxr, page_data, base_data, level2_node)
Expand Down
4 changes: 2 additions & 2 deletions src/wiktextract/extractor/ru/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class Translation(BaseModelWrap):
lang_code: str = Field(
description="Wiktionary language code of the translation term"
)
lang_name: str = Field(
lang: str = Field(
description="Localized language name of the translation term"
)
sense: Optional[str] = Field(
Expand Down Expand Up @@ -112,7 +112,7 @@ class WordEntry(BaseModelWrap):
lang_code: str = Field(
description="Wiktionary language code", examples=["ru"]
)
lang_name: str = Field(
lang: str = Field(
description="Localized language name of the word", examples=["Русский"]
)
categories: list[str] = Field(
Expand Down
6 changes: 3 additions & 3 deletions src/wiktextract/extractor/ru/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,11 +202,11 @@ def parse_page(

categories = {"categories": []}

lang_name = clean_node(wxr, categories, subtitle_template)
wxr.wtp.start_section(lang_name)
lang = clean_node(wxr, categories, subtitle_template)
wxr.wtp.start_section(lang)

base_data = WordEntry(
lang_name=lang_name, lang_code=lang_code, word=wxr.wtp.title
lang=lang, lang_code=lang_code, word=wxr.wtp.title
)
base_data.categories.extend(categories["categories"])

Expand Down
4 changes: 2 additions & 2 deletions src/wiktextract/extractor/ru/translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def extract_translations(
for key, raw_value in template_node.template_parameters.items():
if isinstance(key, str):
lang_code = key
lang_name = code_to_name(lang_code, "ru")
lang = code_to_name(lang_code, "ru")

for value_node in (
raw_value
Expand All @@ -36,7 +36,7 @@ def extract_translations(
word_entry.translations.append(
Translation(
lang_code=lang_code,
lang_name=lang_name,
lang=lang,
word=word,
sense=sense if sense else None,
)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_de_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()

def get_default_page_data(self) -> list[WordEntry]:
return [WordEntry(word="Beispiel", lang_code="de", lang_name="Deutsch")]
return [WordEntry(word="Beispiel", lang_code="de", lang="Deutsch")]

def test_de_extract_examples(self):
self.wxr.wtp.start_page("")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_de_gloss.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()

def get_default_word_entry(self):
return WordEntry(lang_code="de", lang_name="Deutsch", word="Beispiel")
return WordEntry(lang_code="de", lang="Deutsch", word="Beispiel")

def test_de_extract_glosses(self):
self.wxr.wtp.start_page("")
Expand Down
4 changes: 2 additions & 2 deletions tests/test_de_linkages.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()

def get_default_word_entry(self) -> WordEntry:
return WordEntry(word="Beispiel", lang_code="de", lang_name="Deutsch")
return WordEntry(word="Beispiel", lang_code="de", lang="Deutsch")

def test_de_extract_linkages(self):
test_cases = [
Expand Down Expand Up @@ -105,7 +105,7 @@ def test_de_extract_linkages(self):
self.assertEqual(
word_entry.model_dump(
exclude_defaults=True,
exclude={"word", "lang_code", "lang_name"},
exclude={"word", "lang_code", "lang"},
),
case["expected"],
)
14 changes: 7 additions & 7 deletions tests/test_de_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()

def get_default_base_data(self):
return WordEntry(lang_code="de", lang_name="Deutsch", word="Beispiel")
return WordEntry(lang_code="de", lang="Deutsch", word="Beispiel")

def test_de_parse_page(self):
self.wxr.wtp.add_page("Vorlage:Sprache", 10, "")
Expand All @@ -45,7 +45,7 @@ def test_de_parse_page(self):
lst,
[
{
"lang_name": "Deutsch",
"lang": "Deutsch",
"lang_code": "de",
"word": "Beispiel",
"pos": "noun",
Expand All @@ -71,7 +71,7 @@ def test_de_parse_page_skipping_head_templates(self):
lst,
[
{
"lang_name": "Deutsch",
"lang": "Deutsch",
"lang_code": "de",
"word": "Beispiel",
"pos": "noun",
Expand Down Expand Up @@ -104,7 +104,7 @@ def test_de_parse_section(self):
{
"word": "Beispiel",
"lang_code": "de",
"lang_name": "Deutsch",
"lang": "Deutsch",
"pos": "adj",
"senses": [
{
Expand All @@ -118,7 +118,7 @@ def test_de_parse_section(self):
"word": "Beispiel",
"lang_code": "de",
"pos": "adv",
"lang_name": "Deutsch",
"lang": "Deutsch",
"senses": [
{
"glosses": ["gloss1"],
Expand All @@ -131,7 +131,7 @@ def test_de_parse_section(self):
"word": "Beispiel",
"lang_code": "de",
"pos": "verb",
"lang_name": "Deutsch",
"lang": "Deutsch",
"senses": [
{
"glosses": ["gloss2"],
Expand All @@ -144,7 +144,7 @@ def test_de_parse_section(self):
"word": "Beispiel",
"lang_code": "de",
"pos": "noun",
"lang_name": "Deutsch",
"lang": "Deutsch",
"senses": [
{
"glosses": ["gloss3"],
Expand Down
10 changes: 6 additions & 4 deletions tests/test_de_pronunciation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

from wiktextract.config import WiktionaryConfig
from wiktextract.extractor.de.models import Sound
from wiktextract.extractor.de.pronunciation import (process_hoerbeispiele,
process_ipa)
from wiktextract.extractor.de.pronunciation import (
process_hoerbeispiele,
process_ipa,
)
from wiktextract.wxr_context import WiktextractContext


Expand Down Expand Up @@ -35,7 +37,7 @@ def test_de_process_ipa(self):
"expected": [
{
"ipa": ["ipa1"],
"lang_name": ["Deutsch"],
"lang": ["Deutsch"],
"lang_code": ["de"],
}
],
Expand All @@ -46,7 +48,7 @@ def test_de_process_ipa(self):
{"ipa": ["ipa1", "ipa2"]},
{
"ipa": ["ipa3"],
"lang_name": ["Deutsch"],
"lang": ["Deutsch"],
"lang_code": ["de"],
},
],
Expand Down
Loading

0 comments on commit b976575

Please sign in to comment.