Skip to content

Commit

Permalink
Set extra='forbid' and remove pydantic logging in Russian Wiktionary
Browse files Browse the repository at this point in the history
This work is a contribution to the EWOK project, which receives funding from LABEX ASLAN (ANR–10–LABX–0081) at the Université de Lyon, as part of the "Investissements d'Avenir" program initiated and overseen by the Agence Nationale de la Recherche (ANR) in France.
  • Loading branch information
empiriker committed Dec 5, 2023
1 parent 52441a6 commit 9da9fa9
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 40 deletions.
39 changes: 2 additions & 37 deletions src/wiktextract/extractor/ru/models.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,14 @@
import json
import logging
from typing import Optional

from pydantic import BaseModel, ConfigDict, Field, model_validator
from pydantic.json_schema import GenerateJsonSchema

from wiktextract.wxr_context import WiktextractContext


class PydanticLogger:
    """Forward debug messages from the pydantic models to a logger.

    ``wxr`` is a class-level attribute that defaults to ``None``. While it
    is set to a truthy context, messages are passed to ``wxr.wtp.debug``;
    otherwise they are passed to the standard ``logging`` module.
    """

    wxr: Optional[WiktextractContext] = None

    @classmethod
    def debug(
        cls, msg: str, trace: Optional[str] = None, sortid: str = "XYZunsorted"
    ):
        """Emit ``msg`` as a debug message.

        ``trace`` and ``sortid`` are only forwarded when a context is set;
        the plain ``logging`` fallback receives ``msg`` alone.
        """
        context = cls.wxr
        if not context:
            # No context registered: fall back to the root logger.
            logging.debug(msg)
            return
        context.wtp.debug(msg, trace=trace, sortid=sortid)


class BaseModelWrap(BaseModel):
    # Shared base class for the models in this file.
    # validate_assignment=True asks pydantic to validate values again when
    # an attribute is assigned after construction, not only at creation.
    # (Documented with comments rather than a docstring so the generated
    # JSON schema description is left unchanged.)
    model_config = ConfigDict(validate_assignment=True)


class LoggingExtraFieldsModel(BaseModelWrap):
    # Base model that reports, through PydanticLogger.debug, every input key
    # that is not a declared field of the model, and then hands the input on
    # to normal validation unchanged.
    # (Documented with comments rather than a docstring so the generated
    # JSON schema description is left unchanged.)

    @model_validator(mode="before")
    @classmethod
    def log_extra_fields(cls, values):
        # A "before" validator sees the raw input, which is not guaranteed
        # to be a dict; only dict input can be inspected for unknown keys.
        if not isinstance(values, dict):
            return values

        declared = cls.model_fields
        extra_fields = {
            name: str(value)
            for name, value in values.items()
            if name not in declared
        }
        if extra_fields:
            PydanticLogger.debug(
                msg=f"Pydantic - Got extra fields in {cls.__name__}: {extra_fields}",
                # This module is the Russian ("ru") extractor.
                sortid="wiktextract/extractor/ru/pydantic/extra_fields/33",
            )
        # The input is returned untouched; this validator only logs.
        return values
model_config = ConfigDict(validate_assignment=True, extra="forbid")


class WordEntry(LoggingExtraFieldsModel):
class WordEntry(BaseModelWrap):
"""WordEntry is a dictionary containing lexical information of a single word extracted from Wiktionary with wiktextract."""

word: str = Field(description="word string")
Expand Down
4 changes: 1 addition & 3 deletions src/wiktextract/extractor/ru/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from wikitextprocessor import NodeKind, WikiNode

from wiktextract.extractor.ru.models import PydanticLogger, WordEntry
from wiktextract.extractor.ru.models import WordEntry
from wiktextract.extractor.ru.pronunciation import extract_pronunciation
from wiktextract.page import clean_node
from wiktextract.wxr_context import WiktextractContext
Expand Down Expand Up @@ -153,8 +153,6 @@ def parse_page(

if wxr.config.verbose:
logging.info(f"Parsing page: {page_title}")
# Pass current wiktextractcontext to pydantic for better logging
PydanticLogger.wxr = wxr

wxr.config.word = page_title
wxr.wtp.start_page(page_title)
Expand Down

0 comments on commit 9da9fa9

Please sign in to comment.