From 9da9fa9bb06d55ac872e37b1e5532d39aa9d4793 Mon Sep 17 00:00:00 2001
From: Empiriker
Date: Tue, 5 Dec 2023 08:39:59 +0100
Subject: [PATCH] Set extra='forbid' and remove pydantic logging in Russian Wiktionary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This work is a contribution to the EWOK project, which receives funding from LABEX ASLAN (ANR–10–LABX–0081) at the Université de Lyon, as part of the "Investissements d'Avenir" program initiated and overseen by the Agence Nationale de la Recherche (ANR) in France.
---
 src/wiktextract/extractor/ru/models.py | 39 ++------------------------
 src/wiktextract/extractor/ru/page.py   |  4 +--
 2 files changed, 3 insertions(+), 40 deletions(-)

diff --git a/src/wiktextract/extractor/ru/models.py b/src/wiktextract/extractor/ru/models.py
index 1179fc832..3ad7b06ac 100644
--- a/src/wiktextract/extractor/ru/models.py
+++ b/src/wiktextract/extractor/ru/models.py
@@ -1,49 +1,14 @@
 import json
-import logging
-from typing import Optional
 
 from pydantic import BaseModel, ConfigDict, Field, model_validator
 from pydantic.json_schema import GenerateJsonSchema
 
-from wiktextract.wxr_context import WiktextractContext
-
-
-class PydanticLogger:
-    wxr: Optional[WiktextractContext] = None
-
-    @classmethod
-    def debug(
-        cls, msg: str, trace: Optional[str] = None, sortid: str = "XYZunsorted"
-    ):
-        if cls.wxr:
-            cls.wxr.wtp.debug(msg, trace=trace, sortid=sortid)
-        else:
-            logging.debug(msg)
-
 
 class BaseModelWrap(BaseModel):
-    model_config = ConfigDict(validate_assignment=True)
-
-
-class LoggingExtraFieldsModel(BaseModelWrap):
-    @model_validator(mode="before")
-    def log_extra_fields(cls, values):
-        all_allowed_field_names = cls.model_fields.keys()
-        extra_fields = {
-            name: str(value)
-            for name, value in values.items()
-            if name not in all_allowed_field_names
-        }
-        if extra_fields:
-            class_full_name = cls.__name__
-            PydanticLogger.debug(
-                msg=f"Pydantic - Got extra fields in {class_full_name}: \n{extra_fields}",
-                sortid="wiktextract/extractor/es/pydantic/extra_fields/33",
-            )
-        return values
+    model_config = ConfigDict(validate_assignment=True, extra="forbid")
 
 
-class WordEntry(LoggingExtraFieldsModel):
+class WordEntry(BaseModelWrap):
     """WordEntry is a dictionary containing lexical information of a single word extracted from Wiktionary with wiktextract."""
 
     word: str = Field(description="word string")
diff --git a/src/wiktextract/extractor/ru/page.py b/src/wiktextract/extractor/ru/page.py
index c27e5084c..429df0ca0 100644
--- a/src/wiktextract/extractor/ru/page.py
+++ b/src/wiktextract/extractor/ru/page.py
@@ -4,7 +4,7 @@
 
 from wikitextprocessor import NodeKind, WikiNode
 
-from wiktextract.extractor.ru.models import PydanticLogger, WordEntry
+from wiktextract.extractor.ru.models import WordEntry
 from wiktextract.extractor.ru.pronunciation import extract_pronunciation
 from wiktextract.page import clean_node
 from wiktextract.wxr_context import WiktextractContext
@@ -153,8 +153,6 @@ def parse_page(
 
     if wxr.config.verbose:
         logging.info(f"Parsing page: {page_title}")
-    # Pass current wiktextractcontext to pydantic for more better logging
-    PydanticLogger.wxr = wxr
 
     wxr.config.word = page_title
     wxr.wtp.start_page(page_title)