diff --git a/bibat/examples/baseball/baseball/data_preparation.py b/bibat/examples/baseball/baseball/data_preparation.py index a411de0..84100d2 100644 --- a/bibat/examples/baseball/baseball/data_preparation.py +++ b/bibat/examples/baseball/baseball/data_preparation.py @@ -10,13 +10,11 @@ from pathlib import Path from typing import Any -import numpy as np import pandas as pd import pandera as pa from pandera.typing import DataFrame, Series from pandera.typing.common import DataFrameBase from pydantic import BaseModel, field_serializer, field_validator -from pydantic_core.core_schema import field_after_validator_function from baseball import util @@ -54,6 +52,7 @@ class PreparedData(BaseModel, arbitrary_types_allowed=True): @field_validator("measurements") def validate_measurements(cls, v: Any) -> DataFrameBase[MeasurementsDF]: + """Validate the measurements field.""" if isinstance(v, str): v = pd.read_json(StringIO(v)) return MeasurementsDF.validate(v) @@ -62,6 +61,7 @@ def validate_measurements(cls, v: Any) -> DataFrameBase[MeasurementsDF]: def serialize_measurements( self, measurements: DataFrame[MeasurementsDF], _info ): + """Serialise the measurements field.""" return measurements.to_json() diff --git a/bibat/examples/baseball/docs/report.html b/bibat/examples/baseball/docs/report.html index beb844e..cf1a3b0 100644 --- a/bibat/examples/baseball/docs/report.html +++ b/bibat/examples/baseball/docs/report.html @@ -3485,14 +3485,14 @@
To take into account the inconsistency between the two raw data sources, I first had to change the variable RAW_DATA_FILES
:
= {
- RAW_DATA_FILES "2006": [os.path.join(RAW_DIR, "2006.csv")],
- "bdb": [
- "bdb-main.csv"),
- os.path.join(RAW_DIR, "bdb-post.csv"),
- os.path.join(RAW_DIR, "bdb-apps.csv"),
- os.path.join(RAW_DIR,
- ], }
"bdb": [
+ "bdb-main.csv"),
+ os.path.join(RAW_DIR, "bdb-post.csv"),
+ os.path.join(RAW_DIR, "bdb-apps.csv"),
+ os.path.join(RAW_DIR,
+ ],
+ }
+
Next I changed the prepare_data
function to handle the two different data sources.
def prepare_data():
"""Run main function."""
diff --git a/docs/_static/report.html b/docs/_static/report.html
index beb844e..cf1a3b0 100644
--- a/docs/_static/report.html
+++ b/docs/_static/report.html
@@ -3485,14 +3485,14 @@ Preparing the data
=DataFrame[MeasurementsDF](measurements),
measurements )
To take into account the inconsistency between the two raw data sources, I first had to change the variable RAW_DATA_FILES
:
= {
- RAW_DATA_FILES "2006": [os.path.join(RAW_DIR, "2006.csv")],
- "bdb": [
- "bdb-main.csv"),
- os.path.join(RAW_DIR, "bdb-post.csv"),
- os.path.join(RAW_DIR, "bdb-apps.csv"),
- os.path.join(RAW_DIR,
- ], }
"bdb": [
+ "bdb-main.csv"),
+ os.path.join(RAW_DIR, "bdb-post.csv"),
+ os.path.join(RAW_DIR, "bdb-apps.csv"),
+ os.path.join(RAW_DIR,
+ ],
+ }
+
Next I changed the prepare_data
function to handle the two different data sources.
def prepare_data():
"""Run main function."""