diff --git a/bibat/examples/baseball/baseball/data_preparation.py b/bibat/examples/baseball/baseball/data_preparation.py index a411de0..84100d2 100644 --- a/bibat/examples/baseball/baseball/data_preparation.py +++ b/bibat/examples/baseball/baseball/data_preparation.py @@ -10,13 +10,11 @@ from pathlib import Path from typing import Any -import numpy as np import pandas as pd import pandera as pa from pandera.typing import DataFrame, Series from pandera.typing.common import DataFrameBase from pydantic import BaseModel, field_serializer, field_validator -from pydantic_core.core_schema import field_after_validator_function from baseball import util @@ -54,6 +52,7 @@ class PreparedData(BaseModel, arbitrary_types_allowed=True): @field_validator("measurements") def validate_measurements(cls, v: Any) -> DataFrameBase[MeasurementsDF]: + """Validate the measurements field.""" if isinstance(v, str): v = pd.read_json(StringIO(v)) return MeasurementsDF.validate(v) @@ -62,6 +61,7 @@ def validate_measurements(cls, v: Any) -> DataFrameBase[MeasurementsDF]: def serialize_measurements( self, measurements: DataFrame[MeasurementsDF], _info ): + """Serialise the measurements field.""" return measurements.to_json() diff --git a/bibat/examples/baseball/docs/report.html b/bibat/examples/baseball/docs/report.html index beb844e..cf1a3b0 100644 --- a/bibat/examples/baseball/docs/report.html +++ b/bibat/examples/baseball/docs/report.html @@ -3485,14 +3485,14 @@

Preparing the data

measurements=DataFrame[MeasurementsDF](measurements), )

To take into account the inconsistency between the two raw data sources, I first had to change the variable RAW_DATA_FILES:

-
RAW_DATA_FILES = {
-    "2006": [os.path.join(RAW_DIR, "2006.csv")],
-    "bdb": [
-        os.path.join(RAW_DIR, "bdb-main.csv"),
-        os.path.join(RAW_DIR, "bdb-post.csv"),
-        os.path.join(RAW_DIR, "bdb-apps.csv"),
-    ],
-}
+
    "bdb": [
+        os.path.join(RAW_DIR, "bdb-main.csv"),
+        os.path.join(RAW_DIR, "bdb-post.csv"),
+        os.path.join(RAW_DIR, "bdb-apps.csv"),
+    ],
+}
+
+

Next I changed the prepare_data function to handle the two different data sources.

def prepare_data():
     """Run main function."""
diff --git a/docs/_static/report.html b/docs/_static/report.html
index beb844e..cf1a3b0 100644
--- a/docs/_static/report.html
+++ b/docs/_static/report.html
@@ -3485,14 +3485,14 @@ 

Preparing the data

measurements=DataFrame[MeasurementsDF](measurements), )

To take into account the inconsistency between the two raw data sources, I first had to change the variable RAW_DATA_FILES:

-
RAW_DATA_FILES = {
-    "2006": [os.path.join(RAW_DIR, "2006.csv")],
-    "bdb": [
-        os.path.join(RAW_DIR, "bdb-main.csv"),
-        os.path.join(RAW_DIR, "bdb-post.csv"),
-        os.path.join(RAW_DIR, "bdb-apps.csv"),
-    ],
-}
+
    "bdb": [
+        os.path.join(RAW_DIR, "bdb-main.csv"),
+        os.path.join(RAW_DIR, "bdb-post.csv"),
+        os.path.join(RAW_DIR, "bdb-apps.csv"),
+    ],
+}
+
+

Next I changed the prepare_data function to handle the two different data sources.

def prepare_data():
     """Run main function."""