From ce48ec82b766686fdc69b9c722b49d1a29065634 Mon Sep 17 00:00:00 2001 From: Benjamin Gutzmann Date: Thu, 21 Mar 2024 22:07:00 +0100 Subject: [PATCH] Fix parsing of DWD Observation stations where name contains a comma --- CHANGELOG.rst | 2 + .../dwd/observation/test_api_stations.py | 43 +++++++++++++++++++ .../provider/dwd/observation/metaindex.py | 20 ++++----- 3 files changed, 55 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 1da6c28d5..e6139ae5f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,8 @@ Changelog Development *********** +- Fix parsing of DWD Observation stations where name contains a comma + 0.78.0 (09.03.2024) ******************* diff --git a/tests/provider/dwd/observation/test_api_stations.py b/tests/provider/dwd/observation/test_api_stations.py index 7f2ab68e2..2d11a7bfd 100644 --- a/tests/provider/dwd/observation/test_api_stations.py +++ b/tests/provider/dwd/observation/test_api_stations.py @@ -120,3 +120,46 @@ def test_dwd_observations_stations_minute_1(default_settings): }, ) assert_frame_equal(given_df, expected_df) + + +@pytest.mark.remote +def test_dwd_observations_stations_name_with_comma(): + request = DwdObservationRequest( + parameter="kl", + resolution="monthly", + period="recent", + ) + stations = request.all() + stations = stations.df.filter(pl.col("station_id").is_in(["00314", "03164", "06272"])) + assert stations.to_dicts() == [ + { + "station_id": "00314", + "start_date": dt.datetime(1881, 1, 1, 0, 0, tzinfo=ZoneInfo(key="UTC")), + "end_date": dt.datetime(2024, 2, 29, 0, 0, tzinfo=ZoneInfo(key="UTC")), + "latitude": 51.1604, + "longitude": 14.5042, + "height": 234.0, + "name": "Kubschütz, Kr. Bautzen", + "state": "Sachsen", + }, + { + "station_id": "03164", + "start_date": dt.datetime(1881, 1, 1, 0, 0, tzinfo=ZoneInfo(key="UTC")), + "end_date": dt.datetime(2024, 2, 29, 0, 0, tzinfo=ZoneInfo(key="UTC")), + "latitude": 50.8492, + "longitude": 8.7745, + "height": 187.0, + "name": "Cölbe, Kr. Marburg-Biedenkopf", + "state": "Hessen", + }, + { + "station_id": "06272", + "start_date": dt.datetime(2004, 10, 1, 0, 0, tzinfo=ZoneInfo(key="UTC")), + "end_date": dt.datetime(2024, 2, 29, 0, 0, tzinfo=ZoneInfo(key="UTC")), + "latitude": 50.8426, + "longitude": 10.2518, + "height": 284.0, + "name": "Salzungen, Bad-Gräfen-Nitzendorf", + "state": "Thüringen", + }, + ] diff --git a/wetterdienst/provider/dwd/observation/metaindex.py b/wetterdienst/provider/dwd/observation/metaindex.py index d27162edd..41cc59601 100644 --- a/wetterdienst/provider/dwd/observation/metaindex.py +++ b/wetterdienst/provider/dwd/observation/metaindex.py @@ -182,17 +182,17 @@ def _read_meta_df(file: BytesIO) -> pl.LazyFrame: if first.startswith("SP"): # Skip first line if it contains a header lines = lines[1:] - content = BytesIO(b"\n".join(lines)) - df = pl.read_csv(source=content, encoding="latin-1", has_header=False, truncate_ragged_lines=True) + lines = [line.decode("latin-1") for line in lines] + df = pl.DataFrame(lines) column_specs = ( - (0, 5), - (6, 14), - (15, 24), - (23, 38), - (38, 50), - (50, 60), - (60, 102), - (102, 200), + (0, 4), + (6, 13), + (15, 22), + (24, 37), + (39, 49), + (51, 59), + (61, 140), + (141, 200), ) df = read_fwf_from_df(df, column_specs) return df.rename(mapping=lambda col: DWD_COLUMN_NAMES_MAPPING.get(col, col)).lazy()