diff --git a/src/CSET/operators/_utils.py b/src/CSET/operators/_utils.py index 0682dd804..bdae972a7 100644 --- a/src/CSET/operators/_utils.py +++ b/src/CSET/operators/_utils.py @@ -1,4 +1,4 @@ -# © Crown copyright, Met Office (2022-2024) and CSET contributors. +# © Crown copyright, Met Office (2022-2025) and CSET contributors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -189,3 +189,87 @@ def fully_equalise_attributes(cubes: iris.cube.CubeList): logging.debug("Removed attributes from coordinate %s: %s", coord, removed) return cubes + + +def ensure_aggregatable_across_cases( + cube: iris.cube.Cube | iris.cube.CubeList, +) -> iris.cube.Cube: + """Ensure a Cube or CubeList can be aggregated across multiple cases. + + Arguments + --------- + cube: iris.cube.Cube | iris.cube.CubeList + If a Cube is provided a sub-operator is called to determine if the + cube has the necessary dimensional coordinates to be aggregateable. + These necessary coordinates are 'forecast_period' and + 'forecast_reference_time'.If a CubeList is provided a Cube is created + by slicing over all time coordinates and the resulting list is merged + to create an aggregatable cube. + + Returns + ------- + cube: iris.cube.Cube + A time aggregatable cube with dimension coordinates including + 'forecast_period' and 'forecast_reference_time'. + + Raises + ------ + ValueError + If a Cube is provided and it is not aggregatable a ValueError is + raised. The user should then provide a CubeList to be turned into an + aggregatable cube to allow aggregation across multiple cases to occur. + """ + + def is_time_aggregatable(cube: iris.cube.Cube) -> bool: + """Determine whether a cube can be aggregated in time. + + If a cube is aggregatable it will contain both a 'forecast_reference_time' + and 'forecast_period' coordinate as dimensional coordinates. + + Arguments + --------- + cube: iris.cube.Cube + An iris cube which will be checked to see if it is aggregatable based + on a set of pre-defined dimensional time coordinates: + 'forecast_period' and 'forecast_reference_time'. + + Returns + ------- + bool + If true, then the cube is aggregatable and contains dimensional + coordinates including both 'forecast_reference_time' and + 'forecast_period'. + """ + # Acceptable time coordinate names for aggregatable cube. + TEMPORAL_COORD_NAMES = ["forecast_period", "forecast_reference_time"] + + # Coordinate names for the cube. + coord_names = [coord.name() for coord in cube.coords(dim_coords=True)] + + # Check which temporal coordinates we have. + temporal_coords = [ + coord for coord in coord_names if coord in TEMPORAL_COORD_NAMES + ] + # Return whether both coordinates are in the temporal coordinates. + return len(temporal_coords) == 2 + + # Check to see if a cube is input and if that cube is iterable. + if isinstance(cube, iris.cube.Cube): + if is_time_aggregatable(cube): + return cube + else: + raise ValueError( + "Single Cube should have 'forecast_period' and" + "'forecast_reference_time' dimensional coordinates. " + "To make a time aggregatable Cube input a CubeList." + ) + # Create an aggregatable cube from the provided CubeList. + else: + new_cube_list = iris.cube.CubeList() + for sub_cube in cube: + for cube_slice in sub_cube.slices_over( + ["forecast_period", "forecast_reference_time"] + ): + new_cube_list.append(cube_slice) + new_merged_cube = new_cube_list.merge_cube() + return new_merged_cube diff --git a/src/CSET/operators/collapse.py b/src/CSET/operators/collapse.py index 71bb03a8c..75a4566ef 100644 --- a/src/CSET/operators/collapse.py +++ b/src/CSET/operators/collapse.py @@ -21,6 +21,8 @@ import iris.coord_categorisation import iris.cube +from CSET.operators._utils import ensure_aggregatable_across_cases + def collapse( cube: iris.cube.Cube, @@ -76,6 +78,53 @@ def collapse( return collapsed_cube +def collapse_by_lead_time( + cube: iris.cube.Cube | iris.cube.CubeList, + method: str, + additional_percent: float = None, + **kwargs, +) -> iris.cube.Cube: + """Collapse a cube around lead time for multiple cases. + + First checks if the data can be aggregated by lead time easily. Then + collapses by lead time for a specified method using the collapse function. + + Arguments + --------- + cube: iris.cube.Cube | iris.cube.CubeList + Cube to collapse by lead time or CubeList that will be converted + to a cube before collapsing by lead time. + method: str + Type of collapse i.e. method: 'MEAN', 'MAX', 'MIN', 'MEDIAN', + 'PERCENTILE'. For 'PERCENTILE' the additional_percent must be specified. + + Returns + ------- + cube: iris.cube.Cube + Single variable collapsed by lead time based on chosen method. + + Raises + ------ + ValueError + If additional_percent wasn't supplied while using PERCENTILE method. + """ + if method == "PERCENTILE" and additional_percent is None: + raise ValueError("Must specify additional_percent") + # Ensure the cube can be aggregated over multiple cases. + cube_to_collapse = ensure_aggregatable_across_cases(cube) + # Collapse by lead time. + if method == "PERCENTILE": + collapsed_cube = collapse( + cube_to_collapse, + "forecast_period", + method, + additional_percent=additional_percent, + ) + else: + collapsed_cube = collapse(cube_to_collapse, "forecast_period", method) + return collapsed_cube + + def collapse_by_hour_of_day( cube: iris.cube.Cube, method: str, diff --git a/tests/operators/test_collapse.py b/tests/operators/test_collapse.py index 7b28660a2..dcd7f528f 100644 --- a/tests/operators/test_collapse.py +++ b/tests/operators/test_collapse.py @@ -16,6 +16,7 @@ import iris import iris.cube +import numpy as np import pytest from CSET.operators import collapse @@ -29,6 +30,22 @@ def long_forecast() -> iris.cube.Cube: ) +@pytest.fixture() +def long_forecast_multi_day() -> iris.cube.Cube: + """Get long_forecast_multi_day to run tests on.""" + return iris.load_cube( + "tests/test_data/long_forecast_air_temp_multi_day.nc", "air_temperature" + ) + + +@pytest.fixture() +def long_forecast_many_cubes() -> iris.cube.Cube: + """Get long_forecast_may_cubes to run tests on.""" + return iris.load( + "tests/test_data/long_forecast_air_temp_fcst_*.nc", "air_temperature" + ) + + def test_collapse(cube): """Reduces dimension of cube.""" # Test collapsing a single coordinate. @@ -79,3 +96,67 @@ def test_collapse_by_hour_of_day_percentile(long_forecast): ) expected_cube = "" assert repr(collapsed_cube) == expected_cube + + +def test_collapse_by_lead_time_single_cube(long_forecast_multi_day): + """Check cube collapse by lead time.""" + calculated_cube = collapse.collapse( + long_forecast_multi_day, "forecast_period", "MEAN" + ) + assert np.allclose( + calculated_cube.data, + collapse.collapse_by_lead_time(long_forecast_multi_day, "MEAN").data, + rtol=1e-06, + atol=1e-02, + ) + + +def test_collapse_by_lead_time_cube_list( + long_forecast_multi_day, long_forecast_many_cubes +): + """Check CubeList is made into an aggregatable cube and collapses by lead time.""" + calculated_cube = collapse.collapse( + long_forecast_multi_day, "forecast_period", "MEAN" + ) + assert np.allclose( + calculated_cube.data, + collapse.collapse_by_lead_time(long_forecast_many_cubes, "MEAN").data, + rtol=1e-06, + atol=1e-02, + ) + + +def test_collapse_by_lead_time_single_cube_percentile(long_forecast_multi_day): + """Check Cube collapse by lead time with percentiles.""" + calculated_cube = collapse.collapse( + long_forecast_multi_day, "forecast_period", "PERCENTILE", additional_percent=75 + ) + with pytest.raises(ValueError): + collapse.collapse_by_lead_time(long_forecast_multi_day, "PERCENTILE") + assert np.allclose( + calculated_cube.data, + collapse.collapse_by_lead_time( + long_forecast_multi_day, "PERCENTILE", additional_percent=75 + ).data, + rtol=1e-06, + atol=1e-02, + ) + + +def test_collapse_by_lead_time_cube_list_percentile( + long_forecast_multi_day, long_forecast_many_cubes +): + """Check CubeList is made into an aggregatable cube and collapses by lead time with percentiles.""" + calculated_cube = collapse.collapse( + long_forecast_multi_day, "forecast_period", "PERCENTILE", additional_percent=75 + ) + with pytest.raises(ValueError): + collapse.collapse_by_lead_time(long_forecast_many_cubes, "PERCENTILE") + assert np.allclose( + calculated_cube.data, + collapse.collapse_by_lead_time( + long_forecast_many_cubes, "PERCENTILE", additional_percent=75 + ).data, + rtol=1e-06, + atol=1e-02, + ) diff --git a/tests/operators/test_utils.py b/tests/operators/test_utils.py index 3ccb8db9d..2a4820cac 100644 --- a/tests/operators/test_utils.py +++ b/tests/operators/test_utils.py @@ -1,4 +1,4 @@ -# © Crown copyright, Met Office (2022-2024) and CSET contributors. +# © Crown copyright, Met Office (2022-2025) and CSET contributors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,11 +17,36 @@ import iris import iris.coords import iris.cube +import numpy as np import pytest import CSET.operators._utils as operator_utils +@pytest.fixture() +def long_forecast() -> iris.cube.Cube: + """Get long_forecast to run tests on.""" + return iris.load_cube( + "tests/test_data/long_forecast_air_temp_fcst_1.nc", "air_temperature" + ) + + +@pytest.fixture() +def long_forecast_multi_day() -> iris.cube.Cube: + """Get long_forecast_multi_day to run tests on.""" + return iris.load_cube( + "tests/test_data/long_forecast_air_temp_multi_day.nc", "air_temperature" + ) + + +@pytest.fixture() +def long_forecast_many_cubes() -> iris.cube.Cube: + """Get long_forecast_may_cubes to run tests on.""" + return iris.load( + "tests/test_data/long_forecast_air_temp_fcst_*.nc", "air_temperature" + ) + + def test_missing_coord_get_cube_yxcoordname_x(regrid_rectilinear_cube): """Missing X coordinate raises error.""" regrid_rectilinear_cube.remove_coord("grid_longitude") @@ -137,3 +162,44 @@ def test_fully_equalise_attributes_equalise_coords(): fixed_cubes = operator_utils.fully_equalise_attributes([c1, c2]) for cube in fixed_cubes: assert "shared_attribute" not in cube.coord("foo").attributes + + +def test_ensure_aggregatable_across_cases_true_aggregateable_cube( + long_forecast_multi_day, +): + """Check that an aggregatable cube is returned with no changes.""" + assert np.allclose( + operator_utils.ensure_aggregatable_across_cases(long_forecast_multi_day).data, + long_forecast_multi_day.data, + rtol=1e-06, + atol=1e-02, + ) + + +def test_ensure_aggregatable_across_cases_false_aggregateable_cube(long_forecast): + """Check that a non-aggregatable cube raises an error.""" + with pytest.raises(ValueError): + operator_utils.ensure_aggregatable_across_cases(long_forecast) + + +def test_ensure_aggregatable_across_cases_cubelist( + long_forecast_many_cubes, long_forecast_multi_day +): + """Check that a CubeList turns into an aggregatable Cube.""" + # Check output is a Cube. + output_data = operator_utils.ensure_aggregatable_across_cases( + long_forecast_many_cubes + ) + assert isinstance(output_data, iris.cube.Cube) + # Check output can be aggregated in time. + assert isinstance( + operator_utils.ensure_aggregatable_across_cases(output_data), iris.cube.Cube + ) + # Check output is identical to a pre-calculated cube. + pre_calculated_data = long_forecast_multi_day + assert np.allclose( + pre_calculated_data.data, + output_data.data, + rtol=1e-06, + atol=1e-02, + ) diff --git a/tests/test_data/long_forecast_air_temp_fcst_2.nc b/tests/test_data/long_forecast_air_temp_fcst_2.nc new file mode 100644 index 000000000..f96123241 Binary files /dev/null and b/tests/test_data/long_forecast_air_temp_fcst_2.nc differ diff --git a/tests/test_data/long_forecast_air_temp_fcst_3.nc b/tests/test_data/long_forecast_air_temp_fcst_3.nc new file mode 100644 index 000000000..96b73bc32 Binary files /dev/null and b/tests/test_data/long_forecast_air_temp_fcst_3.nc differ diff --git a/tests/test_data/long_forecast_air_temp_multi_day.nc b/tests/test_data/long_forecast_air_temp_multi_day.nc new file mode 100644 index 000000000..86f4c08c1 Binary files /dev/null and b/tests/test_data/long_forecast_air_temp_multi_day.nc differ