diff --git a/CHANGELOG.md b/CHANGELOG.md index ff6bf170..7ce4a817 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,8 @@ and start a new "In Progress" section above it. ## In progress - Better argument validation in `resample_spatial`/`resample_cube_spatial` (related to [Open-EO/openeo-python-client#690](https://github.com/Open-EO/openeo-python-client/issues/690)) +- Improve `resample_spatial`/`resample_cube_spatial` metadata tracking in dry-run ([#348](https://github.com/Open-EO/openeo-python-driver/issues/348)) + ## 0.123.0 diff --git a/openeo_driver/dry_run.py b/openeo_driver/dry_run.py index c3321a5f..cdc46b37 100644 --- a/openeo_driver/dry_run.py +++ b/openeo_driver/dry_run.py @@ -48,11 +48,13 @@ DimensionAlreadyExistsException, SpatialDimension, TemporalDimension, + CubeMetadata, ) from pyproj import CRS from shapely.geometry import GeometryCollection, MultiPolygon, Point, Polygon from shapely.geometry.base import BaseGeometry +from openeo.utils.normalize import normalize_resample_resolution from openeo_driver import filter_properties from openeo_driver.datacube import DriverDataCube, DriverVectorCube from openeo_driver.datastructs import ResolutionMergeArgs, SarBackscatterArgs @@ -427,7 +429,7 @@ def get_source_constraints(self, merge=True) -> List[SourceConstraint]: metadata: CollectionMetadata = target.metadata spatial_dim = metadata.spatial_dimensions[0] # TODO: derive resolution from openeo:gsd instead (see openeo-geopyspark-driver) - resolutions = [dim.step for dim in metadata.spatial_dimensions if dim.step is not None] + resolutions = tuple(dim.step for dim in metadata.spatial_dimensions if dim.step is not None) if len(resolutions) > 0 and spatial_dim.crs is not None: constraints["resample"] = { "target_crs": spatial_dim.crs, @@ -436,9 +438,7 @@ def get_source_constraints(self, merge=True) -> List[SourceConstraint]: } args = resampling_op.get_arguments_by_operation("resample_spatial") if args: - resolution = args[0]["resolution"] - if not isinstance(resolution, list): - resolution = [resolution, resolution] + resolution = normalize_resample_resolution(args[0]["resolution"]) projection = args[0]["projection"] method = args[0].get("method", "near") constraints["resample"] = {"target_crs": projection, "resolution": resolution, "method": method} @@ -530,12 +530,12 @@ class DryRunDataCube(DriverDataCube): estimate memory/cpu usage, ... """ - def __init__(self, traces: List[DataTraceBase], data_tracer: DryRunDataTracer, metadata: CollectionMetadata = None): + def __init__(self, traces: List[DataTraceBase], data_tracer: DryRunDataTracer, metadata: CubeMetadata = None): super(DryRunDataCube, self).__init__(metadata=metadata) self._traces = traces or [] self._data_tracer = data_tracer - def _process(self, operation, arguments, metadata: CollectionMetadata = None) -> "DryRunDataCube": + def _process(self, operation, arguments, metadata: CubeMetadata = None) -> "DryRunDataCube": """Helper to handle single-cube operations""" # New data cube with operation added to each trace traces = self._data_tracer.process_traces(traces=self._traces, operation=operation, arguments=arguments) @@ -691,14 +691,30 @@ def raster_to_vector(self): def run_udf(self): return self._process(operation="run_udf", arguments={}) + def resample_spatial( + self, + resolution: Union[float, Tuple[float, float]], + projection: Union[int, str] = None, + method: str = "near", + align: str = "upper-left", + ): + return self._process( + "resample_spatial", + arguments={"resolution": resolution, "projection": projection, "method": method, "align": align}, + metadata=(self.metadata or CubeMetadata()).resample_spatial(resolution=resolution, projection=projection), + ) + def resample_cube_spatial(self, target: "DryRunDataCube", method: str = "near") -> "DryRunDataCube": cube = self._process("process_type", [ProcessType.FOCAL_SPACE]) cube = cube._process("resample_cube_spatial", arguments={"target": target, "method": method}) + if target.metadata: + metadata = (self.metadata or CubeMetadata()).resample_cube_spatial(target=target.metadata) + else: + metadata = None return DryRunDataCube( traces=cube._traces + target._traces, data_tracer=self._data_tracer, - # TODO: properly merge (other) metadata? - metadata=self.metadata, + metadata=metadata, ) def reduce_dimension( @@ -763,18 +779,6 @@ def sar_backscatter(self, args: SarBackscatterArgs) -> "DryRunDataCube": def resolution_merge(self, args: ResolutionMergeArgs) -> "DryRunDataCube": return self._process("resolution_merge", args) - def resample_spatial( - self, - resolution: Union[float, Tuple[float, float]], - projection: Union[int, str] = None, - method: str = "near", - align: str = "upper-left", - ): - return self._process( - "resample_spatial", - arguments={"resolution": resolution, "projection": projection, "method": method, "align": align}, - ) - def apply_kernel(self, kernel: numpy.ndarray, factor=1, border=0, replace_invalid=0) -> "DriverDataCube": cube = self._process("process_type", [ProcessType.FOCAL_SPACE]) cube = cube._process("pixel_buffer", arguments={"buffer_size": [x / 2.0 for x in kernel.shape]}) diff --git a/setup.py b/setup.py index 04e3cadb..2704274f 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ "flask>=2.0.0", "werkzeug>=3.0.3", # https://github.com/Open-EO/openeo-python-driver/issues/243 "requests>=2.28.0", - "openeo>=0.33.0", + "openeo>=0.38.0.a2.dev", "openeo_processes==0.0.4", # 0.0.4 is special build/release, also see https://github.com/Open-EO/openeo-python-driver/issues/152 "gunicorn>=20.0.1", "numpy>=1.22.0", diff --git a/tests/test_dry_run.py b/tests/test_dry_run.py index bd365d69..79507d1e 100644 --- a/tests/test_dry_run.py +++ b/tests/test_dry_run.py @@ -5,6 +5,7 @@ import pytest import shapely.geometry from openeo.internal.graph_building import PGNode +from openeo.metadata import SpatialDimension from openeo.rest.datacube import DataCube from openeo_driver.datacube import DriverVectorCube @@ -287,7 +288,7 @@ def test_evaluate_graph_diamond(dry_run_env, dry_run_tracer): ("load_collection", ("S2_FOOBAR", ())), { "bands": ["grass"], - "resample": {"method": "near", "resolution": [10, 10], "target_crs": CRS_UTM}, + "resample": {"method": "near", "resolution": (10, 10), "target_crs": CRS_UTM}, "spatial_extent": {"west": 1, "east": 2, "south": 51, "north": 52, "crs": "EPSG:4326"}, }, ), @@ -961,7 +962,7 @@ def test_multiple_filter_spatial(dry_run_env, dry_run_tracer): "west": 166021.44308054057, }, "filter_spatial": {"geometries": DummyVectorCube.from_geometry(shapely.geometry.shape(polygon1))}, - "resample": {"method": "near", "resolution": [0.25, 0.25], "target_crs": 4326}, + "resample": {"method": "near", "resolution": (0.25, 0.25), "target_crs": 4326}, "weak_spatial_extent": { "crs": "EPSG:32631", "east": 1056748.2872412915, @@ -1068,7 +1069,7 @@ def test_resample_filter_spatial(dry_run_env, dry_run_tracer): "west": 166021.44308054057, }, "filter_spatial": {"geometries": DummyVectorCube.from_geometry(shapely.geometry.shape(polygon))}, - "resample": {"method": "near", "resolution": [0.25, 0.25], "target_crs": 4326}, + "resample": {"method": "near", "resolution": (0.25, 0.25), "target_crs": 4326}, "weak_spatial_extent": { "crs": "EPSG:32631", "east": 1056748.2872412915, @@ -1807,7 +1808,7 @@ def test_filter_after_merge_cubes(dry_run_env, dry_run_tracer): { "bands": ["ndvi"], "process_type": [ProcessType.FOCAL_SPACE], - "resample": {"method": "average", "resolution": [10, 10], "target_crs": CRS_UTM}, + "resample": {"method": "average", "resolution": (10, 10), "target_crs": CRS_UTM}, "spatial_extent": { "crs": "EPSG:32631", "east": 642140.0, @@ -2385,11 +2386,11 @@ def test_complex_extract_load_stac(dry_run_env, dry_run_tracer): assert loadparams.bands == ["SCL"] assert loadparams.global_extent == expected_extent assert loadparams.pixel_buffer == [38.5, 38.5] - assert loadparams.target_resolution == None + assert loadparams.target_resolution is None loadparams = _extract_load_parameters(dry_run_env, source_id_stac) assert loadparams.global_extent == expected_extent - assert loadparams.target_resolution == [10, 10] + assert loadparams.target_resolution == (10, 10) def test_normalize_geometries(dry_run_env, dry_run_tracer): @@ -2447,3 +2448,41 @@ def test_normalize_geometries(dry_run_env, dry_run_tracer): "south": 5654910, "west": 89920, } + + +def test_resample_cube_spatial_from_resampled_target(dry_run_env, dry_run_tracer): + """ + Use `resample_cube_spatial` with a target that is result from a separate `resample_spatial` + """ + target = DataCube.load_collection("SENTINEL1_GRD", connection=None) + target = target.resample_spatial(resolution=(3, 5), projection=32631) + assert target.metadata.spatial_dimensions == [ + SpatialDimension(name="x", extent=[None, None], crs=32631, step=3), + SpatialDimension(name="y", extent=[None, None], crs=32631, step=5), + ] + + cube = DataCube.load_collection("S2_FOOBAR", connection=None) + assert cube.metadata is None + cube = cube.resample_cube_spatial(target=target) + assert cube.metadata.spatial_dimensions == [ + SpatialDimension(name="x", extent=[None, None], crs=32631, step=3), + SpatialDimension(name="y", extent=[None, None], crs=32631, step=5), + ] + + pg = cube.flat_graph() + _ = evaluate(pg, env=dry_run_env, do_dry_run=False) + + source_constraints = dry_run_tracer.get_source_constraints(merge=True) + assert source_constraints == [ + ( + ("load_collection", ("S2_FOOBAR", ())), + { + "process_type": [ProcessType.FOCAL_SPACE], + "resample": {"method": "near", "resolution": (3, 5), "target_crs": 32631}, + }, + ), + ( + ("load_collection", ("SENTINEL1_GRD", ())), + {"resample": {"method": "near", "resolution": (3, 5), "target_crs": 32631}}, + ), + ] diff --git a/tests/test_views_execute.py b/tests/test_views_execute.py index 11bbec6a..ae44d661 100644 --- a/tests/test_views_execute.py +++ b/tests/test_views_execute.py @@ -644,7 +644,7 @@ def test_execute_resample_and_merge_cubes(api): dummy = dummy_backend.get_collection("S2_FAPAR_CLOUDCOVER") last_load_collection_call = dummy_backend.last_load_collection_call("S2_FAPAR_CLOUDCOVER") assert last_load_collection_call.target_crs == CRS_UTM - assert last_load_collection_call.target_resolution == [10, 10] + assert last_load_collection_call.target_resolution == (10, 10) assert dummy.merge_cubes.call_count == 1 assert dummy.resample_cube_spatial.call_count == 1 assert dummy.resample_cube_spatial.call_args.kwargs["method"] == "cubic"