Skip to content

Commit

Permalink
Issue #348 Improve "resample_spatial" metadata tracking in dry-run
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Jan 23, 2025
1 parent 983ca80 commit 7116e40
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 28 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ and start a new "In Progress" section above it.
## In progress

- Better argument validation in `resample_spatial`/`resample_cube_spatial` (related to [Open-EO/openeo-python-client#690](https://github.com/Open-EO/openeo-python-client/issues/690))
- Improve `resample_spatial`/`resample_cube_spatial` metadata tracking in dry-run ([#348](https://github.com/Open-EO/openeo-python-driver/issues/348))


## 0.123.0

Expand Down
44 changes: 24 additions & 20 deletions openeo_driver/dry_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,13 @@
DimensionAlreadyExistsException,
SpatialDimension,
TemporalDimension,
CubeMetadata,
)
from pyproj import CRS
from shapely.geometry import GeometryCollection, MultiPolygon, Point, Polygon
from shapely.geometry.base import BaseGeometry

from openeo.utils.normalize import normalize_resample_resolution
from openeo_driver import filter_properties
from openeo_driver.datacube import DriverDataCube, DriverVectorCube
from openeo_driver.datastructs import ResolutionMergeArgs, SarBackscatterArgs
Expand Down Expand Up @@ -427,7 +429,7 @@ def get_source_constraints(self, merge=True) -> List[SourceConstraint]:
metadata: CollectionMetadata = target.metadata
spatial_dim = metadata.spatial_dimensions[0]
# TODO: derive resolution from openeo:gsd instead (see openeo-geopyspark-driver)
resolutions = [dim.step for dim in metadata.spatial_dimensions if dim.step is not None]
resolutions = tuple(dim.step for dim in metadata.spatial_dimensions if dim.step is not None)
if len(resolutions) > 0 and spatial_dim.crs is not None:
constraints["resample"] = {
"target_crs": spatial_dim.crs,
Expand All @@ -436,9 +438,7 @@ def get_source_constraints(self, merge=True) -> List[SourceConstraint]:
}
args = resampling_op.get_arguments_by_operation("resample_spatial")
if args:
resolution = args[0]["resolution"]
if not isinstance(resolution, list):
resolution = [resolution, resolution]
resolution = normalize_resample_resolution(args[0]["resolution"])
projection = args[0]["projection"]
method = args[0].get("method", "near")
constraints["resample"] = {"target_crs": projection, "resolution": resolution, "method": method}
Expand Down Expand Up @@ -530,12 +530,12 @@ class DryRunDataCube(DriverDataCube):
estimate memory/cpu usage, ...
"""

def __init__(self, traces: List[DataTraceBase], data_tracer: DryRunDataTracer, metadata: CollectionMetadata = None):
def __init__(self, traces: List[DataTraceBase], data_tracer: DryRunDataTracer, metadata: CubeMetadata = None):
super(DryRunDataCube, self).__init__(metadata=metadata)
self._traces = traces or []
self._data_tracer = data_tracer

def _process(self, operation, arguments, metadata: CollectionMetadata = None) -> "DryRunDataCube":
def _process(self, operation, arguments, metadata: CubeMetadata = None) -> "DryRunDataCube":
"""Helper to handle single-cube operations"""
# New data cube with operation added to each trace
traces = self._data_tracer.process_traces(traces=self._traces, operation=operation, arguments=arguments)
Expand Down Expand Up @@ -691,14 +691,30 @@ def raster_to_vector(self):
def run_udf(self):
return self._process(operation="run_udf", arguments={})

def resample_spatial(
self,
resolution: Union[float, Tuple[float, float]],
projection: Union[int, str] = None,
method: str = "near",
align: str = "upper-left",
):
return self._process(
"resample_spatial",
arguments={"resolution": resolution, "projection": projection, "method": method, "align": align},
metadata=(self.metadata or CubeMetadata()).resample_spatial(resolution=resolution, projection=projection),
)

def resample_cube_spatial(self, target: "DryRunDataCube", method: str = "near") -> "DryRunDataCube":
cube = self._process("process_type", [ProcessType.FOCAL_SPACE])
cube = cube._process("resample_cube_spatial", arguments={"target": target, "method": method})
if target.metadata:
metadata = (self.metadata or CubeMetadata()).resample_cube_spatial(target=target.metadata)
else:
metadata = None
return DryRunDataCube(
traces=cube._traces + target._traces,
data_tracer=self._data_tracer,
# TODO: properly merge (other) metadata?
metadata=self.metadata,
metadata=metadata,
)

def reduce_dimension(
Expand Down Expand Up @@ -763,18 +779,6 @@ def sar_backscatter(self, args: SarBackscatterArgs) -> "DryRunDataCube":
def resolution_merge(self, args: ResolutionMergeArgs) -> "DryRunDataCube":
return self._process("resolution_merge", args)

def resample_spatial(
self,
resolution: Union[float, Tuple[float, float]],
projection: Union[int, str] = None,
method: str = "near",
align: str = "upper-left",
):
return self._process(
"resample_spatial",
arguments={"resolution": resolution, "projection": projection, "method": method, "align": align},
)

def apply_kernel(self, kernel: numpy.ndarray, factor=1, border=0, replace_invalid=0) -> "DriverDataCube":
cube = self._process("process_type", [ProcessType.FOCAL_SPACE])
cube = cube._process("pixel_buffer", arguments={"buffer_size": [x / 2.0 for x in kernel.shape]})
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
"flask>=2.0.0",
"werkzeug>=3.0.3", # https://github.com/Open-EO/openeo-python-driver/issues/243
"requests>=2.28.0",
"openeo>=0.33.0",
"openeo>=0.38.0.a2.dev",
"openeo_processes==0.0.4", # 0.0.4 is special build/release, also see https://github.com/Open-EO/openeo-python-driver/issues/152
"gunicorn>=20.0.1",
"numpy>=1.22.0",
Expand Down
51 changes: 45 additions & 6 deletions tests/test_dry_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest
import shapely.geometry
from openeo.internal.graph_building import PGNode
from openeo.metadata import SpatialDimension
from openeo.rest.datacube import DataCube

from openeo_driver.datacube import DriverVectorCube
Expand Down Expand Up @@ -287,7 +288,7 @@ def test_evaluate_graph_diamond(dry_run_env, dry_run_tracer):
("load_collection", ("S2_FOOBAR", ())),
{
"bands": ["grass"],
"resample": {"method": "near", "resolution": [10, 10], "target_crs": CRS_UTM},
"resample": {"method": "near", "resolution": (10, 10), "target_crs": CRS_UTM},
"spatial_extent": {"west": 1, "east": 2, "south": 51, "north": 52, "crs": "EPSG:4326"},
},
),
Expand Down Expand Up @@ -961,7 +962,7 @@ def test_multiple_filter_spatial(dry_run_env, dry_run_tracer):
"west": 166021.44308054057,
},
"filter_spatial": {"geometries": DummyVectorCube.from_geometry(shapely.geometry.shape(polygon1))},
"resample": {"method": "near", "resolution": [0.25, 0.25], "target_crs": 4326},
"resample": {"method": "near", "resolution": (0.25, 0.25), "target_crs": 4326},
"weak_spatial_extent": {
"crs": "EPSG:32631",
"east": 1056748.2872412915,
Expand Down Expand Up @@ -1068,7 +1069,7 @@ def test_resample_filter_spatial(dry_run_env, dry_run_tracer):
"west": 166021.44308054057,
},
"filter_spatial": {"geometries": DummyVectorCube.from_geometry(shapely.geometry.shape(polygon))},
"resample": {"method": "near", "resolution": [0.25, 0.25], "target_crs": 4326},
"resample": {"method": "near", "resolution": (0.25, 0.25), "target_crs": 4326},
"weak_spatial_extent": {
"crs": "EPSG:32631",
"east": 1056748.2872412915,
Expand Down Expand Up @@ -1807,7 +1808,7 @@ def test_filter_after_merge_cubes(dry_run_env, dry_run_tracer):
{
"bands": ["ndvi"],
"process_type": [ProcessType.FOCAL_SPACE],
"resample": {"method": "average", "resolution": [10, 10], "target_crs": CRS_UTM},
"resample": {"method": "average", "resolution": (10, 10), "target_crs": CRS_UTM},
"spatial_extent": {
"crs": "EPSG:32631",
"east": 642140.0,
Expand Down Expand Up @@ -2385,11 +2386,11 @@ def test_complex_extract_load_stac(dry_run_env, dry_run_tracer):
assert loadparams.bands == ["SCL"]
assert loadparams.global_extent == expected_extent
assert loadparams.pixel_buffer == [38.5, 38.5]
assert loadparams.target_resolution == None
assert loadparams.target_resolution is None

loadparams = _extract_load_parameters(dry_run_env, source_id_stac)
assert loadparams.global_extent == expected_extent
assert loadparams.target_resolution == [10, 10]
assert loadparams.target_resolution == (10, 10)


def test_normalize_geometries(dry_run_env, dry_run_tracer):
Expand Down Expand Up @@ -2447,3 +2448,41 @@ def test_normalize_geometries(dry_run_env, dry_run_tracer):
"south": 5654910,
"west": 89920,
}


def test_resample_cube_spatial_from_resampled_target(dry_run_env, dry_run_tracer):
"""
Use `resample_cube_spatial` with a target that is result from a separate `resample_spatial`
"""
target = DataCube.load_collection("SENTINEL1_GRD", connection=None)
target = target.resample_spatial(resolution=(3, 5), projection=32631)
assert target.metadata.spatial_dimensions == [
SpatialDimension(name="x", extent=[None, None], crs=32631, step=3),
SpatialDimension(name="y", extent=[None, None], crs=32631, step=5),
]

cube = DataCube.load_collection("S2_FOOBAR", connection=None)
assert cube.metadata is None
cube = cube.resample_cube_spatial(target=target)
assert cube.metadata.spatial_dimensions == [
SpatialDimension(name="x", extent=[None, None], crs=32631, step=3),
SpatialDimension(name="y", extent=[None, None], crs=32631, step=5),
]

pg = cube.flat_graph()
_ = evaluate(pg, env=dry_run_env, do_dry_run=False)

source_constraints = dry_run_tracer.get_source_constraints(merge=True)
assert source_constraints == [
(
("load_collection", ("S2_FOOBAR", ())),
{
"process_type": [ProcessType.FOCAL_SPACE],
"resample": {"method": "near", "resolution": (3, 5), "target_crs": 32631},
},
),
(
("load_collection", ("SENTINEL1_GRD", ())),
{"resample": {"method": "near", "resolution": (3, 5), "target_crs": 32631}},
),
]
2 changes: 1 addition & 1 deletion tests/test_views_execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@ def test_execute_resample_and_merge_cubes(api):
dummy = dummy_backend.get_collection("S2_FAPAR_CLOUDCOVER")
last_load_collection_call = dummy_backend.last_load_collection_call("S2_FAPAR_CLOUDCOVER")
assert last_load_collection_call.target_crs == CRS_UTM
assert last_load_collection_call.target_resolution == [10, 10]
assert last_load_collection_call.target_resolution == (10, 10)
assert dummy.merge_cubes.call_count == 1
assert dummy.resample_cube_spatial.call_count == 1
assert dummy.resample_cube_spatial.call_args.kwargs["method"] == "cubic"
Expand Down

0 comments on commit 7116e40

Please sign in to comment.