From 6b3c539f0019e9620909e15e71ba97ba9b656aa6 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Wed, 10 May 2023 17:28:29 +0200 Subject: [PATCH] Issue #425 initial `Connection.load_stac` --- CHANGELOG.md | 2 + openeo/rest/connection.py | 139 +++++++++++++++++++++++++++++++++- openeo/rest/job.py | 2 + tests/rest/conftest.py | 11 +++ tests/rest/test_connection.py | 32 ++++++++ 5 files changed, 182 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e544cea4..85a3da60a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Show progress bar while waiting for OIDC authentication with device code flow, including special mode for in Jupyter notebooks. ([#237](https://github.com/Open-EO/openeo-python-client/issues/237)) +- Basic support for `load_stac` process with `Connection.load_stac()` + ([#425](https://github.com/Open-EO/openeo-python-client/issues/425)) ### Changed diff --git a/openeo/rest/connection.py b/openeo/rest/connection.py index 307916ffd..8d5cc32d4 100644 --- a/openeo/rest/connection.py +++ b/openeo/rest/connection.py @@ -1036,10 +1036,141 @@ def load_result( TemporalDimension(name='t', extent=[]), BandDimension(name="bands", bands=[Band("unknown")]), ]) - cube = self.datacube_from_process(process_id="load_result", id=id, - **dict_no_none(spatial_extent=spatial_extent, - temporal_extent=temporal_extent and DataCube._get_temporal_extent( - temporal_extent), bands=bands)) + cube = self.datacube_from_process( + process_id="load_result", + id=id, + **dict_no_none( + spatial_extent=spatial_extent, + temporal_extent=temporal_extent and DataCube._get_temporal_extent(temporal_extent), + bands=bands, + ), + ) + cube.metadata = metadata + return cube + + def load_stac( + self, + url: str, + spatial_extent: Optional[Dict[str, float]] = None, + temporal_extent: Optional[List[Union[str, datetime.datetime, datetime.date]]] = None, + bands: 
Optional[List[str]] = None, + properties: Optional[dict] = None, + ) -> DataCube: + """ + Loads data from a static STAC catalog or a STAC API Collection and returns the data as a processable :py:class:`DataCube`. + A batch job result can be loaded by providing a reference to it. + + If supported by the underlying metadata and file format, the data that is added to the data cube can be + restricted with the parameters ``spatial_extent``, ``temporal_extent`` and ``bands``. + If no data is available for the given extents, a ``NoDataAvailable`` error is thrown. + + Remarks: + + * The bands (and all dimensions that specify nominal dimension labels) are expected to be ordered as + specified in the metadata if the ``bands`` parameter is set to ``null``. + * If no additional parameter is specified this would imply that the whole data set is expected to be loaded. + Due to the large size of many data sets, this is not recommended and may be optimized by back-ends to only + load the data that is actually required after evaluating subsequent processes such as filters. + This means that the values should be processed only after the data has been limited to the required extent + and as a consequence also to a manageable size. + + + :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) + or a specific STAC API Collection that allows to filter items and to download assets. + This includes batch job results, which itself are compliant to STAC. + For external URLs, authentication details such as API keys or tokens may need to be included in the URL. + + Batch job results can be specified in two ways: + + - For Batch job results at the same back-end, a URL pointing to the corresponding batch job results + endpoint should be provided. The URL usually ends with ``/jobs/{id}/results`` and ``{id}`` + is the corresponding batch job ID. + - For external results, a signed URL must be provided. 
Not all back-ends support signed URLs, + which are provided as a link with the link relation `canonical` in the batch job result metadata. + :param spatial_extent: + Limits the data to load to the specified bounding box or polygons. + + For raster data, the process loads the pixel into the data cube if the point at the pixel center intersects + with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). + + For vector data, the process loads the geometry into the data cube if the geometry is fully within the + bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). + Empty geometries may only be in the data cube if no spatial extent has been provided. + + The GeoJSON can be one of the following feature types: + + * A ``Polygon`` or ``MultiPolygon`` geometry, + * a ``Feature`` with a ``Polygon`` or ``MultiPolygon`` geometry, or + * a ``FeatureCollection`` containing at least one ``Feature`` with ``Polygon`` or ``MultiPolygon`` geometries. + + Set this parameter to ``None`` to set no limit for the spatial extent. + Be careful with this when loading large datasets. It is recommended to use this parameter instead of + using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data. + + :param temporal_extent: + Limits the data to load to the specified left-closed temporal interval. + Applies to all temporal dimensions. + The interval has to be specified as an array with exactly two elements: + + 1. The first element is the start of the temporal interval. + The specified instance in time is **included** in the interval. + 2. The second element is the end of the temporal interval. + The specified instance in time is **excluded** from the interval. + + The second element must always be greater/later than the first element. + Otherwise, a `TemporalExtentEmpty` exception is thrown. + + Also supports open intervals by setting one of the boundaries to ``None``, but never both. 
+ + Set this parameter to ``None`` to set no limit for the temporal extent. + Be careful with this when loading large datasets. It is recommended to use this parameter instead of + using ``filter_temporal()`` directly after loading unbounded data. + + :param bands: + Only adds the specified bands into the data cube so that bands that don't match the list + of band names are not available. Applies to all dimensions of type `bands`. + + Either the unique band name (metadata field ``name`` in bands) or one of the common band names + (metadata field ``common_name`` in bands) can be specified. + If the unique band name and the common name conflict, the unique band name has a higher priority. + + The order of the specified array defines the order of the bands in the data cube. + If multiple bands match a common name, all matched bands are included in the original order. + + It is recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded data. + + :param properties: + Limits the data by metadata properties to include only data in the data cube which + all given conditions return ``True`` for (AND operation). + + Specify key-value-pairs with the key being the name of the metadata property, + which can be retrieved with the openEO Data Discovery for Collections. + The value must be a condition (user-defined process) to be evaluated against a STAC API. + This parameter is not supported for static STAC. + + .. 
versionadded:: 0.17.0 + """ + # TODO: detect actual metadata from URL + metadata = CollectionMetadata( + {}, + dimensions=[ + SpatialDimension(name="x", extent=[]), + SpatialDimension(name="y", extent=[]), + TemporalDimension(name="t", extent=[]), + BandDimension(name="bands", bands=[Band("unknown")]), + ], + ) + arguments = {"url": url} + # TODO: more normalization/validation of extent/band parameters and `properties` + if spatial_extent: + arguments["spatial_extent"] = spatial_extent + if temporal_extent: + arguments["temporal_extent"] = DataCube._get_temporal_extent(temporal_extent) + if bands: + arguments["bands"] = bands + if properties: + arguments["properties"] = properties + cube = self.datacube_from_process(process_id="load_stac", **arguments) cube.metadata = metadata return cube diff --git a/openeo/rest/job.py b/openeo/rest/job.py index 0273f64bd..71d6b5262 100644 --- a/openeo/rest/job.py +++ b/openeo/rest/job.py @@ -32,6 +32,8 @@ class BatchJob: """ + # TODO #425 method to bootstrap `load_stac` directly from a BatchJob object + def __init__(self, job_id: str, connection: 'Connection'): self.job_id = job_id """Unique identifier of the batch job (string).""" diff --git a/tests/rest/conftest.py b/tests/rest/conftest.py index b9f542e6a..ffe27ba20 100644 --- a/tests/rest/conftest.py +++ b/tests/rest/conftest.py @@ -8,6 +8,10 @@ import pytest import time_machine +from openeo.rest.connection import Connection + +API_URL = "https://oeo.test/" + @pytest.fixture(params=["1.0.0"]) def api_version(request): @@ -65,3 +69,10 @@ def assert_oidc_device_code_flow(url: str = "https://oidc.test/dc", elapsed: flo assert time_machine.coordinates.time() - start >= elapsed return assert_oidc_device_code_flow + + +@pytest.fixture +def con120(requests_mock): + requests_mock.get(API_URL, json={"api_version": "1.2.0"}) + con = Connection(API_URL) + return con diff --git a/tests/rest/test_connection.py b/tests/rest/test_connection.py index 13732e1ca..13fc5b969 100644 --- 
a/tests/rest/test_connection.py +++ b/tests/rest/test_connection.py @@ -1846,6 +1846,38 @@ def test_load_result_filters(requests_mock): } +class TestLoadStac: + def test_basic(self, con120): + cube = con120.load_stac("https://provide.test/dataset") + assert cube.flat_graph() == { + "loadstac1": { + "process_id": "load_stac", + "arguments": {"url": "https://provide.test/dataset"}, + "result": True, + } + } + + def test_extents(self, con120): + cube = con120.load_stac( + "https://provide.test/dataset", + spatial_extent={"west": 1, "south": 2, "east": 3, "north": 4}, + temporal_extent=["2023-05-10", "2023-06-01"], + bands=["B02", "B03"], + ) + assert cube.flat_graph() == { + "loadstac1": { + "process_id": "load_stac", + "arguments": { + "url": "https://provide.test/dataset", + "spatial_extent": {"east": 3, "north": 4, "south": 2, "west": 1}, + "temporal_extent": ["2023-05-10", "2023-06-01"], + "bands": ["B02", "B03"], + }, + "result": True, + } + } + + def test_list_file_formats(requests_mock): requests_mock.get(API_URL, json={"api_version": "1.0.0"}) conn = Connection(API_URL)