From 289880c6886a2bbe6908feb2924aa68f1a7750a8 Mon Sep 17 00:00:00 2001 From: Alice Fage Date: Mon, 8 Jul 2024 13:52:37 +1200 Subject: [PATCH] revert: Pull Requests #993 and #985 (#997) #### Motivation Revert Pull Requests #993 and #985 #985 caused [collection_from_item to fail](https://toitutewhenua.atlassian.net/browse/TDE-1209) and while #993 resolved that issue, it [creates invalid STAC Items](https://toitutewhenua.atlassian.net/browse/TDE-1217). Unit tests and GitHub Actions tests did not pick up these issues. #### Modification Revert commits 384d0fc674ce8fe4f1ff8726362881887a68522f and ed177c622dbb93587f00524635051c5f0f4a8b9f #### Checklist _If not applicable, provide explanation of why._ - [x] Tests updated - [also created a Jira issue to improve testing so we can pick these up in future](https://toitutewhenua.atlassian.net/browse/TDE-1218). - [x] Docs updated - [x] Issue linked in Title --- scripts/files/geotiff.py | 3 +- scripts/stac/imagery/collection.py | 7 +- scripts/stac/imagery/create_stac.py | 5 +- scripts/stac/imagery/item.py | 117 +++++++++--------- scripts/stac/imagery/tests/collection_test.py | 42 +++---- scripts/stac/imagery/tests/item_test.py | 55 ++++---- scripts/standardise_validate.py | 3 +- 7 files changed, 107 insertions(+), 125 deletions(-) diff --git a/scripts/files/geotiff.py b/scripts/files/geotiff.py index a60efd648..ee41ff66f 100644 --- a/scripts/files/geotiff.py +++ b/scripts/files/geotiff.py @@ -1,10 +1,9 @@ from shapely.geometry import Polygon from scripts.gdal.gdalinfo import GdalInfo -from scripts.stac.imagery.item import BoundingBox -def get_extents(gdalinfo_result: GdalInfo) -> tuple[dict[str, list[list[list[float]]]], BoundingBox]: +def get_extents(gdalinfo_result: GdalInfo) -> tuple[dict[str, list[list[list[float]]]], tuple[float, float, float, float]]: """Get the geometry and bounding box from the `gdalinfo`. 
Args: diff --git a/scripts/stac/imagery/collection.py b/scripts/stac/imagery/collection.py index 486ae19b2..6aa986894 100644 --- a/scripts/stac/imagery/collection.py +++ b/scripts/stac/imagery/collection.py @@ -11,7 +11,6 @@ from scripts.files.fs import write from scripts.json_codec import dict_to_json_bytes from scripts.stac.imagery.capture_area import generate_capture_area, gsd_to_float -from scripts.stac.imagery.item import BoundingBox from scripts.stac.imagery.metadata_constants import ( DATA_CATEGORIES, DEM, @@ -161,7 +160,7 @@ def add_providers(self, providers: list[Provider]) -> None: for p in providers: self.stac["providers"].append(p) - def update_spatial_extent(self, item_bbox: BoundingBox) -> None: + def update_spatial_extent(self, item_bbox: list[float]) -> None: """Update (if needed) the Collection spatial extent from a bounding box. Args: @@ -181,7 +180,7 @@ def update_spatial_extent(self, item_bbox: BoundingBox) -> None: max_x = max(bbox[0], bbox[2], item_bbox[0], item_bbox[2]) max_y = max(bbox[1], bbox[3], item_bbox[1], item_bbox[3]) - self.update_extent(bbox=(min_x, min_y, max_x, max_y)) + self.update_extent(bbox=[min_x, min_y, max_x, max_y]) def update_temporal_extent(self, item_start_datetime: str, item_end_datetime: str) -> None: """Update (if needed) the temporal extent of the collection. @@ -216,7 +215,7 @@ def update_temporal_extent(self, item_start_datetime: str, item_end_datetime: st ] ) - def update_extent(self, bbox: BoundingBox | None = None, interval: list[str] | None = None) -> None: + def update_extent(self, bbox: list[float] | None = None, interval: list[str] | None = None) -> None: """Update an extent of the Collection whereas it's spatial or temporal. 
Args: diff --git a/scripts/stac/imagery/create_stac.py b/scripts/stac/imagery/create_stac.py index 0c5779afb..f827e192a 100644 --- a/scripts/stac/imagery/create_stac.py +++ b/scripts/stac/imagery/create_stac.py @@ -34,7 +34,10 @@ def create_item( geometry, bbox = get_extents(gdalinfo_result) - item = ImageryItem(id_, file, utc_now, start_datetime, end_datetime, geometry, bbox, collection_id) + item = ImageryItem(id_, file, utc_now) + item.update_datetime(start_datetime, end_datetime) + item.update_spatial(geometry, bbox) + item.add_collection(collection_id) get_log().info("ImageryItem created", path=file) return item diff --git a/scripts/stac/imagery/item.py b/scripts/stac/imagery/item.py index eafea8f8c..a28144ff3 100644 --- a/scripts/stac/imagery/item.py +++ b/scripts/stac/imagery/item.py @@ -1,6 +1,5 @@ import os from collections.abc import Callable -from dataclasses import dataclass from datetime import datetime from typing import Any @@ -11,68 +10,66 @@ from scripts.stac.util.STAC_VERSION import STAC_VERSION from scripts.stac.util.stac_extensions import StacExtensions -BoundingBox = tuple[float, float, float, float] +class ImageryItem: + stac: dict[str, Any] -@dataclass -class Properties: - created: str - updated: str - start_datetime: str - end_datetime: str - datetime: str | None = None - - -@dataclass -class ImageryItem: # pylint: disable-msg=too-many-instance-attributes - feature = "type" - stac_version = STAC_VERSION - id: str - links: list[dict[str, str]] - assets: dict[str, dict[str, str]] - stac_extensions: list[str] - properties: Properties - geometry: dict[str, Any] - bbox: BoundingBox - collection_id: str - - def __init__( # pylint: disable-msg=too-many-arguments - self, - id_: str, - file: str, - now: Callable[[], datetime], - start_datetime: str, - end_datetime: str, - geometry: dict[str, Any], - bbox: BoundingBox, - collection_id: str, - ) -> None: + def __init__(self, id_: str, file: str, now: Callable[[], datetime]) -> None: file_content = 
fs.read(file) file_modified_datetime = format_rfc_3339_datetime_string(modified(file)) now_string = format_rfc_3339_datetime_string(now()) - self.id = id_ - - self.links = [ - {"rel": "self", "href": f"./{id_}.json", "type": "application/json"}, - {"rel": "collection", "href": "./collection.json", "type": "application/json"}, - {"rel": "parent", "href": "./collection.json", "type": "application/json"}, - ] - self.assets = { - "visual": { - "href": os.path.join(".", os.path.basename(file)), - "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "file:checksum": checksum.multihash_as_hex(file_content), - "created": file_modified_datetime, - "updated": file_modified_datetime, - } + self.stac = { + "type": "Feature", + "stac_version": STAC_VERSION, + "id": id_, + "links": [ + {"rel": "self", "href": f"./{id_}.json", "type": "application/json"}, + ], + "assets": { + "visual": { + "href": os.path.join(".", os.path.basename(file)), + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "file:checksum": checksum.multihash_as_hex(file_content), + "created": file_modified_datetime, + "updated": file_modified_datetime, + } + }, + "stac_extensions": [StacExtensions.file.value], + "properties": {"created": now_string, "updated": now_string}, } - self.stac_extensions = [StacExtensions.file.value] - self.properties = Properties( - created=now_string, - updated=now_string, - start_datetime=start_datetime, - end_datetime=end_datetime, - ) - self.geometry = geometry - self.bbox = bbox - self.collection_id = collection_id + + def update_datetime(self, start_datetime: str, end_datetime: str) -> None: + """Update the Item `start_datetime` and `end_datetime` property. 
+ + Args: + start_datetime: a start date in `YYYY-MM-DD` format + end_datetime: a end date in `YYYY-MM-DD` format + """ + self.stac.setdefault("properties", {}) + self.stac["properties"]["start_datetime"] = start_datetime + self.stac["properties"]["end_datetime"] = end_datetime + self.stac["properties"]["datetime"] = None + + # FIXME: redefine the 'Any' + def update_spatial(self, geometry: dict[str, Any], bbox: tuple[float, ...]) -> None: + """Update the `geometry` and `bbox` (bounding box) of the Item. + + Args: + geometry: a geometry + bbox: a bounding box + """ + self.stac["geometry"] = geometry + self.stac["bbox"] = bbox + + def add_collection(self, collection_id: str) -> None: + """Link a Collection to the Item as its `collection` and `parent`. + + Args: + collection_id: the id of the collection to link + """ + self.stac["collection"] = collection_id + self.add_link(rel="collection") + self.add_link(rel="parent") + + def add_link(self, rel: str, href: str = "./collection.json", file_type: str = "application/json") -> None: + self.stac["links"].append({"rel": rel, "href": href, "type": file_type}) diff --git a/scripts/stac/imagery/tests/collection_test.py b/scripts/stac/imagery/tests/collection_test.py index 16270a646..58b7c9271 100644 --- a/scripts/stac/imagery/tests/collection_test.py +++ b/scripts/stac/imagery/tests/collection_test.py @@ -1,10 +1,10 @@ import json import os -from dataclasses import asdict +import tempfile +from collections.abc import Callable, Generator from datetime import datetime, timezone from shutil import rmtree -from tempfile import TemporaryDirectory, mkdtemp -from typing import Callable, Generator +from tempfile import mkdtemp import pytest import shapely.geometry @@ -75,7 +75,7 @@ def test_id_parsed_on_init(metadata: CollectionMetadata) -> None: def test_bbox_updated_from_none(metadata: CollectionMetadata) -> None: collection = ImageryCollection(metadata, any_epoch_datetime) - bbox = (1799667.5, 5815977.0, 1800422.5, 5814986.0) + 
bbox = [1799667.5, 5815977.0, 1800422.5, 5814986.0] collection.update_spatial_extent(bbox) assert collection.stac["extent"]["spatial"]["bbox"] == [bbox] @@ -83,13 +83,13 @@ def test_bbox_updated_from_none(metadata: CollectionMetadata) -> None: def test_bbox_updated_from_existing(metadata: CollectionMetadata) -> None: collection = ImageryCollection(metadata, any_epoch_datetime) # init bbox - bbox = (174.889641, -41.217532, 174.902344, -41.203521) + bbox = [174.889641, -41.217532, 174.902344, -41.203521] collection.update_spatial_extent(bbox) # update bbox - bbox = (174.917643, -41.211157, 174.922965, -41.205490) + bbox = [174.917643, -41.211157, 174.922965, -41.205490] collection.update_spatial_extent(bbox) - assert collection.stac["extent"]["spatial"]["bbox"] == [(174.889641, -41.217532, 174.922965, -41.203521)] + assert collection.stac["extent"]["spatial"]["bbox"] == [[174.889641, -41.217532, 174.922965, -41.203521]] def test_interval_updated_from_none(metadata: CollectionMetadata) -> None: @@ -128,18 +128,18 @@ def test_add_item(metadata: CollectionMetadata, subtests: SubTests) -> None: item_file_path = "./scripts/tests/data/empty.tiff" modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc) os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp())) - start_datetime = "2021-01-27T00:00:00Z" - end_datetime = "2021-01-27T00:00:00Z" + item = ImageryItem("BR34_5000_0304", item_file_path, now_function) geometry = { "type": "Polygon", "coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]], } bbox = (1799667.5, 5815977.0, 1800422.5, 5814986.0) - item = ImageryItem( - "BR34_5000_0304", item_file_path, now_function, start_datetime, end_datetime, geometry, bbox, collection.stac["id"] - ) + start_datetime = "2021-01-27T00:00:00Z" + end_datetime = "2021-01-27T00:00:00Z" + item.update_spatial(geometry, bbox) + item.update_datetime(start_datetime, 
end_datetime) - collection.add_item(asdict(item)) + collection.add_item(item.stac) links = collection.stac["links"].copy() @@ -159,19 +159,15 @@ def test_add_item(metadata: CollectionMetadata, subtests: SubTests) -> None: with subtests.test(): assert collection.stac["extent"]["spatial"]["bbox"] == [bbox] - now_string = format_rfc_3339_datetime_string(now) for property_name in ["created", "updated"]: with subtests.test(msg=f"collection {property_name}"): - assert collection.stac[property_name] == now_string - - with subtests.test(msg=f"item assets.visual.{property_name}"): - assert item.assets["visual"][property_name] == "2001-02-03T04:05:06Z" + assert collection.stac[property_name] == format_rfc_3339_datetime_string(now) - with subtests.test(msg="item properties.created"): - assert item.properties.created == now_string + with subtests.test(msg=f"item properties.{property_name}"): + assert item.stac["properties"][property_name] == format_rfc_3339_datetime_string(now) - with subtests.test(msg="item properties.updated"): - assert item.properties.updated == now_string + with subtests.test(msg=f"item assets.visual.{property_name}"): + assert item.stac["assets"]["visual"][property_name] == "2001-02-03T04:05:06Z" def test_write_collection(metadata: CollectionMetadata) -> None: @@ -283,7 +279,7 @@ def test_capture_area_added(metadata: CollectionMetadata, subtests: SubTests) -> } ) ) - with TemporaryDirectory() as tmp_path: + with tempfile.TemporaryDirectory() as tmp_path: artifact_path = os.path.join(tmp_path, "tmp") collection.add_capture_area(polygons, tmp_path, artifact_path) file_target = os.path.join(tmp_path, file_name) diff --git a/scripts/stac/imagery/tests/item_test.py b/scripts/stac/imagery/tests/item_test.py index 199987629..5ec7a65fc 100644 --- a/scripts/stac/imagery/tests/item_test.py +++ b/scripts/stac/imagery/tests/item_test.py @@ -3,10 +3,9 @@ from pytest_mock import MockerFixture from pytest_subtests import SubTests -from scripts.datetimes import 
format_rfc_3339_datetime_string from scripts.files.files_helper import get_file_name_from_path from scripts.stac.imagery.collection import ImageryCollection -from scripts.stac.imagery.item import BoundingBox, ImageryItem +from scripts.stac.imagery.item import ImageryItem from scripts.stac.imagery.metadata_constants import CollectionMetadata from scripts.tests.datetimes_test import any_epoch_datetime @@ -25,34 +24,39 @@ def test_imagery_stac_item(mocker: MockerFixture, subtests: SubTests) -> None: start_datetime = "2021-01-27T00:00:00Z" end_datetime = "2021-01-27T00:00:00Z" - item = ImageryItem(id_, path, any_epoch_datetime, start_datetime, end_datetime, geometry, bbox, "any_collection_id") + item = ImageryItem(id_, path, any_epoch_datetime) + item.update_spatial(geometry, bbox) + item.update_datetime(start_datetime, end_datetime) # checks with subtests.test(): - assert item.id == id_ + assert item.stac["id"] == id_ with subtests.test(): - assert item.properties.start_datetime == start_datetime + assert item.stac["properties"]["start_datetime"] == start_datetime with subtests.test(): - assert item.properties.end_datetime == end_datetime + assert item.stac["properties"]["end_datetime"] == end_datetime with subtests.test(): - assert item.properties.datetime is None + assert item.stac["properties"]["datetime"] is None with subtests.test(): - assert item.geometry["coordinates"] == geometry["coordinates"] + assert item.stac["geometry"]["coordinates"] == geometry["coordinates"] with subtests.test(): - assert item.geometry == geometry + assert item.stac["geometry"] == geometry with subtests.test(): - assert item.bbox == bbox + assert item.stac["bbox"] == bbox with subtests.test(): - assert item.assets["visual"]["file:checksum"] == "1220e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + assert ( + item.stac["assets"]["visual"]["file:checksum"] + == "1220e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + ) with subtests.test(): - assert 
{"rel": "self", "href": f"./{id_}.json", "type": "application/json"} in item.links + assert {"rel": "self", "href": f"./{id_}.json", "type": "application/json"} in item.stac["links"] # pylint: disable=duplicate-code @@ -74,30 +78,15 @@ def test_imagery_add_collection(mocker: MockerFixture, subtests: SubTests) -> No path = "./scripts/tests/data/empty.tiff" id_ = get_file_name_from_path(path) mocker.patch("scripts.files.fs.read", return_value=b"") - item = ImageryItem( - id_, - path, - any_epoch_datetime, - any_epoch_datetime_string(), - any_epoch_datetime_string(), - {}, - any_bounding_box(), - collection.stac["id"], - ) + item = ImageryItem(id_, path, any_epoch_datetime) - with subtests.test(): - assert item.collection_id == ulid + item.add_collection(collection.stac["id"]) with subtests.test(): - assert {"rel": "collection", "href": "./collection.json", "type": "application/json"} in item.links + assert item.stac["collection"] == ulid with subtests.test(): - assert {"rel": "parent", "href": "./collection.json", "type": "application/json"} in item.links - - -def any_bounding_box() -> BoundingBox: - return 1, 2, 3, 4 + assert {"rel": "collection", "href": "./collection.json", "type": "application/json"} in item.stac["links"] - -def any_epoch_datetime_string() -> str: - return format_rfc_3339_datetime_string(any_epoch_datetime()) + with subtests.test(): + assert {"rel": "parent", "href": "./collection.json", "type": "application/json"} in item.stac["links"] diff --git a/scripts/standardise_validate.py b/scripts/standardise_validate.py index bc8d74e4a..e0597215f 100644 --- a/scripts/standardise_validate.py +++ b/scripts/standardise_validate.py @@ -1,7 +1,6 @@ import argparse import os import sys -from dataclasses import asdict from linz_logger import get_log @@ -118,7 +117,7 @@ def main() -> None: item = create_item( file.get_path_standardised(), start_datetime, end_datetime, arguments.collection_id, file.get_gdalinfo() ) - write(stac_item_path, 
dict_to_json_bytes(asdict(item)), content_type=ContentType.GEOJSON.value) + write(stac_item_path, dict_to_json_bytes(item.stac), content_type=ContentType.GEOJSON.value) get_log().info("stac_saved", path=stac_item_path)