Skip to content

Commit

Permalink
refactor: Convert ImageryItem to a data class TDE-1147 (#985)
Browse files Browse the repository at this point in the history
#### Motivation

The `update_*` methods existed only to make testing slightly more
convenient, but they also split initialisation of the object into multiple
stages. With this change the production code is simpler (a single constructor
invocation with no further initialisation), and the tests need only small
changes to make sure all relevant properties are set.

#### Checklist

- [x] Tests updated
- [ ] Docs updated (N/A)
- [x] Issue linked in Title
  • Loading branch information
l0b0 authored Jun 14, 2024
1 parent b8c289d commit ed177c6
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 112 deletions.
3 changes: 2 additions & 1 deletion scripts/files/geotiff.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from shapely.geometry import Polygon

from scripts.gdal.gdalinfo import GdalInfo
from scripts.stac.imagery.item import BoundingBox


def get_extents(gdalinfo_result: GdalInfo) -> tuple[dict[str, list[list[list[float]]]], tuple[float, float, float, float]]:
def get_extents(gdalinfo_result: GdalInfo) -> tuple[dict[str, list[list[list[float]]]], BoundingBox]:
"""Get the geometry and bounding box from the `gdalinfo`.
Args:
Expand Down
18 changes: 10 additions & 8 deletions scripts/stac/imagery/collection.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from collections.abc import Callable
from dataclasses import asdict
from datetime import datetime
from typing import Any

Expand All @@ -11,6 +12,7 @@
from scripts.files.fs import write
from scripts.json_codec import dict_to_json_bytes
from scripts.stac.imagery.capture_area import generate_capture_area, gsd_to_float
from scripts.stac.imagery.item import BoundingBox, ImageryItem
from scripts.stac.imagery.metadata_constants import (
DATA_CATEGORIES,
DEM,
Expand Down Expand Up @@ -128,18 +130,18 @@ def add_capture_area(self, polygons: list[BaseGeometry], target: str, artifact_t
if StacExtensions.file.value not in self.stac["stac_extensions"]:
self.stac["stac_extensions"].append(StacExtensions.file.value)

def add_item(self, item: dict[Any, Any]) -> None:
def add_item(self, item: ImageryItem) -> None:
"""Add an `Item` to the `links` of the `Collection`.
Args:
item: STAC Item to add
"""
item_self_link = next((feat for feat in item["links"] if feat["rel"] == "self"), None)
file_checksum = checksum.multihash_as_hex(dict_to_json_bytes(item))
item_self_link = next((feat for feat in item.links if feat["rel"] == "self"), None)
file_checksum = checksum.multihash_as_hex(dict_to_json_bytes(asdict(item)))
if item_self_link:
self.add_link(href=item_self_link["href"], file_checksum=file_checksum)
self.update_temporal_extent(item["properties"]["start_datetime"], item["properties"]["end_datetime"])
self.update_spatial_extent(item["bbox"])
self.update_temporal_extent(item.properties.start_datetime, item.properties.end_datetime)
self.update_spatial_extent(item.bbox)

def add_link(self, href: str, file_checksum: str) -> None:
"""Add a `link` to the existing `links` list of the Collection.
Expand All @@ -160,7 +162,7 @@ def add_providers(self, providers: list[Provider]) -> None:
for p in providers:
self.stac["providers"].append(p)

def update_spatial_extent(self, item_bbox: list[float]) -> None:
def update_spatial_extent(self, item_bbox: BoundingBox) -> None:
"""Update (if needed) the Collection spatial extent from a bounding box.
Args:
Expand All @@ -180,7 +182,7 @@ def update_spatial_extent(self, item_bbox: list[float]) -> None:
max_x = max(bbox[0], bbox[2], item_bbox[0], item_bbox[2])
max_y = max(bbox[1], bbox[3], item_bbox[1], item_bbox[3])

self.update_extent(bbox=[min_x, min_y, max_x, max_y])
self.update_extent(bbox=(min_x, min_y, max_x, max_y))

def update_temporal_extent(self, item_start_datetime: str, item_end_datetime: str) -> None:
"""Update (if needed) the temporal extent of the collection.
Expand Down Expand Up @@ -215,7 +217,7 @@ def update_temporal_extent(self, item_start_datetime: str, item_end_datetime: st
]
)

def update_extent(self, bbox: list[float] | None = None, interval: list[str] | None = None) -> None:
def update_extent(self, bbox: BoundingBox | None = None, interval: list[str] | None = None) -> None:
"""Update an extent of the Collection whereas it's spatial or temporal.
Args:
Expand Down
5 changes: 1 addition & 4 deletions scripts/stac/imagery/create_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,7 @@ def create_item(

geometry, bbox = get_extents(gdalinfo_result)

item = ImageryItem(id_, file, utc_now)
item.update_datetime(start_datetime, end_datetime)
item.update_spatial(geometry, bbox)
item.add_collection(collection_id)
item = ImageryItem(id_, file, utc_now, start_datetime, end_datetime, geometry, bbox, collection_id)

get_log().info("ImageryItem created", path=file)
return item
117 changes: 60 additions & 57 deletions scripts/stac/imagery/item.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from collections.abc import Callable
from dataclasses import dataclass
from datetime import datetime
from typing import Any

Expand All @@ -10,66 +11,68 @@
from scripts.stac.util.STAC_VERSION import STAC_VERSION
from scripts.stac.util.stac_extensions import StacExtensions

BoundingBox = tuple[float, float, float, float]

class ImageryItem:
stac: dict[str, Any]

def __init__(self, id_: str, file: str, now: Callable[[], datetime]) -> None:
file_content = fs.read(file)
file_modified_datetime = format_rfc_3339_datetime_string(modified(file))
now_string = format_rfc_3339_datetime_string(now())
self.stac = {
"type": "Feature",
"stac_version": STAC_VERSION,
"id": id_,
"links": [
{"rel": "self", "href": f"./{id_}.json", "type": "application/json"},
],
"assets": {
"visual": {
"href": os.path.join(".", os.path.basename(file)),
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"file:checksum": checksum.multihash_as_hex(file_content),
"created": file_modified_datetime,
"updated": file_modified_datetime,
}
},
"stac_extensions": [StacExtensions.file.value],
"properties": {"created": now_string, "updated": now_string},
}

def update_datetime(self, start_datetime: str, end_datetime: str) -> None:
"""Update the Item `start_datetime` and `end_datetime` property.
Args:
start_datetime: a start date in `YYYY-MM-DD` format
end_datetime: an end date in `YYYY-MM-DD` format
"""
self.stac.setdefault("properties", {})
self.stac["properties"]["start_datetime"] = start_datetime
self.stac["properties"]["end_datetime"] = end_datetime
self.stac["properties"]["datetime"] = None
@dataclass
class Properties:
    """Values stored in the `properties` attribute of an `ImageryItem`."""

    # Timestamp strings; `ImageryItem.__init__` fills both from the same formatted `now()` value.
    created: str
    updated: str
    # Start and end datetime strings, passed through unchanged from the `ImageryItem` constructor arguments.
    start_datetime: str
    end_datetime: str
    # `ImageryItem.__init__` does not pass this, so it keeps the `None` default there.
    datetime: str | None = None

# FIXME: redefine the 'Any'
def update_spatial(self, geometry: dict[str, Any], bbox: tuple[float, ...]) -> None:
"""Update the `geometry` and `bbox` (bounding box) of the Item.

Args:
geometry: a geometry
bbox: a bounding box
"""
self.stac["geometry"] = geometry
self.stac["bbox"] = bbox
@dataclass
class ImageryItem:  # pylint: disable-msg=too-many-instance-attributes
    """STAC Item describing a single imagery file.

    `type` and `stac_version` carry no annotation, so they are plain class
    attributes rather than dataclass fields: they are shared by every instance
    and are not part of `dataclasses.asdict()` output.
    """

    # Object type of the Item; matches the `"type": "Feature"` entry of the former `stac` dict.
    type = "Feature"
    stac_version = STAC_VERSION
    # Item identifier, also used to build the `self` link href.
    id: str
    # Link objects (`rel`, `href`, `type`): `self`, `collection` and `parent`.
    links: list[dict[str, str]]
    # Asset objects keyed by asset name; the constructor creates a single `visual` asset.
    assets: dict[str, dict[str, str]]
    stac_extensions: list[str]
    properties: Properties
    geometry: dict[str, Any]
    bbox: BoundingBox
    # Identifier of the Collection this Item is linked to.
    collection_id: str

def add_collection(self, collection_id: str) -> None:
"""Link a Collection to the Item as its `collection` and `parent`.
Args:
collection_id: the id of the collection to link
"""
self.stac["collection"] = collection_id
self.add_link(rel="collection")
self.add_link(rel="parent")
def __init__( # pylint: disable-msg=too-many-arguments
self,
id_: str,
file: str,
now: Callable[[], datetime],
start_datetime: str,
end_datetime: str,
geometry: dict[str, Any],
bbox: BoundingBox,
collection_id: str,
) -> None:
file_content = fs.read(file)
file_modified_datetime = format_rfc_3339_datetime_string(modified(file))
now_string = format_rfc_3339_datetime_string(now())
self.id = id_

def add_link(self, rel: str, href: str = "./collection.json", file_type: str = "application/json") -> None:
self.stac["links"].append({"rel": rel, "href": href, "type": file_type})
self.links = [
{"rel": "self", "href": f"./{id_}.json", "type": "application/json"},
{"rel": "collection", "href": "./collection.json", "type": "application/json"},
{"rel": "parent", "href": "./collection.json", "type": "application/json"},
]
self.assets = {
"visual": {
"href": os.path.join(".", os.path.basename(file)),
"type": "image/tiff; application=geotiff; profile=cloud-optimized",
"file:checksum": checksum.multihash_as_hex(file_content),
"created": file_modified_datetime,
"updated": file_modified_datetime,
}
}
self.stac_extensions = [StacExtensions.file.value]
self.properties = Properties(
created=now_string,
updated=now_string,
start_datetime=start_datetime,
end_datetime=end_datetime,
)
self.geometry = geometry
self.bbox = bbox
self.collection_id = collection_id
41 changes: 22 additions & 19 deletions scripts/stac/imagery/tests/collection_test.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import json
import os
import tempfile
from collections.abc import Callable, Generator
from datetime import datetime, timezone
from shutil import rmtree
from tempfile import mkdtemp
from tempfile import TemporaryDirectory, mkdtemp
from typing import Callable, Generator

import pytest
import shapely.geometry
Expand Down Expand Up @@ -75,21 +74,21 @@ def test_id_parsed_on_init(metadata: CollectionMetadata) -> None:

def test_bbox_updated_from_none(metadata: CollectionMetadata) -> None:
collection = ImageryCollection(metadata, any_epoch_datetime)
bbox = [1799667.5, 5815977.0, 1800422.5, 5814986.0]
bbox = (1799667.5, 5815977.0, 1800422.5, 5814986.0)
collection.update_spatial_extent(bbox)
assert collection.stac["extent"]["spatial"]["bbox"] == [bbox]


def test_bbox_updated_from_existing(metadata: CollectionMetadata) -> None:
collection = ImageryCollection(metadata, any_epoch_datetime)
# init bbox
bbox = [174.889641, -41.217532, 174.902344, -41.203521]
bbox = (174.889641, -41.217532, 174.902344, -41.203521)
collection.update_spatial_extent(bbox)
# update bbox
bbox = [174.917643, -41.211157, 174.922965, -41.205490]
bbox = (174.917643, -41.211157, 174.922965, -41.205490)
collection.update_spatial_extent(bbox)

assert collection.stac["extent"]["spatial"]["bbox"] == [[174.889641, -41.217532, 174.922965, -41.203521]]
assert collection.stac["extent"]["spatial"]["bbox"] == [(174.889641, -41.217532, 174.922965, -41.203521)]


def test_interval_updated_from_none(metadata: CollectionMetadata) -> None:
Expand Down Expand Up @@ -128,18 +127,18 @@ def test_add_item(metadata: CollectionMetadata, subtests: SubTests) -> None:
item_file_path = "./scripts/tests/data/empty.tiff"
modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc)
os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))
item = ImageryItem("BR34_5000_0304", item_file_path, now_function)
start_datetime = "2021-01-27T00:00:00Z"
end_datetime = "2021-01-27T00:00:00Z"
geometry = {
"type": "Polygon",
"coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
}
bbox = (1799667.5, 5815977.0, 1800422.5, 5814986.0)
start_datetime = "2021-01-27T00:00:00Z"
end_datetime = "2021-01-27T00:00:00Z"
item.update_spatial(geometry, bbox)
item.update_datetime(start_datetime, end_datetime)
item = ImageryItem(
"BR34_5000_0304", item_file_path, now_function, start_datetime, end_datetime, geometry, bbox, collection.stac["id"]
)

collection.add_item(item.stac)
collection.add_item(item)

links = collection.stac["links"].copy()

Expand All @@ -159,15 +158,19 @@ def test_add_item(metadata: CollectionMetadata, subtests: SubTests) -> None:
with subtests.test():
assert collection.stac["extent"]["spatial"]["bbox"] == [bbox]

now_string = format_rfc_3339_datetime_string(now)
for property_name in ["created", "updated"]:
with subtests.test(msg=f"collection {property_name}"):
assert collection.stac[property_name] == format_rfc_3339_datetime_string(now)

with subtests.test(msg=f"item properties.{property_name}"):
assert item.stac["properties"][property_name] == format_rfc_3339_datetime_string(now)
assert collection.stac[property_name] == now_string

with subtests.test(msg=f"item assets.visual.{property_name}"):
assert item.stac["assets"]["visual"][property_name] == "2001-02-03T04:05:06Z"
assert item.assets["visual"][property_name] == "2001-02-03T04:05:06Z"

with subtests.test(msg="item properties.created"):
assert item.properties.created == now_string

with subtests.test(msg="item properties.updated"):
assert item.properties.updated == now_string


def test_write_collection(metadata: CollectionMetadata) -> None:
Expand Down Expand Up @@ -279,7 +282,7 @@ def test_capture_area_added(metadata: CollectionMetadata, subtests: SubTests) ->
}
)
)
with tempfile.TemporaryDirectory() as tmp_path:
with TemporaryDirectory() as tmp_path:
artifact_path = os.path.join(tmp_path, "tmp")
collection.add_capture_area(polygons, tmp_path, artifact_path)
file_target = os.path.join(tmp_path, file_name)
Expand Down
Loading

0 comments on commit ed177c6

Please sign in to comment.