Skip to content

Commit

Permalink
feat: update existing Collection (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
paulfouquet committed Dec 19, 2024
1 parent 0232744 commit de61f27
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 20 deletions.
53 changes: 45 additions & 8 deletions scripts/stac/imagery/collection.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import os
from typing import Any

Expand Down Expand Up @@ -92,6 +93,30 @@ def __init__(

self.add_providers(merge_provider_roles(providers))

@classmethod
def from_file(cls, file_name: str, metadata: CollectionMetadata, updated_datetime: str) -> "ImageryCollection":
    """Build an ImageryCollection from an existing Collection file.

    The Collection JSON is read from `file_name` and its `updated` field is replaced
    by `updated_datetime`. A new instance is created from the `created` and `linz:slug`
    values found in the file, then its `stac` is replaced by the loaded document.

    Args:
        file_name: The s3 URL or local path of the Collection file to load.
        metadata: The metadata passed to the ImageryCollection constructor.
        updated_datetime: The value to store in the `updated` field of the loaded Collection.

    Returns:
        The loaded ImageryCollection.
    """
    existing_stac = json.loads(read(file_name).decode("UTF-8"))
    existing_stac["updated"] = updated_datetime

    # Both values must be present in the loaded file; a missing key raises KeyError.
    created = existing_stac["created"]
    slug = existing_stac["linz:slug"]

    instance = cls(
        metadata=metadata,
        created_datetime=created,
        updated_datetime=updated_datetime,
        linz_slug=slug,
    )
    # The STAC built by the constructor is discarded in favour of the loaded document.
    instance.stac = existing_stac
    return instance

def add_capture_area(self, polygons: list[BaseGeometry], target: str, artifact_target: str = "/tmp") -> None:
"""Add the capture area of the Collection.
The `href` or path of the capture-area.geojson is always set as the relative `./capture-area.geojson`
Expand Down Expand Up @@ -165,14 +190,26 @@ def add_item(self, item: dict[Any, Any]) -> None:
"""
item_self_link = next((feat for feat in item["links"] if feat["rel"] == "self"), None)
if item_self_link:
self.stac["links"].append(
Link(
path=item_self_link["href"],
rel=Relation.ITEM,
media_type=StacMediaType.GEOJSON,
file_content=dict_to_json_bytes(item),
).stac
)
link_to_add = Link(
path=item_self_link["href"],
rel=Relation.ITEM,
media_type=StacMediaType.GEOJSON,
file_content=dict_to_json_bytes(item),
).stac

# Check if the Item to add already exists in the collection
exist = False
for link in self.stac["links"]:
if link["href"] == link_to_add["href"]:
if link["file:checksum"] == link_to_add["file:checksum"]:
exist = True
break
# If the item has been updated, remove the old link
self.stac["links"].remove(link)
break

if not exist:
self.stac["links"].append(link_to_add)
self.update_temporal_extent(item["properties"]["start_datetime"], item["properties"]["end_datetime"])
self.update_spatial_extent(item["bbox"])

Expand Down
25 changes: 13 additions & 12 deletions scripts/stac/imagery/create_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,20 @@ def create_collection(
Returns:
an ImageryCollection object
"""
existing_collection = {}
if odr_url:
existing_collection = get_published_file_contents(odr_url, "collection")

collection = ImageryCollection(
metadata=collection_metadata,
created_datetime=cast(str, existing_collection.get("created", current_datetime)),
updated_datetime=current_datetime,
linz_slug=linz_slug,
collection_id=collection_id,
providers=get_providers(licensors, producers),
add_title_suffix=add_title_suffix,
)
collection = ImageryCollection.from_file(
os.path.join(odr_url, "collection.json"), collection_metadata, current_datetime
)
else:
collection = ImageryCollection(
metadata=collection_metadata,
created_datetime=current_datetime,
updated_datetime=current_datetime,
linz_slug=linz_slug,
collection_id=collection_id,
providers=get_providers(licensors, producers),
add_title_suffix=add_title_suffix,
)

for item in stac_items:
collection.add_item(item)
Expand Down
80 changes: 80 additions & 0 deletions scripts/stac/imagery/tests/create_stac_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ def test_create_collection_resupply(
"type": "Collection",
"stac_version": STAC_VERSION,
"id": collection_id,
"linz:slug": fake_linz_slug,
"created": created_datetime_string,
"updated": created_datetime_string,
}
Expand Down Expand Up @@ -287,6 +288,85 @@ def test_create_collection_resupply(
assert collection.stac["updated"] == updated_datetime_string


def test_create_collection_resupply_add_items(
    fake_collection_metadata: CollectionMetadata, fake_linz_slug: str, subtests: SubTests, tmp_path: Path
) -> None:
    """Check that resupplying a Collection keeps its existing Item link and adds the new one.

    A `collection.json` holding one Item link is written to `tmp_path`, which is then
    passed as `odr_url` to `create_collection` together with one new Item. The resulting
    Collection must keep the original `created` value, carry the new `updated` value and
    contain both Item links.
    """
    test_id = "test_collection"
    first_published = any_epoch_datetime()
    first_published_string = format_rfc_3339_datetime_string(first_published)
    # The resupply happens one day after the Collection was first created.
    resupply_string = format_rfc_3339_datetime_string(first_published + timedelta(days=1))

    published_item_link = {
        "rel": "item",
        "href": "./item_a.json",
        "type": "application/geo+json",
        "file:checksum": "1220559708896b75aebab2bbadbc184f6b9ce22708adbb725e93bf3f08a38d2bc71e",
    }
    published_links = [
        {
            "rel": "root",
            "href": "https://nz-imagery.s3.ap-southeast-2.amazonaws.com/catalog.json",
            "type": "application/json",
        },
        {"rel": "self", "href": "./collection.json", "type": "application/json"},
        published_item_link,
    ]
    published_collection = {
        "type": "Collection",
        "stac_version": STAC_VERSION,
        "id": test_id,
        "linz:slug": fake_linz_slug,
        "links": published_links,
        "created": first_published_string,
        "updated": first_published_string,
    }
    (tmp_path / "collection.json").write_text(json.dumps(published_collection))

    new_item = {
        "type": "Feature",
        "id": "item_b",
        "links": [
            {"href": "./item_b.json", "rel": "self", "type": "application/geo+json"},
            {"href": "./collection.json", "rel": "collection", "type": "application/json"},
            {"href": "./collection.json", "rel": "parent", "type": "application/json"},
        ],
        "properties": {"start_datetime": "2024-09-02T12:00:00Z", "end_datetime": "2024-09-02T12:00:00Z"},
        "bbox": [171.8256487, -34.3559317, 172.090076, -34.0291036],
    }
    # Link expected in the Collection once `new_item` has been added.
    new_item_link = {
        "rel": "item",
        "href": "./item_b.json",
        "type": "application/geo+json",
        "file:checksum": "12203040c94dda3807c4430b312e9b400604188a639f22cc8067136084662fc2618d",
    }

    collection = create_collection(
        collection_id=test_id,
        linz_slug=fake_linz_slug,
        collection_metadata=fake_collection_metadata,
        current_datetime=resupply_string,
        producers=[],
        licensors=[],
        stac_items=[new_item],
        item_polygons=[],
        add_capture_dates=False,
        uri="test",
        odr_url=tmp_path.as_posix(),
    )

    with subtests.test("created datetime"):
        assert collection.stac["created"] == published_collection["created"]

    with subtests.test("updated datetime"):
        assert collection.stac["updated"] == resupply_string

    with subtests.test("links"):
        resulting_links = collection.stac["links"]
        for expected_link in (new_item_link, published_item_link):
            assert expected_link in resulting_links


def test_create_item_with_odr_url(tmp_path: Path) -> None:
item_name = "empty"
existing_item_file = tmp_path / f"{item_name}.json"
Expand Down

0 comments on commit de61f27

Please sign in to comment.