Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wip: includeDerived TDE-1251 #1039

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions scripts/cli/cli_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@ class InputParameterError(Exception):

class TileFiles(NamedTuple):
    """Describe one output tile together with the input files used to create it."""

    output: str
    """ The tile name of the output file that will be created """

    inputs: list[str]
    """ The list of input files to be used to create the output file """

    includeDerived: bool = False
    """ Whether the STAC Item should include the derived_from links """


def get_tile_files(source: str) -> list[TileFiles]:
Expand All @@ -34,11 +38,14 @@ def get_tile_files(source: str) -> list[TileFiles]:

Example:
>>> get_tile_files('[{"output": "CE16_5000_1001", "input": ["s3://bucket/SN9457_CE16_10k_0501.tif"]}]')
[TileFiles(output='CE16_5000_1001', inputs=['s3://bucket/SN9457_CE16_10k_0501.tif'])]
[TileFiles(output='CE16_5000_1001', inputs=['s3://bucket/SN9457_CE16_10k_0501.tif'], includeDerived=False)]
"""
try:
source_json: list[TileFiles] = json.loads(
source, object_hook=lambda d: TileFiles(inputs=d["input"], output=d["output"])
source,
object_hook=lambda d: TileFiles(
inputs=d["input"], output=d["output"], includeDerived=d.get("includeDerived", False)
),
)
except (json.decoder.JSONDecodeError, KeyError) as e:
get_log().error(type(e).__name__, error=str(e))
Expand Down
19 changes: 19 additions & 0 deletions scripts/cli/tests/cli_helper_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,29 @@ def test_get_tile_files(subtests: SubTests) -> None:
with subtests.test():
assert expected_input_filenames == source[0].inputs

with subtests.test():
assert source[0].includeDerived is False

with subtests.test():
assert expected_output_filename_b == source[1].output


def test_get_tile_files_with_include_derived(subtests: SubTests) -> None:
    """Check that ``get_tile_files`` reads an explicit ``includeDerived`` flag.

    The source JSON holds a single tile entry with ``"includeDerived": true``.
    The output name, the input list and the flag are each asserted in their
    own subtest.
    """
    tiles_json = '[{"output": "tile_name","input": ["file_a.tiff", "file_b.tiff"], "includeDerived": true}]'

    tiles: list[TileFiles] = get_tile_files(tiles_json)

    with subtests.test():
        assert tiles[0].output == "tile_name"

    with subtests.test():
        assert tiles[0].inputs == ["file_a.tiff", "file_b.tiff"]

    with subtests.test():
        assert tiles[0].includeDerived is True


def test_parse_list() -> None:
str_list = "Auckland Council; Toitū Te Whenua Land Information New Zealand;Nelson Council;"
list_parsed = parse_list(str_list)
Expand Down
15 changes: 14 additions & 1 deletion scripts/files/file_tiff.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import os
from decimal import Decimal
from enum import Enum
from typing import Annotated, Any
Expand Down Expand Up @@ -30,6 +31,7 @@ def __init__(
self,
paths: list[str],
preset: str | None = None,
include_derived: bool = False,
) -> None:
paths_original = []
for p in paths:
Expand All @@ -39,6 +41,9 @@ def __init__(
paths_original.append(unquote(p))

self._paths_original = paths_original
self._derived_from = []
if include_derived:
self._derived_from = [f"{os.path.splitext(path)[0]}.json" for path in paths_original]
self._path_standardised = ""
self._errors: list[dict[str, Any]] = []
self._gdalinfo: GdalInfo | None = None
Expand Down Expand Up @@ -150,14 +155,22 @@ def get_errors(self) -> list[dict[str, Any]]:
return self._errors

def get_paths_original(self) -> list[str]:
    """Get the path(s) of the original (non standardised) file(s).

    There can be several paths if the standardised file is a retiled image.

    Returns:
        a list of file paths
    """
    return self._paths_original

def get_derived_from(self) -> list[str]:
    """Return the STAC Item path(s) associated with the original TIFF files.

    Returns:
        the list of STAC Item file paths stored on this object
    """
    return self._derived_from

def get_path_standardised(self) -> str:
"""Get the path of the standardised file.

Expand Down
6 changes: 6 additions & 0 deletions scripts/stac/imagery/create_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def create_item(
start_datetime: str,
end_datetime: str,
collection_id: str,
derived_from: list[str],
gdalinfo_result: GdalInfo | None = None,
) -> ImageryItem:
"""Create an ImageryItem (STAC) to be linked to a Collection.
Expand All @@ -22,6 +23,7 @@ def create_item(
start_datetime: start date of the survey
end_datetime: end date of the survey
collection_id: collection id to link to the Item
derived_from: list of STAC Items from where this Item is derived
gdalinfo_result: result of the gdalinfo command. Defaults to None.

Returns:
Expand All @@ -39,5 +41,9 @@ def create_item(
item.update_spatial(geometry, bbox)
item.add_collection(collection_id)

for derived in derived_from:
# TODO: add checksum and maybe created datetime and updated datetime
item.add_link(rel="derived_from", href=derived)

get_log().info("ImageryItem created", path=file)
return item
7 changes: 6 additions & 1 deletion scripts/standardise_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,12 @@ def main() -> None:

# Create STAC and save in target
item = create_item(
file.get_path_standardised(), start_datetime, end_datetime, arguments.collection_id, file.get_gdalinfo()
file.get_path_standardised(),
start_datetime,
end_datetime,
arguments.collection_id,
file.get_derived_from(),
file.get_gdalinfo(),
)
write(stac_item_path, dict_to_json_bytes(item.stac), content_type=ContentType.GEOJSON.value)
get_log().info("stac_saved", path=stac_item_path)
Expand Down
2 changes: 1 addition & 1 deletion scripts/standardising.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def standardising(
footprint_file_name = files.output + SUFFIX_FOOTPRINT
standardized_file_path = os.path.join(target_output, standardized_file_name)
footprint_file_path = os.path.join(target_output, footprint_file_name)
tiff = FileTiff(files.inputs, preset)
tiff = FileTiff(files.inputs, preset, files.includeDerived)
tiff.set_path_standardised(standardized_file_path)

# Already processed, so processing can be skipped
Expand Down
11 changes: 11 additions & 0 deletions scripts/tests/data/national-dem_aws.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[
{
"output": "BR20",
"input": [
"s3://nz-elevation/west-coast/west-coast_2020-2022/dem_1m/2193/BR20_10000_0401.tiff",
"s3://nz-elevation/west-coast/west-coast_2020-2022/dem_1m/2193/BR20_10000_0402.tiff",
"s3://nz-elevation/west-coast/west-coast_2020-2022/dem_1m/2193/BR20_10000_0403.tiff"
],
"includeDerived": true
}
]
Loading