Skip to content

Commit

Permalink
feat: Remove empty TIFFs after standardising TDE-964 (#767)
Browse files: browse the repository at this point in the history.
* feat: Remove empty TIFFs after standardising TDE-964

* refactor: Check tile byte counts only in first page
  • Loading branch information
l0b0 authored Jan 9, 2024
1 parent 83f88f9 commit 70d79b3
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 16 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/format-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,9 @@ jobs:
run: |
docker run -v "${HOME}/tmp/:/tmp/" topo-imagery python3 translate_ascii.py --from-file ./tests/data/elevation_ascii.json --target /tmp/
cmp --silent "${HOME}/tmp/elevation_ascii.tiff" ./scripts/tests/data/output/elevation_ascii.tiff
- name: End to end test - Remove empty files
run: |
docker run -v "${HOME}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01
empty_target_directory="$(find "${HOME}/tmp-empty" -maxdepth 0 -type d -empty)"
[[ -n "$empty_target_directory" ]]
21 changes: 19 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ boto3 = "^1.28.70"
linz-logger = "^0.11.0"
py-multihash = "^2.0.1"
shapely = "^2.0.1"
tifffile = "^2023.12.9"

[tool.poetry.dev-dependencies]
black = "^23.11.0"
Expand Down
38 changes: 24 additions & 14 deletions scripts/standardising.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import List, Optional

from linz_logger import get_log
from tifffile import TiffFile

from scripts.aws.aws_helper import is_s3
from scripts.cli.cli_helper import TileFiles
Expand Down Expand Up @@ -55,17 +56,21 @@ def run_standardising(
get_log().info("standardising_start", gdalVersion=gdal_version, fileCount=len(todo))

with Pool(concurrency) as p:
standardized_tiffs = p.map(
partial(
standardising,
preset=preset,
source_epsg=source_epsg,
target_epsg=target_epsg,
target_output=target_output,
cutline=cutline,
),
todo,
)
standardized_tiffs = [
entry
for entry in p.map(
partial(
standardising,
preset=preset,
source_epsg=source_epsg,
target_epsg=target_epsg,
target_output=target_output,
cutline=cutline,
),
todo,
)
if entry is not None
]
p.close()
p.join()

Expand Down Expand Up @@ -100,7 +105,7 @@ def standardising(
target_epsg: str,
cutline: Optional[str],
target_output: str = "/tmp/",
) -> FileTiff:
) -> Optional[FileTiff]:
"""Apply transformations using GDAL to the source file.
Args:
Expand Down Expand Up @@ -189,5 +194,10 @@ def standardising(
# Need GDAL to write to temporary location so no broken files end up in the done folder.
run_gdal(command, input_file=input_file, output_file=standardized_working_path)

write(standardized_file_path, read(standardized_working_path), content_type=ContentType.GEOTIFF.value)
return tiff
with TiffFile(standardized_working_path) as file_handle:
if any(tile_byte_count != 0 for tile_byte_count in file_handle.pages.first.tags["TileByteCounts"].value):
write(standardized_file_path, read(standardized_working_path), content_type=ContentType.GEOTIFF.value)
return tiff

get_log().info("Skipping empty output image", path=input_file, sourceEPSG=source_epsg, targetEPSG=target_epsg)
return None
Empty file added scripts/tests/__init__.py
Empty file.
6 changes: 6 additions & 0 deletions scripts/tests/data/empty.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[
{
"output": "BQ30_5000_1234",
"input": ["./tests/data/empty.tiff"]
}
]
Binary file added scripts/tests/data/empty.tiff
Binary file not shown.

0 comments on commit 70d79b3

Please sign in to comment.