From 70d79b3d55678fd0a6fc6145005094eab27962b2 Mon Sep 17 00:00:00 2001 From: Victor Engmark Date: Tue, 9 Jan 2024 13:55:46 +1300 Subject: [PATCH] feat: Remove empty TIFFs after standardising TDE-964 (#767) * feat: Remove empty TIFFs after standardising TDE-964 * refactor: Check tile byte counts only in first page --- .github/workflows/format-tests.yml | 6 +++++ poetry.lock | 21 ++++++++++++++-- pyproject.toml | 1 + scripts/standardising.py | 38 ++++++++++++++++++----------- scripts/tests/__init__.py | 0 scripts/tests/data/empty.json | 6 +++++ scripts/tests/data/empty.tiff | Bin 0 -> 412 bytes 7 files changed, 56 insertions(+), 16 deletions(-) create mode 100644 scripts/tests/__init__.py create mode 100644 scripts/tests/data/empty.json create mode 100644 scripts/tests/data/empty.tiff diff --git a/.github/workflows/format-tests.yml b/.github/workflows/format-tests.yml index 41c545f8c..0a8ca39bb 100644 --- a/.github/workflows/format-tests.yml +++ b/.github/workflows/format-tests.yml @@ -62,3 +62,9 @@ jobs: run: | docker run -v "${HOME}/tmp/:/tmp/" topo-imagery python3 translate_ascii.py --from-file ./tests/data/elevation_ascii.json --target /tmp/ cmp --silent "${HOME}/tmp/elevation_ascii.tiff" ./scripts/tests/data/output/elevation_ascii.tiff + + - name: End to end test - Remove empty files + run: | + docker run -v "${HOME}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 + empty_target_directory="$(find "${HOME}/tmp-empty" -maxdepth 0 -type d -empty)" + [[ -n "$empty_target_directory" ]] diff --git a/poetry.lock b/poetry.lock index 222dfa5e3..24c0f53ba 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "arrow" @@ -1245,6 +1245,23 @@ docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib- tests = ["coverage[toml]", "freezegun (>=0.2.8)", "pretend", "pytest (>=6.0)", "pytest-asyncio (>=0.17)", "simplejson"] typing = ["mypy", "rich", "twisted"] +[[package]] +name = "tifffile" +version = "2023.12.9" +description = "Read and write TIFF files" +optional = false +python-versions = ">=3.9" +files = [ + {file = "tifffile-2023.12.9-py3-none-any.whl", hash = "sha256:9b066e4b1a900891ea42ffd33dab8ba34c537935618b9893ddef42d7d422692f"}, + {file = "tifffile-2023.12.9.tar.gz", hash = "sha256:9dd1da91180a6453018a241ff219e1905f169384355cd89c9ef4034c1b46cdb8"}, +] + +[package.dependencies] +numpy = "*" + +[package.extras] +all = ["defusedxml", "fsspec", "imagecodecs (>=2023.8.12)", "lxml", "matplotlib", "zarr"] + [[package]] name = "toml" version = "0.10.2" @@ -1391,4 +1408,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.10.6" -content-hash = "86b657da05531f0060c67a9095e2787898ecf226aa7b217fac07cc0f03e6622e" +content-hash = "3094394b0af9bcd26ae16eb23ef09a4a819682822245db488438a041f3c48f65" diff --git a/pyproject.toml b/pyproject.toml index 00a18b2b2..788d6c83e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ boto3 = "^1.28.70" linz-logger = "^0.11.0" py-multihash = "^2.0.1" shapely = "^2.0.1" +tifffile = "^2023.12.9" [tool.poetry.dev-dependencies] black = "^23.11.0" diff --git a/scripts/standardising.py b/scripts/standardising.py index 2f52d2e6c..68ccc10ed 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -5,6 +5,7 @@ from typing import List, Optional from linz_logger import get_log +from tifffile import TiffFile from scripts.aws.aws_helper import is_s3 from scripts.cli.cli_helper import TileFiles @@ -55,17 +56,21 @@ def run_standardising( get_log().info("standardising_start", gdalVersion=gdal_version, fileCount=len(todo)) with Pool(concurrency) as p: - standardized_tiffs = p.map( - partial( - standardising, - preset=preset, - source_epsg=source_epsg, - target_epsg=target_epsg, - target_output=target_output, - cutline=cutline, - ), - todo, - ) + standardized_tiffs = [ + entry + for entry in p.map( + partial( + standardising, + preset=preset, + source_epsg=source_epsg, + target_epsg=target_epsg, + target_output=target_output, + cutline=cutline, + ), + todo, + ) + if entry is not None + ] p.close() p.join() @@ -100,7 +105,7 @@ def standardising( target_epsg: str, cutline: Optional[str], target_output: str = "/tmp/", -) -> FileTiff: +) -> Optional[FileTiff]: """Apply transformations using GDAL to the source file. Args: @@ -189,5 +194,10 @@ def standardising( # Need GDAL to write to temporary location so no broken files end up in the done folder. run_gdal(command, input_file=input_file, output_file=standardized_working_path) - write(standardized_file_path, read(standardized_working_path), content_type=ContentType.GEOTIFF.value) - return tiff + with TiffFile(standardized_working_path) as file_handle: + if any(tile_byte_count != 0 for tile_byte_count in file_handle.pages.first.tags["TileByteCounts"].value): + write(standardized_file_path, read(standardized_working_path), content_type=ContentType.GEOTIFF.value) + return tiff + + get_log().info("Skipping empty output image", path=input_file, sourceEPSG=source_epsg, targetEPSG=target_epsg) + return None diff --git a/scripts/tests/__init__.py b/scripts/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/scripts/tests/data/empty.json b/scripts/tests/data/empty.json new file mode 100644 index 000000000..6f246a42a --- /dev/null +++ b/scripts/tests/data/empty.json @@ -0,0 +1,6 @@ +[ + { + "output": "BQ30_5000_1234", + "input": ["./tests/data/empty.tiff"] + } +] diff --git a/scripts/tests/data/empty.tiff b/scripts/tests/data/empty.tiff new file mode 100644 index 0000000000000000000000000000000000000000..f3498a17043c197172c3b7e49a6a0673355c3604 GIT binary patch literal 412 zcmebD)MDUZU|@|mD)kSb;*HEc+1Q6#nu659#N1{vy%#CBn1 z0owyKlED>;9gL)xubGE|87O}Xh?`n?7}$XHCm>$m&di_yq?LeXZfs{_umZA^fb7O4 zObjeQwvnkRkc0t02M7yB!^E-E4(qKPK{5|Egn(%fn-T0%b_S5)AU4psV48_#W4i%J zo?~M>CxZb48!&(v8TynNctG?-4i+%2sq7c!?qXzMV4$F{;Fnsi5S5yklbDyH5K@$w cS6r4_RGg~dn_85dSdw3)08&;1Q(waX0JR?`v;Y7A literal 0 HcmV?d00001