From e2343d5f1959e71d3634f134cd7d61c1447ab261 Mon Sep 17 00:00:00 2001 From: Victor Engmark Date: Tue, 21 Nov 2023 10:00:30 +1300 Subject: [PATCH 01/15] feat: Pin actions to hashes (#729) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Done with pin-github-action 1.8.0 using `npx pin-github-action .github/workflows/*.yml`. Dependabot should support updating in the same fashion . Had to `export GH_ADMIN_TOKEN=github_pat_…` using a fine-grained personal access tokens with no extra access to work around rate limiting *and* to be able to work in private repos . --- .github/workflows/containers.yml | 16 ++++++++-------- .github/workflows/format-tests.yml | 4 ++-- .github/workflows/release-please.yml | 18 +++++++++--------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/containers.yml b/.github/workflows/containers.yml index 8409abaa6..7f5612a0c 100644 --- a/.github/workflows/containers.yml +++ b/.github/workflows/containers.yml @@ -17,7 +17,7 @@ jobs: AWS_CI_ROLE: ${{ secrets.AWS_CI_ROLE }} steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3 with: fetch-depth: 0 @@ -30,11 +30,11 @@ jobs: { echo "version=${GIT_VERSION}"; echo "version_major=${GIT_VERSION_MAJOR}"; echo "version_major_minor=${GIT_VERSION_MAJOR_MINOR}"; } >> "$GITHUB_OUTPUT" - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@885d1462b80bc1c1c7f0b00334ad271f09369c55 # v2 - name: Docker meta id: meta - uses: docker/metadata-action@v4 + uses: docker/metadata-action@818d4b7b91585d195f67373fd9cb0332e31a7175 # v4 with: images: ${{ github.repository }} labels: | @@ -42,7 +42,7 @@ jobs: - name: Login to GitHub Container Registry if: ${{(github.ref == 'refs/heads/master') && !(startsWith(github.event.head_commit.message, 'release:'))}} - uses: docker/login-action@v2 + uses: 
docker/login-action@465a07811f14bebb1938fbed4728c6a1ff8901fc # v2 with: registry: ghcr.io username: ${{ github.repository_owner }} @@ -50,7 +50,7 @@ jobs: - name: Configure AWS Credentials if: ${{env.AWS_CI_ROLE != '' && (github.ref == 'refs/heads/master') && !(startsWith(github.event.head_commit.message, 'release:'))}} - uses: aws-actions/configure-aws-credentials@v2 + uses: aws-actions/configure-aws-credentials@5fd3084fc36e372ff1fff382a39b10d03659f355 # v2 with: aws-region: ap-southeast-2 mask-aws-account-id: true @@ -59,11 +59,11 @@ jobs: - name: Login to Amazon ECR if: ${{env.AWS_CI_ROLE != '' && (github.ref == 'refs/heads/master') && !(startsWith(github.event.head_commit.message, 'release:'))}} id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 + uses: aws-actions/amazon-ecr-login@2fc7aceee09e9e4a7105c0d060c656fad0b4f63d # v1 - name: Setup docker tags id: tags - uses: actions/github-script@v6 + uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410 # v6 with: result-encoding: string script: | @@ -77,7 +77,7 @@ jobs: return tags.join(', ') - name: Build and push container - uses: docker/build-push-action@v4 + uses: docker/build-push-action@0a97817b6ade9f46837855d676c4cca3a2471fc9 # v4 with: context: . 
tags: ${{ steps.tags.outputs.result }} diff --git a/.github/workflows/format-tests.yml b/.github/workflows/format-tests.yml index 1d7a8f4dc..41c545f8c 100644 --- a/.github/workflows/format-tests.yml +++ b/.github/workflows/format-tests.yml @@ -5,10 +5,10 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3 - name: Use Python "3.10.6" - uses: actions/setup-python@v4 + uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4 with: python-version: "3.10.6" - name: Install diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index 120d8b27c..c50e70aa9 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -13,7 +13,7 @@ jobs: outputs: release_created: ${{ steps.release.outputs.release_created }} steps: - - uses: google-github-actions/release-please-action@v3 + - uses: google-github-actions/release-please-action@db8f2c60ee802b3748b512940dde88eabd7b7e01 # v3 id: release with: release-type: python @@ -32,7 +32,7 @@ jobs: if: ${{ needs.release-please.outputs.release_created }} steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3 with: fetch-depth: 0 @@ -45,25 +45,25 @@ jobs: { echo "version=${GIT_VERSION}"; echo "version_major=${GIT_VERSION_MAJOR}"; echo "version_major_minor=${GIT_VERSION_MAJOR_MINOR}"; } >> "$GITHUB_OUTPUT" - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@885d1462b80bc1c1c7f0b00334ad271f09369c55 # v2 - name: Docker meta id: meta - uses: docker/metadata-action@v4 + uses: docker/metadata-action@818d4b7b91585d195f67373fd9cb0332e31a7175 # v4 with: images: ${{ github.repository }} labels: | org.opencontainers.image.version=${{ steps.version.outputs.version }} - name: Login to GitHub Container Registry - uses: docker/login-action@v2 + uses: 
docker/login-action@465a07811f14bebb1938fbed4728c6a1ff8901fc # v2 with: registry: ghcr.io username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v2 + uses: aws-actions/configure-aws-credentials@5fd3084fc36e372ff1fff382a39b10d03659f355 # v2 if: ${{env.AWS_CI_ROLE != ''}} with: aws-region: ap-southeast-2 @@ -72,12 +72,12 @@ jobs: - name: Login to Amazon ECR id: login-ecr - uses: aws-actions/amazon-ecr-login@v1 + uses: aws-actions/amazon-ecr-login@2fc7aceee09e9e4a7105c0d060c656fad0b4f63d # v1 if: ${{env.AWS_CI_ROLE != ''}} - name: Setup docker tags id: tags - uses: actions/github-script@v6 + uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410 # v6 with: result-encoding: string script: | @@ -95,7 +95,7 @@ jobs: return tags.join(', ') - name: Build and push container - uses: docker/build-push-action@v4 + uses: docker/build-push-action@0a97817b6ade9f46837855d676c4cca3a2471fc9 # v4 with: context: . 
tags: ${{ steps.tags.outputs.result }} From 52cc21ee977056b273d1867a673efcce7aad5f4e Mon Sep 17 00:00:00 2001 From: paulfouquet <86932794+paulfouquet@users.noreply.github.com> Date: Wed, 22 Nov 2023 15:47:56 +1300 Subject: [PATCH 02/15] build: upgrade GDAL to 3.8.0 TDE-829 (#732) * build: upgrade GDAL to 3.8.0 TDE-829 * test: update output file * test: remove --silent flag to debug * test: update test file --- Dockerfile | 2 +- scripts/standardising.py | 6 ------ .../tests/data/output/BK39_10000_0101.tiff | Bin 3257 -> 3693 bytes .../tests/data/output/BK39_10000_0102.tiff | Bin 4821 -> 4885 bytes 4 files changed, 1 insertion(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 05b3642ea..90c51549e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ghcr.io/osgeo/gdal:ubuntu-small-3.7.2 +FROM ghcr.io/osgeo/gdal:ubuntu-small-3.8.0 RUN apt-get update # Install pip diff --git a/scripts/standardising.py b/scripts/standardising.py index 6ca2c09b0..c20182bb3 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -186,12 +186,6 @@ def standardising( max_x = min_x + output_bounds.size.width command.extend(["-co", f"TARGET_SRS=EPSG:{target_epsg}", "-co", f"EXTENT={min_x},{min_y},{max_x},{max_y}"]) - # Workaround https://github.com/OSGeo/gdal/issues/8169 - if len(source_tiffs) > 1: - no_stats_file_path = os.path.join(tmp_path, files.output + "_no_stats.tiff") - run_gdal(command, input_file=input_file, output_file=no_stats_file_path) - input_file = no_stats_file_path - # Need GDAL to write to temporary location so no broken files end up in the done folder. 
run_gdal(command, input_file=input_file, output_file=standardized_working_path) diff --git a/scripts/tests/data/output/BK39_10000_0101.tiff b/scripts/tests/data/output/BK39_10000_0101.tiff index 8d28b6ffca590789f0c64b50bf04a297310fdbd0..3a5832d0d7ca4221531be8084f5a65eeb752e60e 100644 GIT binary patch delta 539 zcmdlf`BrAa0Y>482TPgMSSDLAD%B@2GcYu@@G!73FfddA+3VYx85Dr*6+rgJb|wZZ zAo~T7-LQm-VFe=t!zLhZ1hPTuqk!U{j8 zxD*s@JWEn@74i~uQ*D)kLmWdqgF`%>gX`mc9V0w_Lw%JLiW74Sa)6QsN_M6edPe4! zmX;Rg=9Wg5<~I5uy|@jD_jPsjLovYENYBjJ(A?P4%+So((ge2w^$=&o`+E9exWdRt z&m8Cgh%=0Fn*lWN&{jHpKmjrcG}A5WL~0M{UA zSHBPxqYSZV2I>WdUVK!%YfzAXkdi`5er{rBo~@FnucNzbd~irmsB=hYkgJlNfu4Z@ T+yEPWq^MyA#?9uXj3V3st6h#Z delta 99 zcmaDWvr}@y0Y;&T2TPfXm?m2=DtWJAWMF7&;bCB7U|=``WUp^$W>5gKzW~`A+nE@w mfNT>c28PBZOkg$3fa0HyvM>OJ80G=lAix5lHb*jxa038`JQA4z diff --git a/scripts/tests/data/output/BK39_10000_0102.tiff b/scripts/tests/data/output/BK39_10000_0102.tiff index f2f924595699d9378a6455f53dd10977624ee9ca..fd0d61b231f2db2cae556a5b7ab500835908b77b 100644 GIT binary patch delta 177 zcmcbrI#q3h86%S#%VY~iCEEmM28N~<9tJiB28IeCdwn}Ig94Df0?6Lj&ct8^WWNBi z8~>CYkY7>P^fcA RXppOtoq- Date: Thu, 23 Nov 2023 15:34:32 +1300 Subject: [PATCH 03/15] feat: force LERC overview max Z error threshold to 0.1 (10cm) TDE-873 (#740) * feat: use MAX_Z_ERROR_OVERVIEW for LERC TDE-873 * refactor: use lowercase for gdal arguments * test: fix test data output --- scripts/gdal/gdal_preset.py | 5 ++++- scripts/gdal/tests/gdal_preset_test.py | 7 ++++--- .../tests/data/output/BK39_10000_0101.tiff | Bin 3693 -> 3765 bytes .../tests/data/output/BK39_10000_0102.tiff | Bin 4885 -> 4957 bytes 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/gdal/gdal_preset.py b/scripts/gdal/gdal_preset.py index 025562652..d42a7bb0f 100644 --- a/scripts/gdal/gdal_preset.py +++ b/scripts/gdal/gdal_preset.py @@ -39,6 +39,9 @@ "-co", # Set Max Z Error to 1mm "max_z_error=0.001", + "-co", + # Set MAX Z ERROR OVERVIEW to 10cm + "max_z_error_overview=0.1", # 
Force all DEMS to AREA to be consistent # input tiffs vary between AREA or POINT "-mo", @@ -77,7 +80,7 @@ "-co", "overview_quality=90", "-co", - "OVERVIEWS=IGNORE_EXISTING", + "overviews=ignore_existing", ] diff --git a/scripts/gdal/tests/gdal_preset_test.py b/scripts/gdal/tests/gdal_preset_test.py index 92aeec91d..032c867ca 100644 --- a/scripts/gdal/tests/gdal_preset_test.py +++ b/scripts/gdal/tests/gdal_preset_test.py @@ -18,7 +18,7 @@ def test_preset_webp() -> None: assert "overview_compress=webp" in gdal_command assert "overview_resampling=lanczos" in gdal_command assert "overview_quality=90" in gdal_command - assert "OVERVIEWS=IGNORE_EXISTING" in gdal_command + assert "overviews=ignore_existing" in gdal_command assert "EPSG:2193" in gdal_command @@ -40,7 +40,7 @@ def test_preset_lzw() -> None: assert "overview_compress=webp" in gdal_command assert "overview_resampling=lanczos" in gdal_command assert "overview_quality=90" in gdal_command - assert "OVERVIEWS=IGNORE_EXISTING" in gdal_command + assert "overviews=ignore_existing" in gdal_command assert "EPSG:2193" in gdal_command @@ -56,12 +56,13 @@ def test_preset_dem_lerc() -> None: # LERC compression assert "compress=lerc" in gdal_command assert "max_z_error=0.001" in gdal_command + assert "max_z_error_overview=0.1" in gdal_command # No webp overviews assert "overview_compress=webp" not in gdal_command assert "overview_resampling=lanczos" not in gdal_command assert "overview_quality=90" not in gdal_command - assert "OVERVIEWS=IGNORE_EXISTING" not in gdal_command + assert "overviews=ignore_existing" not in gdal_command assert "EPSG:2193" in gdal_command diff --git a/scripts/tests/data/output/BK39_10000_0101.tiff b/scripts/tests/data/output/BK39_10000_0101.tiff index 3a5832d0d7ca4221531be8084f5a65eeb752e60e..0e7a02cd2037e59731dac315dcfff6594e8be5d2 100644 GIT binary patch delta 114 zcmaDWvsHG386#ufWOGJk+ZD_V3{5RO3~USx3 Date: Fri, 24 Nov 2023 09:04:09 +1300 Subject: [PATCH 04/15] release: 3.4.0 (#741) 
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- CHANGELOG.md | 9 +++++++++ pyproject.toml | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d35cd514b..65fb45fcc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## [3.4.0](https://github.com/linz/topo-imagery/compare/v3.3.1...v3.4.0) (2023-11-23) + + +### Features + +* force LERC overview max Z error threshold to 0.1 (10cm) TDE-873 ([#740](https://github.com/linz/topo-imagery/issues/740)) ([0924c13](https://github.com/linz/topo-imagery/commit/0924c132fd01221cb0467cba2126dbc8ba80b269)) +* lint GitHub Actions workflows TDE-919 ([#720](https://github.com/linz/topo-imagery/issues/720)) ([1d32588](https://github.com/linz/topo-imagery/commit/1d325881607c69022dec6c93e178f85e9e05705e)) +* Pin actions to hashes ([#729](https://github.com/linz/topo-imagery/issues/729)) ([e2343d5](https://github.com/linz/topo-imagery/commit/e2343d5f1959e71d3634f134cd7d61c1447ab261)) + ## [3.3.1](https://github.com/linz/topo-imagery/compare/v3.3.0...v3.3.1) (2023-10-04) diff --git a/pyproject.toml b/pyproject.toml index f158bc116..2b9562da8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ ignore_missing_imports = true [tool.poetry] name = "topo-imagery" -version = "3.3.1" +version = "3.4.0" description = "A collection of scripts for processing imagery" authors = [ "Blayne Chard ", From de8a129d60bb7fc16f7872542ee93bc3ce3c9570 Mon Sep 17 00:00:00 2001 From: Victor Engmark Date: Mon, 4 Dec 2023 15:31:40 +1300 Subject: [PATCH 05/15] feat: Pin Docker image TDE-958 (#744) As per . 
--- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 90c51549e..22dd84fc9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ghcr.io/osgeo/gdal:ubuntu-small-3.8.0 +FROM ghcr.io/osgeo/gdal:ubuntu-small-3.8.0@sha256:3ca7c26ef3aff8c8d134bbaa62e13d1a4c97c8a7b2853f1db0ff34c2f939a4de RUN apt-get update # Install pip From 250b82c196c1f3db601e9712d6834d6da28e6952 Mon Sep 17 00:00:00 2001 From: Victor Engmark Date: Tue, 5 Dec 2023 08:54:56 +1300 Subject: [PATCH 06/15] feat: Dependabot for Docker TDE-963 (#746) Important to enable automated updates of dependencies once pinning is merged. --- .github/dependabot.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index c28fcab16..6bc98a16a 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,5 +1,9 @@ version: 2 updates: + - package-ecosystem: docker + directory: / + schedule: + interval: daily - package-ecosystem: "github-actions" directory: "/" schedule: From 140f32a43c987b6f000097c230eceb1b1f657b5b Mon Sep 17 00:00:00 2001 From: Victor Engmark Date: Mon, 18 Dec 2023 14:23:45 +1300 Subject: [PATCH 07/15] refactor: PEP-8 compliance (#771) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: Add missing parameter docs * fix: Remove docs for non-existent parameter * docs: Fix reference to renamed parameter * refactor: Fix PEP-8 E713 "test for membership should be ‘not in’" . 
* refactor: Use lowercase function names * refactor: Avoid shadowing built-in name * refactor: Import from the same level consistently --- scripts/aws/aws_helper.py | 16 +++---- scripts/files/files_helper.py | 4 +- scripts/files/fs.py | 9 +++- scripts/files/fs_s3.py | 22 +++++----- scripts/files/tests/file_helper_test.py | 6 +-- scripts/files/tests/file_tiff_test.py | 10 ++--- scripts/files/tests/fs_s3_test.py | 55 ++++++++++++------------- scripts/gdal/gdal_bands.py | 1 + scripts/gdal/gdal_preset.py | 1 - scripts/gdal/tests/gdal_bands_test.py | 2 +- scripts/standardising.py | 2 +- scripts/thumbnails.py | 4 +- 12 files changed, 68 insertions(+), 64 deletions(-) diff --git a/scripts/aws/aws_helper.py b/scripts/aws/aws_helper.py index 656b57b3f..92a360ef1 100644 --- a/scripts/aws/aws_helper.py +++ b/scripts/aws/aws_helper.py @@ -4,9 +4,9 @@ from typing import Any, Dict, List, NamedTuple, Optional from urllib.parse import urlparse -import boto3 -import botocore +from boto3 import Session from botocore.credentials import AssumeRoleCredentialFetcher, DeferredRefreshableCredentials, ReadOnlyCredentials +from botocore.session import Session as BotocoreSession from linz_logger import get_log from scripts.aws.aws_credential_source import CredentialSource @@ -14,8 +14,8 @@ S3Path = NamedTuple("S3Path", [("bucket", str), ("key", str)]) aws_profile = environ.get("AWS_PROFILE") -session = boto3.Session(profile_name=aws_profile) -sessions: Dict[str, boto3.Session] = {} +session = Session(profile_name=aws_profile) +sessions: Dict[str, Session] = {} bucket_roles: List[CredentialSource] = [] @@ -40,14 +40,14 @@ def _init_roles() -> None: get_log().debug("bucket_config_loaded", config=bucket_config_path, prefix_count=len(bucket_roles)) -def _get_client_creator(local_session: boto3.Session) -> Any: +def _get_client_creator(local_session: Session) -> Any: def client_creator(service_name: str, **kwargs: Any) -> Any: return local_session.client(service_name, **kwargs) return 
client_creator -def get_session(prefix: str) -> boto3.Session: +def get_session(prefix: str) -> Session: """Get an AWS session to deal with an object on `s3`. Args: @@ -78,14 +78,14 @@ def get_session(prefix: str) -> boto3.Session: role_arn=cfg.roleArn, extra_args=extra_args, ) - botocore_session = botocore.session.Session() + botocore_session = BotocoreSession() # pylint:disable=protected-access botocore_session._credentials = DeferredRefreshableCredentials( method="assume-role", refresh_using=fetcher.fetch_credentials ) - current_session = boto3.Session(botocore_session=botocore_session) + current_session = Session(botocore_session=botocore_session) sessions[cfg.roleArn] = current_session get_log().info("role_assume", prefix=prefix, bucket=cfg.bucket, role_arn=cfg.roleArn) diff --git a/scripts/files/files_helper.py b/scripts/files/files_helper.py index f4570870d..90f3cbdd8 100644 --- a/scripts/files/files_helper.py +++ b/scripts/files/files_helper.py @@ -51,7 +51,7 @@ def is_tiff(path: str) -> bool: return path.lower().endswith((".tiff", ".tif")) -def is_GTiff(path: str, gdalinfo_data: Optional[GdalInfo] = None) -> bool: +def is_geotiff(path: str, gdalinfo_data: Optional[GdalInfo] = None) -> bool: """Verifies if a file is a GTiff based on the presence of the `coordinateSystem`. 
@@ -64,7 +64,7 @@ def is_GTiff(path: str, gdalinfo_data: Optional[GdalInfo] = None) -> bool: """ if not gdalinfo_data: gdalinfo_data = gdal_info(path) - if not "coordinateSystem" in gdalinfo_data: + if "coordinateSystem" not in gdalinfo_data: return False if gdalinfo_data["driverShortName"] == "GTiff": return True diff --git a/scripts/files/fs.py b/scripts/files/fs.py index c1d3c8ca0..2e6455ab7 100644 --- a/scripts/files/fs.py +++ b/scripts/files/fs.py @@ -58,6 +58,7 @@ def write_all(inputs: List[str], target: str, concurrency: Optional[int] = 4) -> Args: inputs: list of files to read target: target folder to write to + concurrency: max thread pool workers Returns: list of written file paths @@ -65,7 +66,8 @@ def write_all(inputs: List[str], target: str, concurrency: Optional[int] = 4) -> written_tiffs: List[str] = [] with ThreadPoolExecutor(max_workers=concurrency) as executor: futuress = { - executor.submit(write, os.path.join(target, f"{os.path.basename(input)}"), read(input)): input for input in inputs + executor.submit(write, os.path.join(target, f"{os.path.basename(input_)}"), read(input_)): input_ + for input_ in inputs } for future in as_completed(futuress): if future.exception(): @@ -86,6 +88,7 @@ def find_sidecars(inputs: List[str], extensions: List[str], concurrency: Optiona Args: inputs: list of input files to search for extensions extensions: the sidecar file extensions + concurrency: max thread pool workers Returns: list of existing sidecar files @@ -100,7 +103,9 @@ def _validate_path(path: str) -> Optional[str]: sidecars: List[str] = [] with ThreadPoolExecutor(max_workers=concurrency) as executor: for extension in extensions: - futuress = {executor.submit(_validate_path, f"{os.path.splitext(input)[0]}{extension}"): input for input in inputs} + futuress = { + executor.submit(_validate_path, f"{os.path.splitext(input_)[0]}{extension}"): input_ for input_ in inputs + } for future in as_completed(futuress): if future.exception(): get_log().warn("Find 
sidecar failed", error=future.exception()) diff --git a/scripts/files/fs_s3.py b/scripts/files/fs_s3.py index af7e451ec..b72ee6009 100644 --- a/scripts/files/fs_s3.py +++ b/scripts/files/fs_s3.py @@ -2,8 +2,8 @@ from concurrent.futures import ThreadPoolExecutor from typing import Any, Generator, List, Optional, Union -import boto3 -import botocore +from boto3 import client, resource +from botocore.exceptions import ClientError from linz_logger import get_log from scripts.aws.aws_helper import get_session, parse_path @@ -25,7 +25,7 @@ def write(destination: str, source: bytes, content_type: Optional[str] = None) - raise Exception("The 'source' is None.") s3_path = parse_path(destination) key = s3_path.key - s3 = boto3.resource("s3") + s3 = resource("s3") try: s3_object = s3.Object(s3_path.bucket, key) @@ -34,7 +34,7 @@ def write(destination: str, source: bytes, content_type: Optional[str] = None) - else: s3_object.put(Body=source) get_log().debug("write_s3_success", path=destination, duration=time_in_ms() - start_time) - except botocore.exceptions.ClientError as ce: + except ClientError as ce: get_log().error("write_s3_error", path=destination, error=f"Unable to write the file: {ce}") raise ce @@ -55,7 +55,7 @@ def read(path: str, needs_credentials: bool = False) -> bytes: start_time = time_in_ms() s3_path = parse_path(path) key = s3_path.key - s3 = boto3.resource("s3") + s3 = resource("s3") try: if needs_credentials: @@ -95,7 +95,7 @@ def exists(path: str, needs_credentials: bool = False) -> bool: True if the S3 Object exists """ s3_path, key = parse_path(path) - s3 = boto3.resource("s3") + s3 = resource("s3") try: if needs_credentials: @@ -168,7 +168,7 @@ def prefix_from_path(path: str) -> str: return path.replace(f"s3://{bucket_name}/", "") -def list_json_in_uri(uri: str, s3_client: Optional[boto3.client]) -> List[str]: +def list_json_in_uri(uri: str, s3_client: Optional[client]) -> List[str]: """Get the `JSON` files from a s3 path Args: @@ -179,7 +179,7 @@ def 
list_json_in_uri(uri: str, s3_client: Optional[boto3.client]) -> List[str]: a list of JSON files """ if not s3_client: - s3_client = boto3.client("s3") + s3_client = client("s3") files = [] paginator = s3_client.get_paginator("list_objects_v2") response_iterator = paginator.paginate(Bucket=bucket_name_from_path(uri), Prefix=prefix_from_path(uri)) @@ -195,7 +195,7 @@ def list_json_in_uri(uri: str, s3_client: Optional[boto3.client]) -> List[str]: return files -def _get_object(bucket: str, file_name: str, s3_client: boto3.client) -> Any: +def _get_object(bucket: str, file_name: str, s3_client: client) -> Any: """Get the object from `s3` Args: @@ -211,7 +211,7 @@ def _get_object(bucket: str, file_name: str, s3_client: boto3.client) -> Any: def get_object_parallel_multithreading( - bucket: str, files_to_read: List[str], s3_client: Optional[boto3.client], concurrency: int + bucket: str, files_to_read: List[str], s3_client: Optional[client], concurrency: int ) -> Generator[Any, Union[Any, BaseException], None]: """Get s3 objects in parallel @@ -225,7 +225,7 @@ def get_object_parallel_multithreading( the object when got """ if not s3_client: - s3_client = boto3.client("s3") + s3_client = client("s3") with ThreadPoolExecutor(max_workers=concurrency) as executor: future_to_key = {executor.submit(_get_object, bucket, key, s3_client): key for key in files_to_read} diff --git a/scripts/files/tests/file_helper_test.py b/scripts/files/tests/file_helper_test.py index 043b5fae1..4662fe851 100644 --- a/scripts/files/tests/file_helper_test.py +++ b/scripts/files/tests/file_helper_test.py @@ -1,4 +1,4 @@ -from scripts.files.files_helper import is_GTiff, is_tiff +from scripts.files.files_helper import is_geotiff, is_tiff from scripts.gdal.tests.gdalinfo import fake_gdal_info @@ -21,5 +21,5 @@ def test_is_geotiff() -> None: gdalinfo_not_geotiff["driverShortName"] = "GTiff" gdalinfo_geotiff["coordinateSystem"] = {"wkt": "PROJCRS['NZGD2000 / New Zealand Transverse Mercator 2000']"} - 
assert is_GTiff("file.tiff", gdalinfo_geotiff) is True - assert is_GTiff("file.tiff", gdalinfo_not_geotiff) is False + assert is_geotiff("file.tiff", gdalinfo_geotiff) is True + assert is_geotiff("file.tiff", gdalinfo_not_geotiff) is False diff --git a/scripts/files/tests/file_tiff_test.py b/scripts/files/tests/file_tiff_test.py index e07730423..c93e03a18 100644 --- a/scripts/files/tests/file_tiff_test.py +++ b/scripts/files/tests/file_tiff_test.py @@ -66,7 +66,7 @@ def test_check_band_count_invalid_4() -> None: assert file_tiff.get_errors() -def test_check_band_count_valid_1_DEM() -> None: +def test_check_band_count_valid_1_dem() -> None: """ tests check_band_count when the input layer has a valid band count which is 1 bands and a DEM preset @@ -80,7 +80,7 @@ def test_check_band_count_valid_1_DEM() -> None: assert not file_tiff.get_errors() -def test_check_band_count_invalid_alpha_DEM() -> None: +def test_check_band_count_invalid_alpha_dem() -> None: """ tests check_band_count when the input layer has a valid band count which is 2 bands where the second band is Alpha and DEM preset @@ -95,7 +95,7 @@ def test_check_band_count_invalid_alpha_DEM() -> None: assert file_tiff.get_errors() -def test_check_band_count_invalid_3_DEM() -> None: +def test_check_band_count_invalid_3_dem() -> None: """ tests check_band_count when the input layer has an invalid band count which is 3 bands where the preset is DEM. 
@@ -157,7 +157,7 @@ def test_check_color_interpretation_invalid() -> None: assert file_tiff.get_errors() -def test_check_color_interpretation_valid_DEM() -> None: +def test_check_color_interpretation_valid_dem() -> None: """ tests check_color_interpretation with the correct color interpretation """ @@ -170,7 +170,7 @@ def test_check_color_interpretation_valid_DEM() -> None: assert not file_tiff.get_errors() -def test_check_color_interpretation_invalid_DEM() -> None: +def test_check_color_interpretation_invalid_dem() -> None: """ tests check_color_interpretation with the incorrect color interpretation """ diff --git a/scripts/files/tests/fs_s3_test.py b/scripts/files/tests/fs_s3_test.py index 17b77825b..60aac0bf8 100644 --- a/scripts/files/tests/fs_s3_test.py +++ b/scripts/files/tests/fs_s3_test.py @@ -1,11 +1,10 @@ import json -import boto3 -import botocore -import pytest +from boto3 import client, resource +from botocore.exceptions import ClientError from moto import mock_s3 from moto.s3.responses import DEFAULT_REGION_NAME -from pytest import CaptureFixture +from pytest import CaptureFixture, raises from scripts.files.files_helper import ContentType from scripts.files.fs_s3 import exists, read, write @@ -13,35 +12,35 @@ @mock_s3 # type: ignore def test_write() -> None: - s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME) - client = boto3.client("s3", region_name=DEFAULT_REGION_NAME) + s3 = resource("s3", region_name=DEFAULT_REGION_NAME) + boto3_client = client("s3", region_name=DEFAULT_REGION_NAME) s3.create_bucket(Bucket="testbucket") write("s3://testbucket/test.file", b"test content") - resp = client.get_object(Bucket="testbucket", Key="test.file") + resp = boto3_client.get_object(Bucket="testbucket", Key="test.file") assert resp["Body"].read() == b"test content" assert resp["ContentType"] == "binary/octet-stream" @mock_s3 # type: ignore def test_write_content_type() -> None: - s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME) - client = 
boto3.client("s3", region_name=DEFAULT_REGION_NAME) + s3 = resource("s3", region_name=DEFAULT_REGION_NAME) + boto3_client = client("s3", region_name=DEFAULT_REGION_NAME) s3.create_bucket(Bucket="testbucket") write("s3://testbucket/test.tiff", b"test content", ContentType.GEOTIFF.value) - resp = client.get_object(Bucket="testbucket", Key="test.tiff") + resp = boto3_client.get_object(Bucket="testbucket", Key="test.tiff") assert resp["Body"].read() == b"test content" assert resp["ContentType"] == ContentType.GEOTIFF.value @mock_s3 # type: ignore def test_read() -> None: - s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME) - client = boto3.client("s3", region_name=DEFAULT_REGION_NAME) + s3 = resource("s3", region_name=DEFAULT_REGION_NAME) + boto3_client = client("s3", region_name=DEFAULT_REGION_NAME) s3.create_bucket(Bucket="testbucket") - client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content") + boto3_client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content") content = read("s3://testbucket/test.file") @@ -50,7 +49,7 @@ def test_read() -> None: @mock_s3 # type: ignore def test_read_bucket_not_found(capsys: CaptureFixture[str]) -> None: - with pytest.raises(botocore.exceptions.ClientError): + with raises(ClientError): read("s3://testbucket/test.file") # python-linz-logger uses structlog which doesn't use stdlib so can't capture the logs with `caplog` @@ -60,10 +59,10 @@ def test_read_bucket_not_found(capsys: CaptureFixture[str]) -> None: @mock_s3 # type: ignore def test_read_key_not_found(capsys: CaptureFixture[str]) -> None: - s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME) + s3 = resource("s3", region_name=DEFAULT_REGION_NAME) s3.create_bucket(Bucket="testbucket") - with pytest.raises(botocore.exceptions.ClientError): + with raises(ClientError): read("s3://testbucket/test.file") logs = json.loads(capsys.readouterr().out) @@ -72,10 +71,10 @@ def test_read_key_not_found(capsys: CaptureFixture[str]) -> None: 
@mock_s3 # type: ignore def test_exists() -> None: - s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME) - client = boto3.client("s3", region_name=DEFAULT_REGION_NAME) + s3 = resource("s3", region_name=DEFAULT_REGION_NAME) + boto3_client = client("s3", region_name=DEFAULT_REGION_NAME) s3.create_bucket(Bucket="testbucket") - client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content") + boto3_client.put_object(Bucket="testbucket", Key="test.file", Body=b"test content") file_exists = exists("s3://testbucket/test.file") @@ -84,10 +83,10 @@ def test_exists() -> None: @mock_s3 # type: ignore def test_directory_exists() -> None: - s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME) - client = boto3.client("s3", region_name=DEFAULT_REGION_NAME) + s3 = resource("s3", region_name=DEFAULT_REGION_NAME) + boto3_client = client("s3", region_name=DEFAULT_REGION_NAME) s3.create_bucket(Bucket="testbucket") - client.put_object(Bucket="testbucket", Key="hello/test.file", Body=b"test content") + boto3_client.put_object(Bucket="testbucket", Key="hello/test.file", Body=b"test content") directory_exists = exists("s3://testbucket/hello/") @@ -105,10 +104,10 @@ def test_exists_bucket_not_exists(capsys: CaptureFixture[str]) -> None: @mock_s3 # type: ignore def test_exists_object_not_exists() -> None: - s3 = boto3.resource("s3", region_name=DEFAULT_REGION_NAME) - client = boto3.client("s3", region_name=DEFAULT_REGION_NAME) + s3 = resource("s3", region_name=DEFAULT_REGION_NAME) + boto3_client = client("s3", region_name=DEFAULT_REGION_NAME) s3.create_bucket(Bucket="testbucket") - client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content") + boto3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content") file_exists = exists("s3://testbucket/test.file") @@ -117,10 +116,10 @@ def test_exists_object_not_exists() -> None: @mock_s3 # type: ignore def test_exists_object_starting_with_not_exists() -> None: - s3 = 
boto3.resource("s3", region_name=DEFAULT_REGION_NAME) - client = boto3.client("s3", region_name=DEFAULT_REGION_NAME) + s3 = resource("s3", region_name=DEFAULT_REGION_NAME) + boto3_client = client("s3", region_name=DEFAULT_REGION_NAME) s3.create_bucket(Bucket="testbucket") - client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content") + boto3_client.put_object(Bucket="testbucket", Key="hello/another.file", Body=b"test content") file_exists = exists("s3://testbucket/hello/another.fi") diff --git a/scripts/gdal/gdal_bands.py b/scripts/gdal/gdal_bands.py index def81b51d..e2f5f3cd0 100644 --- a/scripts/gdal/gdal_bands.py +++ b/scripts/gdal/gdal_bands.py @@ -28,6 +28,7 @@ def get_gdal_band_offset(file: str, info: Optional[GdalInfo] = None, preset: Opt Args: file: file to check info: optional precomputed gdalinfo + preset: "dem_lerc" preset used to differentiate single band elevation tiffs from single band historical imagery Returns: list of band mappings eg "-b 1 -b 1 -b 1" diff --git a/scripts/gdal/gdal_preset.py b/scripts/gdal/gdal_preset.py index d42a7bb0f..f0e5feca0 100644 --- a/scripts/gdal/gdal_preset.py +++ b/scripts/gdal/gdal_preset.py @@ -90,7 +90,6 @@ def get_gdal_command(preset: str, epsg: str) -> List[str]: Args: preset: gdal preset to use. Defined in `gdal.gdal_preset.py` epsg: the EPSG code of the file - convert_from: Defaults to None. 
Returns: a list of arguments to run `gdal_translate` diff --git a/scripts/gdal/tests/gdal_bands_test.py b/scripts/gdal/tests/gdal_bands_test.py index 187db841f..df290c1bf 100644 --- a/scripts/gdal/tests/gdal_bands_test.py +++ b/scripts/gdal/tests/gdal_bands_test.py @@ -21,7 +21,7 @@ def test_gdal_grey_bands_detection() -> None: assert " ".join(bands) == "-b 2 -b 2 -b 2 -b 1" -def test_gdal_grey_bands_DEM_detection() -> None: +def test_gdal_grey_bands_dem_detection() -> None: gdalinfo = fake_gdal_info() add_band(gdalinfo, color_interpretation="Gray") diff --git a/scripts/standardising.py b/scripts/standardising.py index c20182bb3..2f52d2e6c 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -104,7 +104,7 @@ def standardising( """Apply transformations using GDAL to the source file. Args: - file: path to the file to standardise + files: paths to the files to standardise preset: gdal preset to use. See `gdal.gdal_preset.py` source_epsg: EPSG code of the source file target_epsg: EPSG code of reprojection diff --git a/scripts/thumbnails.py b/scripts/thumbnails.py index 93109d0c8..152fb3123 100644 --- a/scripts/thumbnails.py +++ b/scripts/thumbnails.py @@ -7,7 +7,7 @@ from linz_logger import get_log -from scripts.files.files_helper import ContentType, get_file_name_from_path, is_GTiff, is_tiff +from scripts.files.files_helper import ContentType, get_file_name_from_path, is_geotiff, is_tiff from scripts.files.fs import exists, read, write from scripts.gdal import gdalinfo from scripts.gdal.gdal_helper import run_gdal @@ -47,7 +47,7 @@ def thumbnails(path: str, target: str) -> str | None: # For both GeoTIFF and TIFF (not georeferenced) this is done in 2 steps. # Why? because it hasn't been found another way to get the same visual aspect. 
gdalinfo_data = gdalinfo.gdal_info(source_tiff) - if is_GTiff(source_tiff, gdalinfo_data): + if is_geotiff(source_tiff, gdalinfo_data): get_log().info("thumbnail_generate_geotiff", path=target_thumbnail) run_gdal(get_thumbnail_command("jpeg", source_tiff, transitional_jpg, "50%", "50%", None, gdalinfo_data)) run_gdal(get_thumbnail_command("jpeg", transitional_jpg, tmp_thumbnail, "30%", "30%", None, gdalinfo_data)) From 4e84901abe30ca0294ae570caef7fba511fe0ce5 Mon Sep 17 00:00:00 2001 From: paulfouquet <86932794+paulfouquet@users.noreply.github.com> Date: Tue, 19 Dec 2023 13:29:41 +1300 Subject: [PATCH 08/15] docs: add explanation about collection provider tests (#764) --- scripts/stac/tests/collection_test.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/stac/tests/collection_test.py b/scripts/stac/tests/collection_test.py index 6a01aa055..5483bbd50 100644 --- a/scripts/stac/tests/collection_test.py +++ b/scripts/stac/tests/collection_test.py @@ -130,29 +130,35 @@ def test_add_providers(setup_collection: ImageryCollection) -> None: assert {"name": "Maxar", "roles": ["producer"]} in collection.stac["providers"] -def test_default_provider_present() -> None: +def test_default_provider_roles_are_kept() -> None: + # given we are adding a non default role to the default provider licensor: Provider = {"name": "Toitū Te Whenua Land Information New Zealand", "roles": [ProviderRole.LICENSOR]} producer: Provider = {"name": "Maxar", "roles": [ProviderRole.PRODUCER]} title = "Test Urban Imagery" description = "Test Urban Imagery Description" collection = ImageryCollection(title, description, providers=[producer, licensor]) + # then it adds the non default role to the existing default role list assert { "name": "Toitū Te Whenua Land Information New Zealand", "roles": ["licensor", "host", "processor"], } in collection.stac["providers"] + # then it does not duplicate the default provider assert {"name": "Toitū Te Whenua Land Information New 
Zealand", "roles": ["host", "processor"]} not in collection.stac[ "providers" ] -def test_default_provider_missing() -> None: +def test_default_provider_is_present() -> None: + # given adding a provider producer: Provider = {"name": "Maxar", "roles": [ProviderRole.PRODUCER]} title = "Test Urban Imagery" description = "Test Urban Imagery Description" collection = ImageryCollection(title, description, providers=[producer]) + # then the default provider is still present assert {"name": "Toitū Te Whenua Land Information New Zealand", "roles": ["host", "processor"]} in collection.stac[ "providers" ] + # then the new provider is added assert {"name": "Maxar", "roles": ["producer"]} in collection.stac["providers"] From dd3e282660cc0e652667adf54fe3e5ad3b0b0bea Mon Sep 17 00:00:00 2001 From: Megan Davidson <33814653+MDavidson17@users.noreply.github.com> Date: Tue, 19 Dec 2023 15:30:30 +1300 Subject: [PATCH 09/15] feat: title and description by arguments TDE-960 (#757) * feat: generate title & description from inputs * test: autogenerate title & description tests * fix: sort imports * fix: mypy & pylint * fix: don't allow/account for empty string * test: test for no empty strings j * fix: fix broken test * fix: lifecycle should never be none * fix: add choices to parameters * fix: name variable datetime only when is a datetime type * fix: add example for Historical Imagery Survey Number * fix: required field doesn't need default * fix: add nullable_str to argparse type remove now unnecessary code * fix: make subtypes enums * fix: don't need to set to None * fix: Improved way of managing event and ending with a . 
* fix: remove unused param * fix: tidy choices enum listing * fix: gsd cannot be none * fix: str='' is the same as str=None * fix: typo * fix: use enum dictionary to map regions * fix: update tests * fix: nit * fix: list enums correctly * fix: args copy paste error * fix: rename subtype -> category to match metadata name and basemaps * fix: use kwargs to resolve too many args * fix: add pseudo * to make params keyword args * refactor: subtype -> category * refactor: parameters and relocate * fix: formatting * Update scripts/stac/imagery/metadata_constants.py Co-authored-by: Alice Fage * fix: merge conflict --------- Co-authored-by: Alice Fage --- scripts/collection_from_items.py | 70 ++++++++- scripts/stac/imagery/collection.py | 120 +++++++++++++++- scripts/stac/imagery/metadata_constants.py | 69 +++++++++ scripts/stac/tests/collection_test.py | 88 ++++++------ .../stac/tests/generate_description_test.py | 86 +++++++++++ scripts/stac/tests/generate_title_test.py | 134 ++++++++++++++++++ scripts/stac/tests/item_test.py | 18 ++- 7 files changed, 531 insertions(+), 54 deletions(-) create mode 100644 scripts/stac/imagery/metadata_constants.py create mode 100644 scripts/stac/tests/generate_description_test.py create mode 100644 scripts/stac/tests/generate_title_test.py diff --git a/scripts/collection_from_items.py b/scripts/collection_from_items.py index 642697ad4..a432f80c9 100644 --- a/scripts/collection_from_items.py +++ b/scripts/collection_from_items.py @@ -6,19 +6,67 @@ from boto3 import client from linz_logger import get_log -from scripts.cli.cli_helper import coalesce_multi_single +from scripts.cli.cli_helper import coalesce_multi_single, valid_date from scripts.files.fs_s3 import bucket_name_from_path, get_object_parallel_multithreading, list_json_in_uri from scripts.logging.time_helper import time_in_ms from scripts.stac.imagery.collection import ImageryCollection +from scripts.stac.imagery.metadata_constants import ( + HUMAN_READABLE_REGIONS, + 
CollectionTitleMetadata, + ElevationCategories, + ImageryCategories, +) from scripts.stac.imagery.provider import Provider, ProviderRole +# pylint: disable-msg=too-many-locals def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("--uri", dest="uri", help="s3 path to items and collection.json write location", required=True) parser.add_argument("--collection-id", dest="collection_id", help="Collection ID", required=True) - parser.add_argument("--title", dest="title", help="Collection title", required=True) - parser.add_argument("--description", dest="description", help="Collection description", required=True) + parser.add_argument( + "--category", + dest="category", + help="Dataset category description", + required=True, + choices=[type.value for type in ImageryCategories] + [type.value for type in ElevationCategories], + ) + parser.add_argument( + "--region", + dest="region", + help="Region of Dataset", + required=True, + choices=HUMAN_READABLE_REGIONS.keys(), + ) + parser.add_argument("--gsd", dest="gsd", help="GSD of imagery Dataset", type=str, required=True) + parser.add_argument( + "--location", dest="location", help="Optional Location of dataset, e.g.- Hutt City", type=str, required=False + ) + parser.add_argument( + "--start-date", + dest="start_date", + help="Start date in format YYYY-MM-DD (Inclusive)", + type=valid_date, + required=True, + ) + parser.add_argument( + "--end-date", dest="end_date", help="End date in format YYYY-MM-DD (Inclusive)", type=valid_date, required=True + ) + parser.add_argument("--event", dest="event", help="Event name if applicable", type=str, required=False) + parser.add_argument( + "--historic-survey-number", + dest="historic_survey_number", + help="Historic Survey Number if Applicable. 
E.g.- SCN8844", + type=str, + required=False, + ) + parser.add_argument( + "--lifecycle", + dest="lifecycle", + help="Designating dataset status", + required=True, + choices=["under development", "preview", "ongoing", "completed", "deprecated"], + ) parser.add_argument( "--producer", dest="producer", @@ -44,9 +92,19 @@ def main() -> None: for licensor_name in coalesce_multi_single(arguments.licensor_list, arguments.licensor): providers.append({"name": licensor_name, "roles": [ProviderRole.LICENSOR]}) - collection = ImageryCollection( - title=arguments.title, description=arguments.description, collection_id=arguments.collection_id, providers=providers - ) + title_metadata: CollectionTitleMetadata = { + "category": arguments.category, + "region": arguments.region, + "gsd": arguments.gsd, + "start_datetime": arguments.start_date, + "end_datetime": arguments.end_date, + "lifecycle": arguments.lifecycle, + "location": arguments.location, + "event": arguments.event, + "historic_survey_number": arguments.historic_survey_number, + } + + collection = ImageryCollection(title_metadata=title_metadata, collection_id=arguments.collection_id, providers=providers) if not uri.startswith("s3://"): msg = f"uri is not a s3 path: {uri}" diff --git a/scripts/stac/imagery/collection.py b/scripts/stac/imagery/collection.py index 94569448a..0b55331f7 100644 --- a/scripts/stac/imagery/collection.py +++ b/scripts/stac/imagery/collection.py @@ -6,6 +6,13 @@ from scripts.files.files_helper import ContentType from scripts.files.fs import write +from scripts.stac.imagery.metadata_constants import ( + HUMAN_READABLE_REGIONS, + CollectionTitleMetadata, + ElevationCategories, + ImageryCategories, + SubtypeParameterError, +) from scripts.stac.imagery.provider import Provider, ProviderRole from scripts.stac.util.STAC_VERSION import STAC_VERSION @@ -14,17 +21,22 @@ class ImageryCollection: stac: Dict[str, Any] def __init__( - self, title: str, description: str, collection_id: Optional[str] = None, 
providers: Optional[List[Provider]] = None + self, + title_metadata: CollectionTitleMetadata, + collection_id: Optional[str] = None, + providers: Optional[List[Provider]] = None, ) -> None: if not collection_id: collection_id = str(ulid.ULID()) + self.title_metadata = title_metadata + self.stac = { "type": "Collection", "stac_version": STAC_VERSION, "id": collection_id, - "title": title, - "description": description, + "title": self._title(), + "description": self._description(), "license": "CC-BY-4.0", "links": [{"rel": "self", "href": "./collection.json", "type": "application/json"}], "providers": [], @@ -179,3 +191,105 @@ def write_to(self, destination: str) -> None: destination: path of the destination """ write(destination, json.dumps(self.stac, ensure_ascii=False).encode("utf-8"), content_type=ContentType.JSON.value) + + def _title(self) -> str: + """Generates the title for imagery and elevation datasets. + Satellite Imagery / Urban Aerial Photos / Rural Aerial Photos: + [Location / Region if no Location specified] [GSD] [?Event Name] [Data Sub-Type] ([Year(s)]) [?- Preview] + DEM / DSM: + [Location / Region if no Location specified] [?- Event Name] LiDAR [GSD] [Data Sub-Type] ([Year(s)]) [?- Preview] + If Historic Survey Number: + [Location / Region if no Location specified] [GSD] [Survey Number] ([Year(s)]) [?- Preview] + + Returns: + Dataset Title + """ + # format optional metadata + location = self.title_metadata.get("location") + historic_survey_number = self.title_metadata.get("historic_survey_number") + event = self.title_metadata.get("event") + + # format date for metadata + if self.title_metadata["start_datetime"].year == self.title_metadata["end_datetime"].year: + date = str(self.title_metadata["start_datetime"].year) + else: + date = f"{self.title_metadata['start_datetime'].year} - {self.title_metadata['end_datetime'].year}" + + # determine dataset name + if location: + name = location + else: + name = 
HUMAN_READABLE_REGIONS[self.title_metadata["region"]] + + # determine if dataset is preview + if self.title_metadata.get("lifecycle") == "preview": + preview = "- preview" + else: + preview = None + + if historic_survey_number: + return " ".join(f"{name} {self.title_metadata['gsd']} {historic_survey_number} ({date}) {preview or ''}".split()) + + if self.title_metadata["category"] in [ImageryCategories.SATELLITE, ImageryCategories.URBAN, ImageryCategories.RURAL]: + return " ".join( + f"{name} {self.title_metadata['gsd']} {event or ''} {self.title_metadata['category']} ({date}) {preview or ''}".split() # pylint: disable=line-too-long + ) + if self.title_metadata["category"] in [ElevationCategories.DEM, ElevationCategories.DSM]: + return " ".join( + f"{name} {self._elevation_title_event(event) or ''} LiDAR {self.title_metadata['gsd']} {self.title_metadata['category']} ({date}) {preview or ''}".split() # pylint: disable=line-too-long + ) + raise SubtypeParameterError(self.title_metadata["category"]) + + def _elevation_title_event(self, event: Optional[str]) -> Optional[str]: + if event: + return f"- {event}" + return None + + def _description(self) -> str: + """Generates the descriptions for imagery and elevation datasets. + Urban Aerial Photos / Rural Aerial Photos: + Orthophotography within the [Region] region captured in the [Year(s)] flying season. + DEM / DSM: + [Digital Surface Model / Digital Elevation Model] within the [region] [?- location] region in [year(s)]. + Satellite Imagery: + Satellite imagery within the [Region] region captured in [Year(s)]. + Historical Imagery: + Scanned aerial imagery within the [Region] region captured in [Year(s)]. 
+ + Returns: + Dataset Description + """ + # format optional metadata + location = self.title_metadata.get("location") + historic_survey_number = self.title_metadata.get("historic_survey_number") + event = self.title_metadata.get("event") + + # format date for metadata + if self.title_metadata["start_datetime"].year == self.title_metadata["end_datetime"].year: + date = str(self.title_metadata["start_datetime"].year) + else: + date = f"{self.title_metadata['start_datetime'].year} - {self.title_metadata['end_datetime'].year}" + + # format location for metadata description + if location: + location = f"- {location}" + + region = HUMAN_READABLE_REGIONS[self.title_metadata["region"]] + + if historic_survey_number: + desc = f"Scanned aerial imagery within the {region} region captured in {date}" + elif self.title_metadata["category"] == ImageryCategories.SATELLITE: + desc = f"Satellite imagery within the {region} region captured in {date}" + elif self.title_metadata["category"] in [ImageryCategories.URBAN, ImageryCategories.RURAL]: + desc = f"Orthophotography within the {region} region captured in the {date} flying season" + elif self.title_metadata["category"] == ElevationCategories.DEM: + desc = " ".join(f"Digital Elevation Model within the {region} {location or ''} region in {date}".split()) + elif self.title_metadata["category"] == ElevationCategories.DSM: + desc = " ".join(f"Digital Surface Model within the {region} {location or ''} region in {date}".split()) + else: + raise SubtypeParameterError(self.title_metadata["category"]) + + if event: + desc = desc + f", published as a record of the {event} event" + + return desc + "." 
diff --git a/scripts/stac/imagery/metadata_constants.py b/scripts/stac/imagery/metadata_constants.py new file mode 100644 index 000000000..cb1f6ad99 --- /dev/null +++ b/scripts/stac/imagery/metadata_constants.py @@ -0,0 +1,69 @@ +from datetime import datetime +from enum import Enum +from typing import Optional, TypedDict + + +class CollectionTitleMetadata(TypedDict): + """ + region: Region of Dataset + gsd: Dataset Ground Sample Distance + start_datetime: Dataset capture start date + end_datetime: Dataset capture end date + lifecycle: Dataset status + Optional: + location: Optional location of dataset, e.g. Hutt City + event: Optional details of capture event, e.g. Cyclone Gabrielle + historic_survey_number: Optional historic imagery survey number, e.g. SNC88445 + """ + + category: str + region: str + gsd: str + start_datetime: datetime + end_datetime: datetime + lifecycle: str + location: Optional[str] + event: Optional[str] + historic_survey_number: Optional[str] + + +class SubtypeParameterError(Exception): + def __init__(self, category: str) -> None: + self.message = f"Unrecognised/Unimplemented Subtype Parameter: {category}" + + +class ImageryCategories(str, Enum): + SATELLITE = "Satellite Imagery" + URBAN = "Urban Aerial Photos" + RURAL = "Rural Aerial Photos" + AERIAL = "Aerial Photos" + HISTORICAL = "Scanned Aerial Photos" + + +class ElevationCategories(str, Enum): + DEM = "DEM" + DSM = "DSM" + + +HUMAN_READABLE_REGIONS = { + "antarctica": "Antarctica", + "auckland": "Auckland", + "bay-of-plenty": "Bay of Plenty", + "canterbury": "Canterbury", + "gisborne": "Gisborne", + "global": "Global", + "hawkes-bay": "Hawke's Bay", + "manawatu-whanganui": "Manawatū-Whanganui", + "marlborough": "Marlborough", + "nelson": "Nelson", + "new-zealand": "New Zealand", + "northland": "Northland", + "otago": "Otago", + "pacific-islands": "Pacific Islands", + "southland": "Southland", + "taranaki": "Taranaki", + "tasman": "Tasman", + "waikato": "Waikato", + "wellington": "Wellington", + 
"west-coast": "West Coast", +} diff --git a/scripts/stac/tests/collection_test.py b/scripts/stac/tests/collection_test.py index 5483bbd50..182071a35 100644 --- a/scripts/stac/tests/collection_test.py +++ b/scripts/stac/tests/collection_test.py @@ -1,5 +1,6 @@ import json import os +from datetime import datetime from shutil import rmtree from tempfile import mkdtemp from typing import Generator @@ -9,43 +10,48 @@ from scripts.files.fs import read from scripts.stac.imagery.collection import ImageryCollection from scripts.stac.imagery.item import ImageryItem +from scripts.stac.imagery.metadata_constants import CollectionTitleMetadata from scripts.stac.imagery.provider import Provider, ProviderRole -@pytest.fixture(name="setup_collection", autouse=True) -def setup() -> Generator[ImageryCollection, None, None]: - title = "Test Urban Imagery" - description = "Test Urban Imagery Description" - collection = ImageryCollection(title, description) - yield collection +@pytest.fixture(name="metadata", autouse=True) +def setup() -> Generator[CollectionTitleMetadata, None, None]: + metadata: CollectionTitleMetadata = { + "category": "Urban Aerial Photos", + "region": "auckland", + "gsd": "0.3m", + "start_datetime": datetime(2022, 2, 2), + "end_datetime": datetime(2022, 2, 2), + "lifecycle": "completed", + "location": None, + "event": None, + "historic_survey_number": None, + } + yield metadata -def test_title_description_id_created_on_init() -> None: - title = "Test Urban Imagery" - description = "Test Urban Imagery Description" - collection = ImageryCollection(title, description) - assert collection.stac["title"] == "Test Urban Imagery" - assert collection.stac["description"] == "Test Urban Imagery Description" +def test_title_description_id_created_on_init(metadata: CollectionTitleMetadata) -> None: + collection = ImageryCollection(metadata) + assert collection.stac["title"] == "Auckland 0.3m Urban Aerial Photos (2022)" + assert collection.stac["description"] == 
"Orthophotography within the Auckland region captured in the 2022 flying season." assert collection.stac["id"] -def test_id_parsed_on_init() -> None: - title = "Test" - description = "Test" +def test_id_parsed_on_init(metadata: CollectionTitleMetadata) -> None: id_ = "Parsed-Ulid" - collection = ImageryCollection(title, description, id_) + collection = ImageryCollection(metadata, id_) assert collection.stac["id"] == "Parsed-Ulid" -def test_bbox_updated_from_none(setup_collection: ImageryCollection) -> None: - collection = setup_collection +def test_bbox_updated_from_none(metadata: CollectionTitleMetadata) -> None: + collection = ImageryCollection(metadata) bbox = [1799667.5, 5815977.0, 1800422.5, 5814986.0] collection.update_spatial_extent(bbox) assert collection.stac["extent"]["spatial"]["bbox"] == [bbox] -def test_bbox_updated_from_existing(setup_collection: ImageryCollection) -> None: - collection = setup_collection +def test_bbox_updated_from_existing(metadata: CollectionTitleMetadata) -> None: + collection = ImageryCollection(metadata) # init bbox bbox = [174.889641, -41.217532, 174.902344, -41.203521] collection.update_spatial_extent(bbox) @@ -56,16 +62,16 @@ def test_bbox_updated_from_existing(setup_collection: ImageryCollection) -> None assert collection.stac["extent"]["spatial"]["bbox"] == [[174.889641, -41.217532, 174.922965, -41.203521]] -def test_interval_updated_from_none(setup_collection: ImageryCollection) -> None: - collection = setup_collection +def test_interval_updated_from_none(metadata: CollectionTitleMetadata) -> None: + collection = ImageryCollection(metadata) start_datetime = "2021-01-27T00:00:00Z" end_datetime = "2021-01-27T00:00:00Z" collection.update_temporal_extent(start_datetime, end_datetime) assert collection.stac["extent"]["temporal"]["interval"] == [[start_datetime, end_datetime]] -def test_interval_updated_from_existing(setup_collection: ImageryCollection) -> None: - collection = setup_collection +def 
test_interval_updated_from_existing(metadata: CollectionTitleMetadata) -> None: + collection = ImageryCollection(metadata) # init interval start_datetime = "2021-01-27T00:00:00Z" end_datetime = "2021-01-27T00:00:00Z" @@ -78,8 +84,8 @@ def test_interval_updated_from_existing(setup_collection: ImageryCollection) -> assert collection.stac["extent"]["temporal"]["interval"] == [["2021-01-27T00:00:00Z", "2021-02-20T00:00:00Z"]] -def test_add_item(mocker, setup_collection: ImageryCollection) -> None: # type: ignore - collection = setup_collection +def test_add_item(mocker, metadata: CollectionTitleMetadata) -> None: # type: ignore + collection = ImageryCollection(metadata) checksum = "1220cdef68d62fb912110b810e62edc53de07f7a44fb2b310db700e9d9dd58baa6b4" mocker.patch("scripts.stac.util.checksum.multihash_as_hex", return_value=checksum) item = ImageryItem("BR34_5000_0304", "./test/BR34_5000_0304.tiff") @@ -100,43 +106,43 @@ def test_add_item(mocker, setup_collection: ImageryCollection) -> None: # type: assert collection.stac["extent"]["spatial"]["bbox"] == [bbox] -def test_write_collection(setup_collection: ImageryCollection) -> None: +def test_write_collection(metadata: CollectionTitleMetadata) -> None: target = mkdtemp() + collectionObj = ImageryCollection(metadata) collection_target = os.path.join(target, "collection.json") - setup_collection.write_to(collection_target) + collectionObj.write_to(collection_target) collection = json.loads(read(collection_target)) rmtree(target) - assert collection["title"] == setup_collection.stac["title"] + assert collection["title"] == collectionObj.stac["title"] -def test_write_collection_special_chars(setup_collection: ImageryCollection) -> None: +def test_write_collection_special_chars(metadata: CollectionTitleMetadata) -> None: target = mkdtemp() title = "Manawatū-Whanganui" - setup_collection.stac["title"] = title + collectionObj = ImageryCollection(metadata) + collectionObj.stac["title"] = title collection_target = 
os.path.join(target, "collection.json") - setup_collection.write_to(collection_target) + collectionObj.write_to(collection_target) collection = json.loads(read(collection_target)) rmtree(target) assert collection["title"] == title -def test_add_providers(setup_collection: ImageryCollection) -> None: - collection = setup_collection +def test_add_providers(metadata: CollectionTitleMetadata) -> None: + collection = ImageryCollection(metadata) producer: Provider = {"name": "Maxar", "roles": [ProviderRole.PRODUCER]} collection.add_providers([producer]) assert {"name": "Maxar", "roles": ["producer"]} in collection.stac["providers"] -def test_default_provider_roles_are_kept() -> None: +def test_default_provider_roles_are_kept(metadata: CollectionTitleMetadata) -> None: # given we are adding a non default role to the default provider licensor: Provider = {"name": "Toitū Te Whenua Land Information New Zealand", "roles": [ProviderRole.LICENSOR]} producer: Provider = {"name": "Maxar", "roles": [ProviderRole.PRODUCER]} - title = "Test Urban Imagery" - description = "Test Urban Imagery Description" - collection = ImageryCollection(title, description, providers=[producer, licensor]) + collection = ImageryCollection(metadata, providers=[producer, licensor]) # then it adds the non default role to the existing default role list assert { @@ -149,12 +155,10 @@ def test_default_provider_roles_are_kept() -> None: ] -def test_default_provider_is_present() -> None: +def test_default_provider_is_present(metadata: CollectionTitleMetadata) -> None: # given adding a provider producer: Provider = {"name": "Maxar", "roles": [ProviderRole.PRODUCER]} - title = "Test Urban Imagery" - description = "Test Urban Imagery Description" - collection = ImageryCollection(title, description, providers=[producer]) + collection = ImageryCollection(metadata, providers=[producer]) # then the default provider is still present assert {"name": "Toitū Te Whenua Land Information New Zealand", "roles": ["host", 
"processor"]} in collection.stac[ diff --git a/scripts/stac/tests/generate_description_test.py b/scripts/stac/tests/generate_description_test.py new file mode 100644 index 000000000..a5491ace0 --- /dev/null +++ b/scripts/stac/tests/generate_description_test.py @@ -0,0 +1,86 @@ +from datetime import datetime +from typing import Generator, Tuple + +import pytest + +from scripts.stac.imagery.collection import ImageryCollection +from scripts.stac.imagery.metadata_constants import CollectionTitleMetadata + + +@pytest.fixture(name="metadata", autouse=True) +def setup() -> Generator[Tuple[CollectionTitleMetadata, CollectionTitleMetadata], None, None]: + metadata_auck: CollectionTitleMetadata = { + "category": "Rural Aerial Photos", + "region": "auckland", + "gsd": "0.3m", + "start_datetime": datetime(2023, 1, 1), + "end_datetime": datetime(2023, 2, 2), + "lifecycle": "completed", + "location": None, + "event": None, + "historic_survey_number": None, + } + metadata_hb: CollectionTitleMetadata = { + "category": "Rural Aerial Photos", + "region": "hawkes-bay", + "gsd": "0.3m", + "start_datetime": datetime(2023, 1, 1), + "end_datetime": datetime(2023, 2, 2), + "lifecycle": "completed", + "location": None, + "event": None, + "historic_survey_number": None, + } + yield (metadata_auck, metadata_hb) + + +def test_generate_description_imagery(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + metadata_auck, _ = metadata + collection = ImageryCollection(metadata_auck) + description = "Orthophotography within the Auckland region captured in the 2023 flying season." + assert collection.stac["description"] == description + + +def test_generate_description_elevation(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + metadata_auck, _ = metadata + metadata_auck["category"] = "DEM" + collection = ImageryCollection(metadata_auck) + description = "Digital Elevation Model within the Auckland region in 2023." 
+ assert collection.stac["description"] == description + + +def test_generate_description_elevation_location_input( + metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata] +) -> None: + metadata_auck, _ = metadata + metadata_auck["category"] = "DEM" + metadata_auck["location"] = "Central" + collection = ImageryCollection(metadata_auck) + description = "Digital Elevation Model within the Auckland - Central region in 2023." + assert collection.stac["description"] == description + + +def test_generate_description_satellite_imagery(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + metadata_auck, _ = metadata + metadata_auck["category"] = "Satellite Imagery" + collection = ImageryCollection(metadata_auck) + description = "Satellite imagery within the Auckland region captured in 2023." + assert collection.stac["description"] == description + + +def test_generate_description_historic_imagery(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + metadata_auck, _ = metadata + metadata_auck["category"] = "Aerial Photos" + metadata_auck["historic_survey_number"] = "SNC8844" + collection = ImageryCollection(metadata_auck) + description = "Scanned aerial imagery within the Auckland region captured in 2023." + assert collection.stac["description"] == description + + +def test_generate_description_event(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + _, metadata_hb = metadata + metadata_hb["event"] = "Cyclone Gabrielle" + collection = ImageryCollection(metadata_hb) + description = "Orthophotography within the Hawke's Bay region captured in the 2023 flying season, \ +published as a record of the Cyclone Gabrielle event." 
+ assert collection.stac["description"] == description diff --git a/scripts/stac/tests/generate_title_test.py b/scripts/stac/tests/generate_title_test.py new file mode 100644 index 000000000..1ec5d1477 --- /dev/null +++ b/scripts/stac/tests/generate_title_test.py @@ -0,0 +1,134 @@ +from datetime import datetime +from typing import Generator, Tuple + +import pytest + +from scripts.stac.imagery.collection import ImageryCollection +from scripts.stac.imagery.metadata_constants import CollectionTitleMetadata + + +@pytest.fixture(name="metadata", autouse=True) +def setup() -> Generator[Tuple[CollectionTitleMetadata, CollectionTitleMetadata], None, None]: + metadata_auck: CollectionTitleMetadata = { + "category": "Rural Aerial Photos", + "region": "auckland", + "gsd": "0.3m", + "start_datetime": datetime(2023, 1, 1), + "end_datetime": datetime(2023, 2, 2), + "lifecycle": "completed", + "location": None, + "event": None, + "historic_survey_number": None, + } + metadata_hb: CollectionTitleMetadata = { + "category": "Rural Aerial Photos", + "region": "hawkes-bay", + "gsd": "0.3m", + "start_datetime": datetime(2023, 1, 1), + "end_datetime": datetime(2023, 2, 2), + "lifecycle": "completed", + "location": None, + "event": None, + "historic_survey_number": None, + } + yield (metadata_auck, metadata_hb) + + +def test_generate_imagery_title(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + metadata_auck, _ = metadata + title = "Auckland 0.3m Rural Aerial Photos (2023)" + collection = ImageryCollection(metadata_auck) + assert collection.stac["title"] == title + + +def test_generate_dem_title(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + metadata_auck, _ = metadata + metadata_auck["category"] = "DEM" + collection = ImageryCollection(metadata_auck) + title = "Auckland LiDAR 0.3m DEM (2023)" + assert collection.stac["title"] == title + + +def test_generate_dsm_title(metadata: Tuple[CollectionTitleMetadata, 
CollectionTitleMetadata]) -> None: + metadata_auck, _ = metadata + metadata_auck["category"] = "DSM" + collection = ImageryCollection(metadata_auck) + title = "Auckland LiDAR 0.3m DSM (2023)" + assert collection.stac["title"] == title + + +def test_generate_satellite_imagery_title(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + metadata_auck, _ = metadata + metadata_auck["category"] = "Satellite Imagery" + collection = ImageryCollection(metadata_auck) + title = "Auckland 0.3m Satellite Imagery (2023)" + assert collection.stac["title"] == title + + +def test_generate_historic_imagery_title(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + title = "Auckland 0.3m SNC8844 (2023)" + metadata_auck, _ = metadata + metadata_auck["category"] = "Aerial Photos" + metadata_auck["historic_survey_number"] = "SNC8844" + collection = ImageryCollection(metadata_auck) + assert collection.stac["title"] == title + + +def test_generate_title_long_date(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + metadata_auck, _ = metadata + metadata_auck["end_datetime"] = datetime(2024, 1, 1) + collection = ImageryCollection(metadata_auck) + title = "Auckland 0.3m Rural Aerial Photos (2023 - 2024)" + assert collection.stac["title"] == title + + +def test_generate_title_location(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + metadata_auck, _ = metadata + metadata_auck["location"] = "Ponsonby" + collection = ImageryCollection(metadata_auck) + title = "Ponsonby 0.3m Rural Aerial Photos (2023)" + assert collection.stac["title"] == title + + +def test_generate_title_event_imagery(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + _, metadata_hb = metadata + metadata_hb["event"] = "Cyclone Gabrielle" + collection = ImageryCollection(metadata_hb) + title = "Hawke's Bay 0.3m Cyclone Gabrielle Rural Aerial Photos (2023)" + assert collection.stac["title"] == title 
+ + +def test_generate_title_event_elevation(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + _, metadata_hb = metadata + metadata_hb["category"] = "DSM" + metadata_hb["event"] = "Cyclone Gabrielle" + collection = ImageryCollection(metadata_hb) + title = "Hawke's Bay - Cyclone Gabrielle LiDAR 0.3m DSM (2023)" + assert collection.stac["title"] == title + + +def test_generate_title_event_satellite_imagery(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + _, metadata_hb = metadata + metadata_hb["category"] = "Satellite Imagery" + metadata_hb["event"] = "Cyclone Gabrielle" + collection = ImageryCollection(metadata_hb) + title = "Hawke's Bay 0.3m Cyclone Gabrielle Satellite Imagery (2023)" + assert collection.stac["title"] == title + + +def test_generate_dsm_title_preview(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + metadata_auck, _ = metadata + metadata_auck["category"] = "DSM" + metadata_auck["lifecycle"] = "preview" + collection = ImageryCollection(metadata_auck) + title = "Auckland LiDAR 0.3m DSM (2023) - preview" + assert collection.stac["title"] == title + + +def test_generate_imagery_title_empty_optional_str(metadata: Tuple[CollectionTitleMetadata, CollectionTitleMetadata]) -> None: + metadata_auck, _ = metadata + metadata_auck["location"] = "" + metadata_auck["event"] = "" + collection = ImageryCollection(metadata_auck) + title = "Auckland 0.3m Rural Aerial Photos (2023)" + assert collection.stac["title"] == title diff --git a/scripts/stac/tests/item_test.py b/scripts/stac/tests/item_test.py index a45b900e8..e8894687c 100644 --- a/scripts/stac/tests/item_test.py +++ b/scripts/stac/tests/item_test.py @@ -1,6 +1,9 @@ +from datetime import datetime + from scripts.files.files_helper import get_file_name_from_path from scripts.stac.imagery.collection import ImageryCollection from scripts.stac.imagery.item import ImageryItem +from scripts.stac.imagery.metadata_constants import 
CollectionTitleMetadata def test_imagery_stac_item(mocker) -> None: # type: ignore @@ -34,10 +37,19 @@ def test_imagery_stac_item(mocker) -> None: # type: ignore def test_imagery_add_collection(mocker) -> None: # type: ignore - title = "Collection" - description = "Collection Description" + metadata: CollectionTitleMetadata = { + "category": "Urban Aerial Photos", + "region": "auckland", + "gsd": "0.3m", + "start_datetime": datetime(2022, 2, 2), + "end_datetime": datetime(2022, 2, 2), + "lifecycle": "completed", + "location": None, + "event": None, + "historic_survey_number": None, + } ulid = "fake_ulid" - collection = ImageryCollection(title=title, description=description, collection_id=ulid) + collection = ImageryCollection(title_metadata=metadata, collection_id=ulid) path = "./test/BR34_5000_0302.tiff" id_ = get_file_name_from_path(path) From c3d9f233f3eebd53e83fe19be7ffcc0b11fe77a9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 19 Dec 2023 15:40:33 +1300 Subject: [PATCH 10/15] release: 3.5.0 (#784) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- CHANGELOG.md | 14 ++++++++++++++ pyproject.toml | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65fb45fcc..ee587468f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +## [3.5.0](https://github.com/linz/topo-imagery/compare/v3.4.0...v3.5.0) (2023-12-19) + + +### Features + +* Dependabot for Docker TDE-963 ([#746](https://github.com/linz/topo-imagery/issues/746)) ([250b82c](https://github.com/linz/topo-imagery/commit/250b82c196c1f3db601e9712d6834d6da28e6952)) +* Pin Docker image TDE-958 ([#744](https://github.com/linz/topo-imagery/issues/744)) ([de8a129](https://github.com/linz/topo-imagery/commit/de8a129d60bb7fc16f7872542ee93bc3ce3c9570)) +* title and description by arguments TDE-960 
([#757](https://github.com/linz/topo-imagery/issues/757)) ([dd3e282](https://github.com/linz/topo-imagery/commit/dd3e282660cc0e652667adf54fe3e5ad3b0b0bea)) + + +### Documentation + +* add explaination about collection provider tests ([#764](https://github.com/linz/topo-imagery/issues/764)) ([4e84901](https://github.com/linz/topo-imagery/commit/4e84901abe30ca0294ae570caef7fba511fe0ce5)) + ## [3.4.0](https://github.com/linz/topo-imagery/compare/v3.3.1...v3.4.0) (2023-11-23) diff --git a/pyproject.toml b/pyproject.toml index 2b9562da8..e7630d7f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ ignore_missing_imports = true [tool.poetry] name = "topo-imagery" -version = "3.4.0" +version = "3.5.0" description = "A collection of scripts for processing imagery" authors = [ "Blayne Chard ", From 275a6658f2144f2a4cf2a690a2ea2b701aafc44b Mon Sep 17 00:00:00 2001 From: Megan Davidson <33814653+MDavidson17@users.noreply.github.com> Date: Wed, 20 Dec 2023 11:15:25 +1300 Subject: [PATCH 11/15] fix: typo in prod (#788) --- scripts/collection_from_items.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/collection_from_items.py b/scripts/collection_from_items.py index a432f80c9..0ecb5d7e9 100644 --- a/scripts/collection_from_items.py +++ b/scripts/collection_from_items.py @@ -98,7 +98,7 @@ def main() -> None: "gsd": arguments.gsd, "start_datetime": arguments.start_date, "end_datetime": arguments.end_date, - "lifecycle": arguments.lifecyle, + "lifecycle": arguments.lifecycle, "location": arguments.location, "event": arguments.event, "historic_survey_number": arguments.historic_survey_number, From 5c18471037cc28bf27ab54a2335dd7d70c49e86a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 20 Dec 2023 11:20:03 +1300 Subject: [PATCH 12/15] release: 3.5.1 (#789) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 
+++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ee587468f..283395ec2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [3.5.1](https://github.com/linz/topo-imagery/compare/v3.5.0...v3.5.1) (2023-12-19) + + +### Bug Fixes + +* typo in prod ([#788](https://github.com/linz/topo-imagery/issues/788)) ([275a665](https://github.com/linz/topo-imagery/commit/275a6658f2144f2a4cf2a690a2ea2b701aafc44b)) + ## [3.5.0](https://github.com/linz/topo-imagery/compare/v3.4.0...v3.5.0) (2023-12-19) diff --git a/pyproject.toml b/pyproject.toml index e7630d7f3..e978b135e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ ignore_missing_imports = true [tool.poetry] name = "topo-imagery" -version = "3.5.0" +version = "3.5.1" description = "A collection of scripts for processing imagery" authors = [ "Blayne Chard ", From 4892b6b30dfc5cb7cc3a00a5eef52b6fde8447ee Mon Sep 17 00:00:00 2001 From: Megan Davidson <33814653+MDavidson17@users.noreply.github.com> Date: Thu, 21 Dec 2023 14:21:54 +1300 Subject: [PATCH 13/15] fix: minor changes to be consistent with current data (#790) --- scripts/stac/imagery/collection.py | 6 +++--- scripts/stac/tests/generate_title_test.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/stac/imagery/collection.py b/scripts/stac/imagery/collection.py index 0b55331f7..f23053dc5 100644 --- a/scripts/stac/imagery/collection.py +++ b/scripts/stac/imagery/collection.py @@ -213,7 +213,7 @@ def _title(self) -> str: if self.title_metadata["start_datetime"].year == self.title_metadata["end_datetime"].year: date = str(self.title_metadata["start_datetime"].year) else: - date = f"{self.title_metadata['start_datetime'].year} - {self.title_metadata['end_datetime'].year}" + date = f"{self.title_metadata['start_datetime'].year}-{self.title_metadata['end_datetime'].year}" # determine dataset name if location: @@ -223,7 +223,7 @@ def 
_title(self) -> str: # determine if dataset is preview if self.title_metadata.get("lifecycle") == "preview": - preview = "- preview" + preview = "- Preview" else: preview = None @@ -268,7 +268,7 @@ def _description(self) -> str: if self.title_metadata["start_datetime"].year == self.title_metadata["end_datetime"].year: date = str(self.title_metadata["start_datetime"].year) else: - date = f"{self.title_metadata['start_datetime'].year} - {self.title_metadata['end_datetime'].year}" + date = f"{self.title_metadata['start_datetime'].year}-{self.title_metadata['end_datetime'].year}" # format location for metadata description if location: diff --git a/scripts/stac/tests/generate_title_test.py b/scripts/stac/tests/generate_title_test.py index 1ec5d1477..545685734 100644 --- a/scripts/stac/tests/generate_title_test.py +++ b/scripts/stac/tests/generate_title_test.py @@ -78,7 +78,7 @@ def test_generate_title_long_date(metadata: Tuple[CollectionTitleMetadata, Colle metadata_auck, _ = metadata metadata_auck["end_datetime"] = datetime(2024, 1, 1) collection = ImageryCollection(metadata_auck) - title = "Auckland 0.3m Rural Aerial Photos (2023 - 2024)" + title = "Auckland 0.3m Rural Aerial Photos (2023-2024)" assert collection.stac["title"] == title @@ -121,7 +121,7 @@ def test_generate_dsm_title_preview(metadata: Tuple[CollectionTitleMetadata, Col metadata_auck["category"] = "DSM" metadata_auck["lifecycle"] = "preview" collection = ImageryCollection(metadata_auck) - title = "Auckland LiDAR 0.3m DSM (2023) - preview" + title = "Auckland LiDAR 0.3m DSM (2023) - Preview" assert collection.stac["title"] == title From 83f88f9842fce44b8d389e5d6bf933cf1df5995d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 21 Dec 2023 15:10:03 +1300 Subject: [PATCH 14/15] release: 3.5.2 (#792) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 
+- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 283395ec2..68f7b4947 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [3.5.2](https://github.com/linz/topo-imagery/compare/v3.5.1...v3.5.2) (2023-12-21) + + +### Bug Fixes + +* minor changes to be consistent with current data ([#790](https://github.com/linz/topo-imagery/issues/790)) ([4892b6b](https://github.com/linz/topo-imagery/commit/4892b6b30dfc5cb7cc3a00a5eef52b6fde8447ee)) + ## [3.5.1](https://github.com/linz/topo-imagery/compare/v3.5.0...v3.5.1) (2023-12-19) diff --git a/pyproject.toml b/pyproject.toml index e978b135e..00a18b2b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ ignore_missing_imports = true [tool.poetry] name = "topo-imagery" -version = "3.5.1" +version = "3.5.2" description = "A collection of scripts for processing imagery" authors = [ "Blayne Chard ", From 70d79b3d55678fd0a6fc6145005094eab27962b2 Mon Sep 17 00:00:00 2001 From: Victor Engmark Date: Tue, 9 Jan 2024 13:55:46 +1300 Subject: [PATCH 15/15] feat: Remove empty TIFFs after standardising TDE-964 (#767) * feat: Remove empty TIFFs after standardising TDE-964 * refactor: Check tile byte counts only in first page --- .github/workflows/format-tests.yml | 6 +++++ poetry.lock | 21 ++++++++++++++-- pyproject.toml | 1 + scripts/standardising.py | 38 ++++++++++++++++++----------- scripts/tests/__init__.py | 0 scripts/tests/data/empty.json | 6 +++++ scripts/tests/data/empty.tiff | Bin 0 -> 412 bytes 7 files changed, 56 insertions(+), 16 deletions(-) create mode 100644 scripts/tests/__init__.py create mode 100644 scripts/tests/data/empty.json create mode 100644 scripts/tests/data/empty.tiff diff --git a/.github/workflows/format-tests.yml b/.github/workflows/format-tests.yml index 41c545f8c..0a8ca39bb 100644 --- a/.github/workflows/format-tests.yml +++ b/.github/workflows/format-tests.yml @@ -62,3 +62,9 @@ jobs: run: | docker run -v "${HOME}/tmp/:/tmp/" 
topo-imagery python3 translate_ascii.py --from-file ./tests/data/elevation_ascii.json --target /tmp/ cmp --silent "${HOME}/tmp/elevation_ascii.tiff" ./scripts/tests/data/output/elevation_ascii.tiff + + - name: End to end test - Remove empty files + run: | + docker run -v "${HOME}/tmp-empty/:/tmp/" topo-imagery python3 standardise_validate.py --from-file=./tests/data/empty.json --preset=webp --target-epsg=2193 --source-epsg=2193 --target=/tmp --collection-id=123 --start-datetime=2023-01-01 --end-datetime=2023-01-01 + empty_target_directory="$(find "${HOME}/tmp-empty" -maxdepth 0 -type d -empty)" + [[ -n "$empty_target_directory" ]] diff --git a/poetry.lock b/poetry.lock index 222dfa5e3..24c0f53ba 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "arrow" @@ -1245,6 +1245,23 @@ docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib- tests = ["coverage[toml]", "freezegun (>=0.2.8)", "pretend", "pytest (>=6.0)", "pytest-asyncio (>=0.17)", "simplejson"] typing = ["mypy", "rich", "twisted"] +[[package]] +name = "tifffile" +version = "2023.12.9" +description = "Read and write TIFF files" +optional = false +python-versions = ">=3.9" +files = [ + {file = "tifffile-2023.12.9-py3-none-any.whl", hash = "sha256:9b066e4b1a900891ea42ffd33dab8ba34c537935618b9893ddef42d7d422692f"}, + {file = "tifffile-2023.12.9.tar.gz", hash = "sha256:9dd1da91180a6453018a241ff219e1905f169384355cd89c9ef4034c1b46cdb8"}, +] + +[package.dependencies] +numpy = "*" + +[package.extras] +all = ["defusedxml", "fsspec", "imagecodecs (>=2023.8.12)", "lxml", "matplotlib", "zarr"] + [[package]] name = "toml" version = "0.10.2" @@ -1391,4 +1408,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.10.6" -content-hash = 
"86b657da05531f0060c67a9095e2787898ecf226aa7b217fac07cc0f03e6622e" +content-hash = "3094394b0af9bcd26ae16eb23ef09a4a819682822245db488438a041f3c48f65" diff --git a/pyproject.toml b/pyproject.toml index 00a18b2b2..788d6c83e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ boto3 = "^1.28.70" linz-logger = "^0.11.0" py-multihash = "^2.0.1" shapely = "^2.0.1" +tifffile = "^2023.12.9" [tool.poetry.dev-dependencies] black = "^23.11.0" diff --git a/scripts/standardising.py b/scripts/standardising.py index 2f52d2e6c..68ccc10ed 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -5,6 +5,7 @@ from typing import List, Optional from linz_logger import get_log +from tifffile import TiffFile from scripts.aws.aws_helper import is_s3 from scripts.cli.cli_helper import TileFiles @@ -55,17 +56,21 @@ def run_standardising( get_log().info("standardising_start", gdalVersion=gdal_version, fileCount=len(todo)) with Pool(concurrency) as p: - standardized_tiffs = p.map( - partial( - standardising, - preset=preset, - source_epsg=source_epsg, - target_epsg=target_epsg, - target_output=target_output, - cutline=cutline, - ), - todo, - ) + standardized_tiffs = [ + entry + for entry in p.map( + partial( + standardising, + preset=preset, + source_epsg=source_epsg, + target_epsg=target_epsg, + target_output=target_output, + cutline=cutline, + ), + todo, + ) + if entry is not None + ] p.close() p.join() @@ -100,7 +105,7 @@ def standardising( target_epsg: str, cutline: Optional[str], target_output: str = "/tmp/", -) -> FileTiff: +) -> Optional[FileTiff]: """Apply transformations using GDAL to the source file. Args: @@ -189,5 +194,10 @@ def standardising( # Need GDAL to write to temporary location so no broken files end up in the done folder. 
run_gdal(command, input_file=input_file, output_file=standardized_working_path) - write(standardized_file_path, read(standardized_working_path), content_type=ContentType.GEOTIFF.value) - return tiff + with TiffFile(standardized_working_path) as file_handle: + if any(tile_byte_count != 0 for tile_byte_count in file_handle.pages.first.tags["TileByteCounts"].value): + write(standardized_file_path, read(standardized_working_path), content_type=ContentType.GEOTIFF.value) + return tiff + + get_log().info("Skipping empty output image", path=input_file, sourceEPSG=source_epsg, targetEPSG=target_epsg) + return None diff --git a/scripts/tests/__init__.py b/scripts/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/scripts/tests/data/empty.json b/scripts/tests/data/empty.json new file mode 100644 index 000000000..6f246a42a --- /dev/null +++ b/scripts/tests/data/empty.json @@ -0,0 +1,6 @@ +[ + { + "output": "BQ30_5000_1234", + "input": ["./tests/data/empty.tiff"] + } +] diff --git a/scripts/tests/data/empty.tiff b/scripts/tests/data/empty.tiff new file mode 100644 index 0000000000000000000000000000000000000000..f3498a17043c197172c3b7e49a6a0673355c3604 GIT binary patch literal 412 zcmebD)MDUZU|@|mD)kSb;*HEc+1Q6#nu659#N1{vy%#CBn1 z0owyKlED>;9gL)xubGE|87O}Xh?`n?7}$XHCm>$m&di_yq?LeXZfs{_umZA^fb7O4 zObjeQwvnkRkc0t02M7yB!^E-E4(qKPK{5|Egn(%fn-T0%b_S5)AU4psV48_#W4i%J zo?~M>CxZb48!&(v8TynNctG?-4i+%2sq7c!?qXzMV4$F{;Fnsi5S5yklbDyH5K@$w cS6r4_RGg~dn_85dSdw3)08&;1Q(waX0JR?`v;Y7A literal 0 HcmV?d00001