Skip to content

Commit

Permalink
fix: download sidecar files, ignore 404
Browse files Browse the repository at this point in the history
  • Loading branch information
amfage committed Jan 15, 2024
1 parent 4d13ec8 commit 22421f1
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 17 deletions.
34 changes: 21 additions & 13 deletions scripts/files/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from scripts.aws.aws_helper import is_s3
from scripts.files import fs_local, fs_s3
from boto3 import client, resource


def write(destination: str, source: bytes, content_type: Optional[str] = None) -> str:
Expand Down Expand Up @@ -52,13 +53,12 @@ def exists(path: str) -> bool:
return fs_local.exists(path)


def write_all(inputs: List[str], target: str, optional_inputs: List[str] = [], concurrency: Optional[int] = 4) -> List[str]:
def write_all(inputs: List[str], target: str, concurrency: Optional[int] = 4) -> List[str]:
"""Writes list of files to target destination using multithreading.
Args:
inputs: list of files to read
target: target folder to write to
optional_inputs: list of optional files to read, e.g. sidecar files
concurrency: max thread pool workers
Returns:
Expand All @@ -80,18 +80,26 @@ def write_all(inputs: List[str], target: str, optional_inputs: List[str] = [], c
get_log().error("Missing Files", count=len(inputs) - len(written_tiffs))
raise Exception("Not all mandatory source files were written")

# get sidecar files
return written_tiffs


def write_sidecars(inputs: List[str], target: str, concurrency: Optional[int] = 4) -> List[str]:
    """Copy optional sidecar files to the target folder using multithreading.

    Each sidecar is read and written inside its own worker task, so the
    downloads run concurrently. A sidecar that does not exist in S3
    (`NoSuchKey`) is logged and skipped without preventing the remaining
    sidecars from being copied. Any other error is propagated to the caller.

    Args:
        inputs: list of sidecar files to read
        target: target folder to write to
        concurrency: max thread pool workers

    Returns:
        list of written file paths (missing sidecars are omitted)
    """

    def _copy(source: str) -> str:
        # Read inside the worker: calling `read()` while building the futures
        # would download every file serially on the calling thread, and the
        # first missing file would abort all the remaining ones.
        return write(os.path.join(target, os.path.basename(source)), read(source))

    written_sidecars: List[str] = []
    with ThreadPoolExecutor(max_workers=concurrency) as executor:
        futures = {executor.submit(_copy, input_): input_ for input_ in inputs}
        for future in as_completed(futures):
            try:
                written_sidecars.append(future.result())
            # The exception class is only looked up when a task actually
            # failed, so no S3 resource is created on the success path.
            except resource("s3").meta.client.exceptions.NoSuchKey as nsk:
                # Sidecars are optional: report the missing file and carry on.
                get_log().warn("Sidecar Not Found", path=futures[future], error=str(nsk))

    return written_sidecars
10 changes: 6 additions & 4 deletions scripts/standardising.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from scripts.cli.cli_helper import TileFiles
from scripts.files.file_tiff import FileTiff, FileTiffType
from scripts.files.files_helper import ContentType, is_tiff
from scripts.files.fs import exists, read, write, write_all
from scripts.files.fs import exists, read, write, write_all, write_sidecars
from scripts.gdal.gdal_bands import get_gdal_band_offset
from scripts.gdal.gdal_helper import get_gdal_version, run_gdal
from scripts.gdal.gdal_preset import (
Expand Down Expand Up @@ -132,9 +132,11 @@ def standardising(
standardized_working_path = os.path.join(tmp_path, standardized_file_name)
sidecars: List[str] = []
for extension in [".prj", ".tfw"]:
for input in files.inputs:
sidecars.append(f"{os.path.splitext(input)[0]}{extension}")
source_files = write_all(files.inputs, f"{tmp_path}/source/", optional_inputs=sidecars)
for i in files.inputs:
sidecars.append(f"{os.path.splitext(i)[0]}{extension}")
source_files = write_all(files.inputs, f"{tmp_path}/source/")
write_sidecars(sidecars, f"{tmp_path}/source/")
source_tiffs = []
source_tiffs = [file for file in source_files if is_tiff(file)]

vrt_add_alpha = True
Expand Down

0 comments on commit 22421f1

Please sign in to comment.