Merge pull request #116 from wri/develop
Develop to master
jterry64 authored Feb 1, 2023
2 parents f27bc2d + c9ecba5 · commit 93ff657
Showing 3 changed files with 22 additions and 19 deletions.
src/Dockerfile: 1 addition, 1 deletion

@@ -11,7 +11,7 @@ COPY . $WORKDIR
 # installing dependencies to build package
 RUN pip install . -t python
 
-# change 31852
+# change 31855
 
 # Precompile all python packages and remove .py files
 RUN find python/ -type f -name '*.pyc' -print0 | xargs -0 rm -rf
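The only change here bumps the marker comment from 31852 to 31855, most likely to invalidate Docker's layer cache from this line down so the steps below rerun. The visible RUN line clears stale .pyc files before the precompile the next comment describes; the compile-and-strip commands themselves sit below the shown context. A hedged stdlib sketch of that precompile-and-strip idea (paths illustrative, not the image's actual RUN chain):

    import compileall
    import pathlib

    # Byte-compile the tree; legacy=True writes foo.pyc next to foo.py
    # instead of under __pycache__, so the sources can be deleted.
    compileall.compile_dir("python/", legacy=True, quiet=1)

    # Ship only the compiled files: drop the .py sources.
    for source in pathlib.Path("python/").rglob("*.py"):
        source.unlink()
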
src/datapump/jobs/geotrellis.py: 21 additions, 17 deletions
@@ -192,8 +192,8 @@ def upload(self):
                         table.partitions.dict()
                         if table.partitions
                         else table.partitions,
-                        table.longitude_field,
-                        table.latitude_field,
+                        longitude_field=table.longitude_field,
+                        latitude_field=table.latitude_field,
                     )
                 else:
                     client.append(table.dataset, table.version, table.source_uri)
@@ -208,8 +208,8 @@ def upload(self):
                     table.cluster.dict() if table.cluster else table.cluster,
                     table.table_schema,
                     table.partitions.dict() if table.partitions else table.partitions,
-                    table.longitude_field,
-                    table.latitude_field,
+                    longitude_field=table.longitude_field,
+                    latitude_field=table.latitude_field,
                 )
 
     def check_upload(self) -> JobStatus:
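Both call sites in upload() now pass the coordinate fields by keyword instead of by position. A minimal sketch of what that buys, using a hypothetical stand-in for the DataApiClient method (the real signature is not visible in these hunks):

    # Hypothetical signature; only longitude_field/latitude_field are
    # taken from the diff, everything else is assumed.
    def create_vector_version(dataset, version, source_uri, schema=None,
                              partitions=None, longitude_field=None,
                              latitude_field=None):
        ...

    # Passed positionally, the coordinate values silently shift onto the
    # wrong parameters if the signature gains or reorders arguments;
    # keywords pin each value to its parameter name.
    create_vector_version("some_dataset", "v1", "s3://bucket/file.tsv",
                          longitude_field="longitude",
                          latitude_field="latitude")
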
@@ -278,18 +278,20 @@ def _get_result_tables(self) -> List[AnalysisResultTable]:
         bucket, prefix = get_s3_path_parts(result_path)
 
         LOGGER.debug(f"Looking for analysis results at {result_path}")
-        resp = get_s3_client().list_objects_v2(Bucket=bucket, Prefix=prefix)
-
-        LOGGER.debug(resp)
-
-        if "Contents" not in resp:
-            raise AssertionError("No results found in S3")
-
-        keys = [
-            item["Key"]
-            for item in resp["Contents"]
-            if item["Key"].endswith(".csv") and "download" not in item["Key"]
-        ]
+        paginator = get_s3_client().get_paginator("list_objects_v2")
+        pages = paginator.paginate(Bucket=bucket, Prefix=prefix)
+
+        keys = []
+        for page in pages:
+            if "Contents" not in page:
+                raise AssertionError("No results found in S3")
+
+            page_keys = [
+                item["Key"]
+                for item in page["Contents"]
+                if item["Key"].endswith(".csv") and "download" not in item["Key"]
+            ]
+            keys += page_keys
 
         result_tables = [
             self._get_result_table(bucket, path, list(files))
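The old code issued a single list_objects_v2 call, which returns at most 1,000 objects per response, so larger result sets were silently truncated; the new code walks every page through a boto3 paginator. A self-contained sketch of the same pattern (the function name and the bare boto3 client are illustrative; the diff goes through the module's get_s3_client helper):

    import boto3

    def list_result_csvs(bucket: str, prefix: str) -> list:
        # The paginator keeps issuing continuation requests until the
        # listing under the prefix is exhausted.
        paginator = boto3.client("s3").get_paginator("list_objects_v2")
        keys = []
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
            for item in page.get("Contents", []):
                key = item["Key"]
                if key.endswith(".csv") and "download" not in key:
                    keys.append(key)
        return keys

Note that the committed loop instead raises AssertionError when a page lacks a Contents key, preserving the old "no results found" failure mode.
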
@@ -551,6 +553,7 @@ def _get_field_type(self, field, is_whitelist=False):
             or field.endswith("__perc")
             or field.endswith("__year")
             or field.endswith("__week")
+            or field.endswith("__decile")
             or field == "adm1"
             or field == "adm2"
         ):
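This hunk adds __decile to the suffixes that _get_field_type groups with __perc, __year, __week and the adm1/adm2 names; the type that branch assigns lies outside the visible lines. A distilled sketch of just the suffix test (names illustrative):

    GROUPED_SUFFIXES = ("__perc", "__year", "__week", "__decile")

    def in_grouped_branch(field: str) -> bool:
        # str.endswith accepts a tuple, so one call covers every suffix.
        return field.endswith(GROUPED_SUFFIXES) or field in ("adm1", "adm2")
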
@@ -861,7 +864,7 @@ def _configurations(self, worker_count: str) -> List[Dict[str, Any]]:
             "spark.driver.cores": "1",
             "spark.executor.cores": "1",
             "spark.yarn.executor.memoryOverhead": "1G",
-            "spark.dynamicAllocation.enabled": "false"
+            "spark.dynamicAllocation.enabled": "false",
         }
 
         if self.geotrellis_version >= "2.0.0":
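The hunk itself only adds a trailing comma after the last Spark property, but the dict is worth a gloss: _configurations returns EMR configuration objects. A hedged sketch of how a flat spark-defaults dict like this one is conventionally wrapped for EMR's RunJobFlow API; the wrapping is an assumption, since the surrounding method body is not shown:

    spark_defaults = {
        "spark.driver.cores": "1",
        "spark.executor.cores": "1",
        "spark.yarn.executor.memoryOverhead": "1G",
        "spark.dynamicAllocation.enabled": "false",
    }

    # EMR takes a list of {Classification, Properties} objects;
    # "spark-defaults" feeds spark-defaults.conf on the cluster.
    configurations = [
        {"Classification": "spark-defaults", "Properties": spark_defaults},
    ]
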
@@ -905,6 +908,7 @@ def _configurations(self, worker_count: str) -> List[Dict[str, Any]]:
 class FireAlertsGeotrellisJob(GeotrellisJob):
     alert_type: str
     alert_sources: Optional[List[str]] = []
+    timeout_sec = 43200
 
     FIRE_SOURCE_DEFAULT_PATHS: Dict[str, str] = {
         "viirs": f"s3://{GLOBALS.s3_bucket_data_lake}/nasa_viirs_fire_alerts/v1/vector/epsg-4326/tsv",
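timeout_sec = 43200 gives fire-alert jobs a 12-hour budget (43200 s = 12 × 3600 s). Assuming GeotrellisJob is a pydantic v1 model, as the pydantic~=1.7.2 pin in setup.py suggests, the bare assignment overrides the default of a field annotated on the base class. A minimal sketch of that override (the base default of 3600 is invented for illustration):

    from typing import List, Optional
    from pydantic import BaseModel

    class GeotrellisJob(BaseModel):
        timeout_sec: int = 3600  # hypothetical base default

    class FireAlertsGeotrellisJob(GeotrellisJob):
        alert_type: str
        alert_sources: Optional[List[str]] = []
        timeout_sec = 43200  # 12 hours, overriding the inherited default

    job = FireAlertsGeotrellisJob(alert_type="viirs")
    assert job.timeout_sec == 43200
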
src/setup.py: 0 additions, 1 deletion

@@ -14,7 +14,6 @@
         "google-cloud-storage~=2.1.0",
         "pyshp~=2.1.0",
         "pydantic~=1.7.2",
-        "smart-open~=4.0.1",
         "retry~=0.9.2",
     ], # noqa: E231
 )
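The dependency list drops smart-open~=4.0.1. The surviving pins use the compatible-release operator: pydantic~=1.7.2 means >=1.7.2,<1.8.0, i.e. patch updates only. A sketch of the surrounding setup() call; the package name and version are placeholders, and only install_requires is taken from the diff:

    from setuptools import find_packages, setup

    setup(
        name="datapump",   # assumed from the repository
        version="0.0.0",   # placeholder
        packages=find_packages(),
        install_requires=[
            "google-cloud-storage~=2.1.0",
            "pyshp~=2.1.0",
            "pydantic~=1.7.2",
            "retry~=0.9.2",
        ],  # noqa: E231
    )
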
