Skip to content

Commit

Permalink
GTC-2631 Use Version type
Browse files Browse the repository at this point in the history
  • Loading branch information
manukala6 committed Jan 10, 2025
1 parent 2d27842 commit c0d614f
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ RUN pip install . -t python
# to change the hash of the file and get TF to realize it needs to be
# redeployed. Ticket for a better solution:
# https://gfw.atlassian.net/browse/GTC-1250
# change 15
# change 16

RUN yum install -y zip geos-devel

Expand Down
27 changes: 12 additions & 15 deletions src/datapump/jobs/geotrellis.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from itertools import groupby
from pathlib import Path
from pprint import pformat
from packaging.version import Version
from typing import Any, Dict, List, Optional, Tuple

from ..clients.aws import get_emr_client, get_s3_client, get_s3_path_parts
Expand Down Expand Up @@ -79,7 +80,7 @@ class GeotrellisJob(Job):
features_1x1: str
sync_version: Optional[str] = None
feature_type: GeotrellisFeatureType = GeotrellisFeatureType.feature
geotrellis_version: str
geotrellis_version: Version
sync: bool = False
sync_type: Optional[SyncType] = None
change_only: bool = False
Expand Down Expand Up @@ -483,7 +484,7 @@ def _get_indices_and_cluster(
raise e

# schema change in version 2.1.4
if self.geotrellis_version < "2.1.4":
if self.geotrellis_version < Version("2.1.4"):
threshold_field = "umd_tree_cover_density__threshold"
glad_conf_field = "is__confirmed_alert"
glad_date_field = "alert__date"
Expand Down Expand Up @@ -736,7 +737,7 @@ def _get_step(self) -> Dict[str, Any]:
"cluster",
"--class",
"org.globalforestwatch.summarystats.SummaryMain",
f"{GLOBALS.geotrellis_jar_path}/treecoverloss-assembly-{self.geotrellis_version}.jar",
f"{GLOBALS.geotrellis_jar_path}/treecoverloss-assembly-{str(self.geotrellis_version)}.jar",
]

# after 1.5, analysis is an argument instead of an option
Expand Down Expand Up @@ -805,9 +806,14 @@ def _run_job_flow(self, name, instances, steps, applications, configurations):

# Spark/Scala upgrade in version 2.0.0
emr_version = (
GLOBALS.emr_version if self.geotrellis_version > "2.0.0" else "emr-6.1.0"
GLOBALS.emr_version if self.geotrellis_version > Version("2.0.0") else "emr-6.1.0"
)

# If using a version earlier than 2.4.1, use the older GDAL bootstrap script
bootstrap_path = f"s3://{GLOBALS.s3_bucket_pipeline}/geotrellis/bootstrap/gdal-3.8.3.sh"
if self.geotrellis_version < Version("2.4.1"):
bootstrap_path = f"s3://{GLOBALS.s3_bucket_pipeline}/geotrellis/bootstrap/gdal.sh"

request = {
"Name": name,
"ReleaseLabel": emr_version,
Expand All @@ -821,7 +827,7 @@ def _run_job_flow(self, name, instances, steps, applications, configurations):
{
"Name": "Install GDAL",
"ScriptBootstrapAction": {
"Path": f"s3://{GLOBALS.s3_bucket_pipeline}/geotrellis/bootstrap/gdal-3.8.3.sh"
"Path": bootstrap_path
},
},
],
Expand All @@ -833,15 +839,6 @@ def _run_job_flow(self, name, instances, steps, applications, configurations):
if GLOBALS.emr_service_role:
request["ServiceRole"] = GLOBALS.emr_service_role

# If using a version earlier than 2.4.1, use the older GDAL bootstrap script
if self.geotrellis_version < "2.4.1":
request["BootstrapActions"] = {
"Name": "Install GDAL",
"ScriptBootstrapAction": {
"Path": f"s3://{GLOBALS.s3_bucket_pipeline}/geotrellis/bootstrap/gdal.sh",
},
},

LOGGER.info(f"Sending EMR request:\n{pformat(request)}")

response = client.run_job_flow(**request)
Expand Down Expand Up @@ -977,7 +974,7 @@ def _configurations(self, worker_count: int) -> List[Dict[str, Any]]:
"spark.dynamicAllocation.enabled": "false",
}

if self.geotrellis_version >= "2.0.0":
if self.geotrellis_version >= Version("2.0.0"):
spark_defaults.update(
{
"spark.decommission.enabled": "true",
Expand Down
1 change: 1 addition & 0 deletions src/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@
"pyshp~=2.3.1",
"pydantic~=1.10.11",
"retry~=0.9.2",
"packaging~=24.2"
], # noqa: E231
)

0 comments on commit c0d614f

Please sign in to comment.