From c0d614f7d25c3b2bba12c5bdb9c00b22fba0477b Mon Sep 17 00:00:00 2001 From: manukala6 Date: Fri, 10 Jan 2025 08:20:50 -0800 Subject: [PATCH] GTC-2631 Use Version type --- src/Dockerfile | 2 +- src/datapump/jobs/geotrellis.py | 27 ++++++++++++--------------- src/setup.py | 1 + 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/Dockerfile b/src/Dockerfile index 692a22c..76414c9 100644 --- a/src/Dockerfile +++ b/src/Dockerfile @@ -15,7 +15,7 @@ RUN pip install . -t python # to change the hash of the file and get TF to realize it needs to be # redeployed. Ticket for a better solution: # https://gfw.atlassian.net/browse/GTC-1250 -# change 15 +# change 16 RUN yum install -y zip geos-devel diff --git a/src/datapump/jobs/geotrellis.py b/src/datapump/jobs/geotrellis.py index 734aeb9..99d64d2 100644 --- a/src/datapump/jobs/geotrellis.py +++ b/src/datapump/jobs/geotrellis.py @@ -7,6 +7,7 @@ from itertools import groupby from pathlib import Path from pprint import pformat +from packaging.version import Version from typing import Any, Dict, List, Optional, Tuple from ..clients.aws import get_emr_client, get_s3_client, get_s3_path_parts @@ -79,7 +80,7 @@ class GeotrellisJob(Job): features_1x1: str sync_version: Optional[str] = None feature_type: GeotrellisFeatureType = GeotrellisFeatureType.feature - geotrellis_version: str + geotrellis_version: Version sync: bool = False sync_type: Optional[SyncType] = None change_only: bool = False @@ -483,7 +484,7 @@ def _get_indices_and_cluster( raise e # schema change in version 2.1.4 - if self.geotrellis_version < "2.1.4": + if self.geotrellis_version < Version("2.1.4"): threshold_field = "umd_tree_cover_density__threshold" glad_conf_field = "is__confirmed_alert" glad_date_field = "alert__date" @@ -736,7 +737,7 @@ def _get_step(self) -> Dict[str, Any]: "cluster", "--class", "org.globalforestwatch.summarystats.SummaryMain", - f"{GLOBALS.geotrellis_jar_path}/treecoverloss-assembly-{self.geotrellis_version}.jar", + 
f"{GLOBALS.geotrellis_jar_path}/treecoverloss-assembly-{str(self.geotrellis_version)}.jar", ] # after 1.5, analysis is an argument instead of an option @@ -805,9 +806,14 @@ def _run_job_flow(self, name, instances, steps, applications, configurations): # Spark/Scala upgrade in version 2.0.0 emr_version = ( - GLOBALS.emr_version if self.geotrellis_version > "2.0.0" else "emr-6.1.0" + GLOBALS.emr_version if self.geotrellis_version > Version("2.0.0") else "emr-6.1.0" ) + # If using a version earlier than 2.4.1, use older GDAL version + bootstrap_path = f"s3://{GLOBALS.s3_bucket_pipeline}/geotrellis/bootstrap/gdal-3.8.3.sh" + if self.geotrellis_version < Version("2.4.1"): + bootstrap_path = f"s3://{GLOBALS.s3_bucket_pipeline}/geotrellis/bootstrap/gdal.sh" + request = { "Name": name, "ReleaseLabel": emr_version, @@ -821,7 +827,7 @@ def _run_job_flow(self, name, instances, steps, applications, configurations): { "Name": "Install GDAL", "ScriptBootstrapAction": { - "Path": f"s3://{GLOBALS.s3_bucket_pipeline}/geotrellis/bootstrap/gdal-3.8.3.sh" + "Path": bootstrap_path }, }, ], @@ -833,15 +839,6 @@ def _run_job_flow(self, name, instances, steps, applications, configurations): if GLOBALS.emr_service_role: request["ServiceRole"] = GLOBALS.emr_service_role - # If using version 2.4.1 or earlier, use older GDAL version - if self.geotrellis_version < "2.4.1": - request["BootstrapActions"] = { - "Name": "Install GDAL", - "ScriptBootstrapAction": { - "Path": f"s3://{GLOBALS.s3_bucket_pipeline}/geotrellis/bootstrap/gdal.sh", - }, - }, - LOGGER.info(f"Sending EMR request:\n{pformat(request)}") response = client.run_job_flow(**request) @@ -977,7 +974,7 @@ def _configurations(self, worker_count: int) -> List[Dict[str, Any]]: "spark.dynamicAllocation.enabled": "false", } - if self.geotrellis_version >= "2.0.0": + if self.geotrellis_version >= Version("2.0.0"): spark_defaults.update( { "spark.decommission.enabled": "true", diff --git a/src/setup.py b/src/setup.py index 13e1119..a1d5ed7 
100644 --- a/src/setup.py +++ b/src/setup.py @@ -15,5 +15,6 @@ "pyshp~=2.3.1", "pydantic~=1.10.11", "retry~=0.9.2", + "packaging~=24.2" ], # noqa: E231 )