From a8b6c0daa5f8b3e55bde33d5c94accc59d6795b8 Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Thu, 12 Sep 2024 11:42:15 -0400 Subject: [PATCH 1/3] Workaround GTC-2986 by checking if version is successfully created after getting 5XX response --- src/datapump/clients/data_api.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/datapump/clients/data_api.py b/src/datapump/clients/data_api.py index b91c7dc..672b794 100644 --- a/src/datapump/clients/data_api.py +++ b/src/datapump/clients/data_api.py @@ -129,16 +129,26 @@ def create_vector_version( def create_version( self, dataset: str, version: str, payload: Dict[str, Any] ) -> Dict[str, Any]: - - uri = f"{GLOBALS.data_api_uri}/dataset/{dataset}/{version}" - return self._send_request(ValidMethods.put, uri, payload)["data"] + try: + uri = f"{GLOBALS.data_api_uri}/dataset/{dataset}/{version}" + return self._send_request(ValidMethods.put, uri, payload)["data"] + except DataApiResponseError as e: + # Workaround for GTC-2986 + # Getting a 500 response when creating version, but version is still created + # causing a 400 response on subsequent retries since we're trying to PUT + # an already existing version. + # For now, let's just return the version if it exists. + # Otherwise, propagate original exception. + try: + return self.get_version(dataset, version) + except DataApiResponseError: + raise e def create_aux_asset( self, dataset: str, version: str, payload: Dict[str, Any] ) -> Dict[str, Any]: - uri = f"{GLOBALS.data_api_uri}/dataset/{dataset}/{version}/assets" - return self._send_request(ValidMethods.post, uri, payload)["data"] + return self.get_version(ValidMethods.post, uri, payload)["data"] def append( self, dataset: str, version: str, source_uris: List[str] From e96f59b7008b1faaa85dfdc8af5724ea91ef2836 Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Mon, 16 Sep 2024 11:13:13 -0700 Subject: [PATCH 2/3] Only sync past day of subscriptions instead of default 2 days to avoid timeout --- src/datapump/sync/rw_areas.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/datapump/sync/rw_areas.py b/src/datapump/sync/rw_areas.py index f5f2d91..6567fb3 100644 --- a/src/datapump/sync/rw_areas.py +++ b/src/datapump/sync/rw_areas.py @@ -3,6 +3,7 @@ import os import traceback from contextlib import contextmanager +from datetime import datetime, timedelta from typing import Any, Dict, Iterator, List, Optional, Set, Tuple import requests @@ -82,8 +83,10 @@ def get_pending_areas() -> List[Any]: LOGGER.info(f"Using token {token()} for {api_prefix()} API") headers: Dict[str, str] = {"Authorization": f"Bearer {token()}"} - # Area sync - sync_url: str = f"https://{api_prefix()}-api.globalforestwatch.org/v2/area/sync" + # For some reason we are the only place calling this RW API to sync + # new subscriptions with areas. See GTC-2987 to fix this workflow. + yesterday = (datetime.now() - timedelta(1)).strftime("%Y-%m-%d") + sync_url: str = f"https://{api_prefix()}-api.globalforestwatch.org/v2/area/sync?startDate={yesterday}" sync_resp = requests.post(sync_url, headers=headers) if sync_resp.status_code != 200: From e35fc954b573733ddb6340376cf781d4a2056db9 Mon Sep 17 00:00:00 2001 From: Justin Terry Date: Mon, 16 Sep 2024 11:19:10 -0700 Subject: [PATCH 3/3] Revert bad change --- src/datapump/clients/data_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datapump/clients/data_api.py b/src/datapump/clients/data_api.py index 672b794..c14cb94 100644 --- a/src/datapump/clients/data_api.py +++ b/src/datapump/clients/data_api.py @@ -148,7 +148,7 @@ def create_aux_asset( self, dataset: str, version: str, payload: Dict[str, Any] ) -> Dict[str, Any]: uri = f"{GLOBALS.data_api_uri}/dataset/{dataset}/{version}/assets" - return self.get_version(ValidMethods.post, uri, payload)["data"] + return self._send_request(ValidMethods.post, uri, payload)["data"] def append( self, dataset: str, version: str, source_uris: List[str]