From f4af022a31e69e51d57c63fd33b6923349eb4735 Mon Sep 17 00:00:00 2001 From: annajungbluth Date: Fri, 15 Mar 2024 10:38:45 +0000 Subject: [PATCH] removed duplicate scripts, and reorganized msg folder --- .../multi-sat/1.0-image-visualizations.ipynb | 2 +- rs_tools/_src/data/goes/download.py | 4 +- rs_tools/_src/data/modis/__init__.py | 3 +- rs_tools/_src/data/modis/download.py | 2 +- rs_tools/_src/data/msg/__init__.py | 0 .../_src/data/msg/download.py | 0 scripts/goes-download.py | 421 ------------------ scripts/modis-download.py | 375 ---------------- 8 files changed, 6 insertions(+), 801 deletions(-) create mode 100644 rs_tools/_src/data/msg/__init__.py rename scripts/msg-download.py => rs_tools/_src/data/msg/download.py (100%) delete mode 100644 scripts/goes-download.py delete mode 100644 scripts/modis-download.py diff --git a/notebooks/dev/multi-sat/1.0-image-visualizations.ipynb b/notebooks/dev/multi-sat/1.0-image-visualizations.ipynb index ae81576..02ae542 100644 --- a/notebooks/dev/multi-sat/1.0-image-visualizations.ipynb +++ b/notebooks/dev/multi-sat/1.0-image-visualizations.ipynb @@ -4064,7 +4064,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.undefined" } }, "nbformat": 4, diff --git a/rs_tools/_src/data/goes/download.py b/rs_tools/_src/data/goes/download.py index 5ec5e01..6d4bb7c 100644 --- a/rs_tools/_src/data/goes/download.py +++ b/rs_tools/_src/data/goes/download.py @@ -330,7 +330,7 @@ def _check_input_processing_level(processing_level: str) -> bool: return True else: msg = "Unrecognized processing level" - msg += f"\nNeeds to be 'L1b' or 'L2'. Others are not yet test" + msg += f"\nNeeds to be 'L1b' or 'L2'. Others are not yet tested" raise ValueError(msg) @@ -393,7 +393,7 @@ def _check_save_dir(save_dir: str) -> bool: return True else: try: - os.mkdir(save_dir) + os.makedirs(save_dir) return True except: msg = "Save directory does not exist" diff --git a/rs_tools/_src/data/modis/__init__.py b/rs_tools/_src/data/modis/__init__.py index 2d5793a..97f99a8 100644 --- a/rs_tools/_src/data/modis/__init__.py +++ b/rs_tools/_src/data/modis/__init__.py @@ -3,7 +3,7 @@ from pathlib import Path from datetime import datetime - +# TODO: Expand mapping to other resolutions (250m, 500m) MODIS_NAME_TO_ID= dict( terra="MOD021KM", terra_geo="MOD03", @@ -13,6 +13,7 @@ aqua_cloud="MYD35_L2", ) +# TODO: Expand mapping to other resolutions (250m, 500m) MODIS_ID_TO_NAME = dict( MYD021KM="aqua", MYDO3="aqua_geo", diff --git a/rs_tools/_src/data/modis/download.py b/rs_tools/_src/data/modis/download.py index 72adc46..c044ad9 100644 --- a/rs_tools/_src/data/modis/download.py +++ b/rs_tools/_src/data/modis/download.py @@ -327,7 +327,7 @@ def _check_save_dir(save_dir: str) -> bool: return True else: try: - os.mkdir(save_dir) + os.makedirs(save_dir) return True except: msg = "Save directory does not exist" diff --git a/rs_tools/_src/data/msg/__init__.py b/rs_tools/_src/data/msg/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/msg-download.py b/rs_tools/_src/data/msg/download.py similarity index 100% rename from scripts/msg-download.py rename to rs_tools/_src/data/msg/download.py diff --git a/scripts/goes-download.py b/scripts/goes-download.py deleted file mode 100644 index 57afb09..0000000 --- a/scripts/goes-download.py +++ /dev/null @@ -1,421 +0,0 @@ -from typing import Optional, List, Union -import os -import xarray as xr -import matplotlib.pyplot as plt -import pandas as pd -import numpy as np -import tqdm -import typer -from loguru import logger -from datetime import datetime, timedelta - -from goes2go import GOES -from goes2go.data import goes_nearesttime - -# The cadence depends on the measurement scale -# The full disk is measured every 15 mins -# CONUS is measured every 5 mins -# Mesoscale is measured every 1 min -DOMAIN_TIMESTEP = { - 'F': 15, - 'C': 5, - 'M': 1 -} - - -def goes_download( - start_date: str, - end_date: Optional[str]=None, - start_time: Optional[str]='00:00:00', - end_time: Optional[str]='23:59:00', - daily_window_t0: Optional[str]='00:00:00', - daily_window_t1: Optional[str]='23:59:00', - time_step: Optional[str]=None, - satellite_number: int=16, - save_dir: Optional[str] = ".", - instrument: str = "ABI", - processing_level: str = 'L1b', - data_product: str = 'Rad', - domain: str = 'F', - bands: Optional[str] = "all", - check_bands_downloaded: bool = False, -): - """ - Downloads GOES satellite data for a specified time period and set of bands. - - Args: - start_date (str): The start date of the data download in the format 'YYYY-MM-DD'. - end_date (str, optional): The end date of the data download in the format 'YYYY-MM-DD'. If not provided, the end date will be the same as the start date. - start_time (str, optional): The start time of the data download in the format 'HH:MM:SS'. Default is '00:00:00'. - end_time (str, optional): The end time of the data download in the format 'HH:MM:SS'. Default is '23:59:00'. - daily_window_t0 (str, optional): The start time of the daily window in the format 'HH:MM:SS'. Default is '00:00:00'. Used if e.g., only day/night measurements are required. - daily_window_t1 (str, optional): The end time of the daily window in the format 'HH:MM:SS'. Default is '23:59:00'. Used if e.g., only day/night measurements are required. - time_step (str, optional): The time step between each data download in the format 'HH:MM:SS'. If not provided, the default is 1 hour. - satellite_number (int): The satellite number. Default is 16. - save_dir (str, optional): The directory where the downloaded files will be saved. Default is the current directory. - instrument (str): The instrument name. Default is 'ABI'. - processing_level (str): The processing level of the data. Default is 'L1b'. - data_product (str): The data product to download. Default is 'Rad'. - domain (str): The domain of the data. Default is 'F' - Full Disk. - bands (str, optional): The bands to download. Default is 'all'. - check_bands_downloaded (bool, optional): Whether to check if all bands were successfully downloaded for each time step. Default is False. - - Returns: - list: A list of file paths for the downloaded files. - - Examples: - # ========================= - # GOES LEVEL 1B Test Cases - # ========================= - # custom day - python scripts/goes-download.py 2020-10-01 --end-date 2020-10-01 - # custom day + end points - python scripts/goes-download.py 2020-10-01 --end-date 2020-10-01 --start-time 00:00:00 --end-time 23:00:00 - # custom day + end points + time window - python scripts/goes-download.py 2020-10-01 --end-date 2020-10-01 --start-time 00:00:00 --end-time 23:00:00 --daily-window-t0 08:30:00 --daily-window-t1 21:30:00 - # custom day + end points + time window + timestep - python scripts/goes-download.py 2020-10-01 --end-date 2020-10-01 --start-time 00:00:00 --end-time 23:00:00 --daily-window-t0 08:30:00 --daily-window-t1 21:30:00 --time-step 06:00:00 - # =================================== - # GOES LEVEL 2 CLOUD MASK Test Cases - # =================================== - python scripts/goes-download.py 2020-10-01 --start-time 10:00:00 --end-time 11:00:00 --processing-level L2 --data-product ACM - - # ==================== - # FAILURE TEST CASES - # ==================== - python scripts/goes-download.py 2018-10-01 --end-date 2018-10-01 --daily-window-t0 17:00:00 --daily-window-t1 17:14:00 --time-step 00:15:00 --save-dir /home/juanjohn/data/ - python scripts/goes-download.py 2018-10-01 --end-date 2018-10-01 --daily-window-t0 17:00:00 --daily-window-t1 17:14:00 --time-step 00:15:00 --save-dir /home/juanjohn/data/ --check-bands-downloaded - """ - - # run checks - # check satellite details - _check_input_processing_level(processing_level=processing_level) - _check_instrument(instrument=instrument) - _check_satellite_number(satellite_number=satellite_number) - logger.info(f"Satellite Number: {satellite_number}") - _check_domain(domain=domain) - # compile bands - if processing_level == 'L1b': - list_of_bands = _check_bands(bands=bands) - elif processing_level == 'L2': - list_of_bands = None - else: - raise ValueError('bands not correctly specified for given processing level') - # check data product - data_product = f"{instrument}-{processing_level}-{data_product}{domain}" - logger.info(f"Data Product: {data_product}") - _check_data_product_name(data_product=data_product) - - # check start/end dates/times - if end_date is None: - end_date = start_date - - # combine date and time information - start_datetime_str = start_date + ' ' + start_time - end_datetime_str = end_date + ' ' + end_time - _check_datetime_format(start_datetime_str, end_datetime_str) - # datetime conversion - start_datetime = datetime.strptime(start_datetime_str, "%Y-%m-%d %H:%M:%S") - end_datetime = datetime.strptime(end_datetime_str, "%Y-%m-%d %H:%M:%S") - _check_start_end_times(start_datetime=start_datetime, end_datetime=end_datetime) - - # define time step for data query - if time_step is None: - time_step = '1:00:00' - logger.info("No timedelta specified. Default is 1 hour.") - _check_timedelta_format(time_delta=time_step) - - # convert str to datetime object - hours, minutes, seconds = convert_str2time(time=time_step) - time_delta = timedelta(hours=hours, minutes=minutes, seconds=seconds) - - _check_timedelta(time_delta=time_delta, domain=domain) - - # Compile list of dates/times - list_of_dates = np.arange(start_datetime, end_datetime + time_delta, time_delta).astype(datetime) - print('Times to check: ',list_of_dates[0], list_of_dates[-1]) - - window_date = '1991-10-19' # Add arbitrary date to convert into proper datetime object - start_datetime_window_str = window_date + ' ' + daily_window_t0 - end_datetime_window_str = window_date + ' ' + daily_window_t1 - _check_start_end_times(start_datetime=start_datetime, end_datetime=end_datetime) - # datetime conversion - daily_window_t0_datetime = datetime.strptime(start_datetime_window_str, "%Y-%m-%d %H:%M:%S") - daily_window_t1_datetime = datetime.strptime(end_datetime_window_str, "%Y-%m-%d %H:%M:%S") - _check_start_end_times(start_datetime=daily_window_t0_datetime, end_datetime=daily_window_t1_datetime) - - # filter function - check that query times fall within desired time window - def is_in_between(date): - return daily_window_t0_datetime.time() <= date.time() <= daily_window_t1_datetime.time() - - # compile new list of dates within desired time window - list_of_dates = list(filter(is_in_between, list_of_dates)) - - # check if save_dir is valid before attempting to download - _check_save_dir(save_dir=save_dir) - - files = [] - - # create progress bars for dates and bands - pbar_time = tqdm.tqdm(list_of_dates) - - for itime in pbar_time: - - pbar_time.set_description(f"Time - {itime}") - - if processing_level == 'L1b': - sub_files_list = _goes_level1_download( - time=itime, - list_of_bands=list_of_bands, - satellite_number=satellite_number, - data_product=data_product, - domain=domain, - save_dir=save_dir, - check_bands_downloaded=check_bands_downloaded, - ) - elif processing_level == 'L2': - sub_files_list = _goes_level2_download( - time=itime, - satellite_number=satellite_number, - data_product=data_product, - domain=domain, - save_dir=save_dir) - else: - raise ValueError(f"Unrecognized processing level: {processing_level}") - - files += sub_files_list - - return files - - -def _goes_level2_download(time, - satellite_number, - data_product, - domain, - save_dir): - - try: - ifile: pd.DataFrame = goes_nearesttime( - attime=time, - within=pd.to_timedelta(15, 'm'), - satellite=satellite_number, - product=data_product, - domain=domain, - return_as="filelist", - save_dir=save_dir, - ) - # extract filepath from GOES download pandas dataframe - filepath: str = os.path.join(save_dir, ifile.file[0]) - return [filepath] - except IndexError: - logger.info(f"Data could not be downloaded for time step {time}.") - return [] - -def _goes_level1_download(time, - list_of_bands, - satellite_number, - data_product, - domain, - save_dir, - check_bands_downloaded - ): - - sub_files_list: list[str] = [] - pbar_bands = tqdm.tqdm(list_of_bands) - - - for iband in pbar_bands: - - pbar_bands.set_description(f"Band - {iband}") - # download file - try: - ifile: pd.DataFrame = goes_nearesttime( - attime=time, - within=pd.to_timedelta(15, 'm'), - satellite=satellite_number, - product=data_product, - domain=domain, - bands=iband, - return_as="filelist", - save_dir=save_dir, - ) - # extract filepath from GOES download pandas dataframe - filepath: str = os.path.join(save_dir, ifile.file[0]) - sub_files_list += [filepath] - - except IndexError: - logger.info(f"Band {iband} could not be downloaded for time step {time}.") - if check_bands_downloaded: - logger.info(f"Deleting all other bands for time step {time}.") - delete_list_of_files(sub_files_list) # delete partially downloaded bands - return [] - - return sub_files_list - - -def _check_datetime_format(start_datetime_str: str, end_datetime_str: str) -> bool: - try: - datetime.strptime(start_datetime_str, "%Y-%m-%d %H:%M:%S") - datetime.strptime(end_datetime_str, "%Y-%m-%d %H:%M:%S") - return True - except Exception as e: - msg = "Please check date/time format" - msg += "\nExpected date format: %Y-%m-%d" - msg += "\nExpected time format: %H:%M:%S" - raise SyntaxError(msg) - - -def _check_start_end_times(start_datetime: datetime, end_datetime: datetime) -> bool: - """ check end_datetime is after start_datetime """ - if start_datetime < end_datetime: - return True - else: - msg = "Start datetime must be before end datetime\n" - msg += f"This does not hold for start = {str(start_datetime)} and end = {str(end_datetime)}" - raise ValueError(msg) - -def _check_timedelta_format(time_delta: str) -> bool: - try: - time_list = time_delta.split(":") - assert len(time_list) == 3 - assert 0 <= int(time_list[0]) # Check that hours is >= 0, and convertible to int - assert 0 <= int(time_list[1]) < 60 # Check that minutes < 60, and convertible to int - assert 0 <= int(time_list[2]) < 60 # Check that seconds < 60, and convertible to int - - except Exception as e: - msg = "Please check time step format" - msg += "\nExpected time format: %H:%M:%S" - raise SyntaxError(msg) - -def _check_timedelta(time_delta: datetime, domain: str) -> bool: - if time_delta.days > 0: return True - - if time_delta.seconds >= DOMAIN_TIMESTEP[domain] * 60: return True - - msg = "Time delta must not be smaller than the time resolution of the data\n" - msg += f"Time delta {str(time_delta)} is too small for domain {domain}\n" - msg += f"The minimum required time delta is {DOMAIN_TIMESTEP[domain]} min" - raise ValueError(msg) - -def _check_domain(domain: str) -> bool: - """checks domain GOES data""" - if str(domain) in ["F", "C", "M"]: - return True - else: - msg = "Unrecognized domain" - msg += f"\nNeeds to be 'F', 'C', 'M'." - msg += "\nOthers are not yet implemented" - raise ValueError(msg) - - -def _check_satellite_number(satellite_number: int) -> bool: - """checks satellite number for GOES data""" - if satellite_number in [16, 17, 18]: - return True - else: - msg = "Unrecognized satellite number level" - msg += f"\nNeeds to be 16, 17, or 18." - msg += "\nOthers are not yet implemented" - msg += f"\nInput: {satellite_number}" - raise ValueError(msg) - - -def _check_input_processing_level(processing_level: str) -> bool: - """checks processing level for GOES data""" - if processing_level in ["L1b", "L2"]: - return True - else: - msg = "Unrecognized processing level" - msg += f"\nNeeds to be 'L1b' or 'L2'. Others are not yet tested" - raise ValueError(msg) - - -def _check_instrument(instrument: str) -> bool: - """checks instrument for GOES data.""" - if instrument in ["ABI"]: - return True - else: - msg = "Unrecognized instrument" - msg += f"\nNeeds to be 'ABI'. Others are not yet tested" - raise ValueError(msg) - -def _check_data_product_name(data_product: str) -> bool: - if data_product in ['ABI-L1b-RadF', 'ABI-L1b-RadM', 'ABI-L1b-RadC', 'ABI-L1b-Rad', - 'ABI-L2-ACMF', 'ABI-L2-ACMM', 'ABI-L2-ACMC']: - return True - else: - msg = f"Unrecognized data product {data_product}" - raise ValueError(msg) - -def _check_bands(bands: str) -> List[int]: - if bands in ['all']: - list_of_bands = list(np.arange(1, 17)) - return list_of_bands - else: - try: - list_of_bands = list(set(map(int, bands.split(' ')))) - logger.debug(f"List of str Bands to Ints: {list_of_bands}") - - criteria = lambda x: 17 > x > 0 - result = list(map(criteria, list_of_bands)) - logger.debug(f"Result from criteria: {result}") - - assert sum(result) == len(list_of_bands) - return list_of_bands - except Exception as e: - msg = "Unrecognized bands" - msg += f"\nNeeds to be 'all' or string of valid bands separated by spaces" - msg += '\n(e.g., "13 14", \'1 2 3\').' - raise ValueError(msg) - -def convert_str2time(time: str): - time_list = time.split(":") - hours = int(time_list[0]) - minutes = int(time_list[1]) - seconds = int(time_list[2]) - - return hours, minutes, seconds - -def delete_list_of_files(file_list: List[str]) -> None: - for file_path in file_list: - try: - os.remove(file_path) - except OSError as e: - print(f"Error: {file_path} : {e.strerror}") - -def _check_save_dir(save_dir: str) -> bool: - """ check if save_dir exists """ - if os.path.isdir(save_dir): - return True - else: - try: - os.makedirs(save_dir) - return True - except: - msg = "Save directory does not exist" - msg += f"\nReceived: {save_dir}" - msg += "\nCould not create directory" - raise ValueError(msg) - -def main(input: str): - - print(input) - -if __name__ == '__main__': - typer.run(goes_download) - - """ - # custom day - python rs_tools/scripts/goes-download.py 2020-10-01 --end-date 2020-10-01 - # custom day + end points - python rs_tools/scripts/goes-download.py 2020-10-01 --end-date 2020-10-01 --start-time 00:00:00 --end-time 23:00:00 - # custom day + end points + time window - python rs_tools/scripts/goes-download.py 2020-10-01 --end-date 2020-10-01 --start-time 00:00:00 --end-time 23:00:00 --daily-window-t0 08:30:00 --daily-window-t1 21:30:00 - # custom day + end points + time window + time step - python rs_tools/scripts/goes-download.py 2020-10-01 --end-date 2020-10-01 --start-time 00:00:00 --end-time 23:00:00 --daily-window-t0 08:30:00 --daily-window-t1 21:30:00 --time-step 06:00:00 - # ==================== - # FAILURE TEST CASES - # ==================== - python scripts/goes-download.py 2018-10-01 --end-date 2018-10-01 --daily-window-t0 17:00:00 --daily-window-t1 17:14:00 --time-step 00:15:00 --save-dir /home/juanjohn/data/ - python scripts/goes-download.py 2018-10-01 --end-date 2018-10-01 --daily-window-t0 17:00:00 --daily-window-t1 17:14:00 --time-step 00:15:00 --save-dir /home/juanjohn/data/ --check-bands-downloaded - """ diff --git a/scripts/modis-download.py b/scripts/modis-download.py deleted file mode 100644 index 5d44a0b..0000000 --- a/scripts/modis-download.py +++ /dev/null @@ -1,375 +0,0 @@ -from typing import Optional, List, Union -import os -import xarray as xr -import matplotlib.pyplot as plt -import pandas as pd -import numpy as np -import tqdm -import typer -from loguru import logger -from datetime import datetime, timedelta -import earthaccess - -def modis_download( - start_date: str, - end_date: Optional[str]=None, - start_time: Optional[str]='00:00:00', # used for daily window - end_time: Optional[str]='23:59:00', # used for daily window - day_step: Optional[int]=1, - satellite: str='Terra', - save_dir: Optional[str]=".", - processing_level: str = 'L1b', - resolution: str = "1KM", - bounding_box: Optional[tuple[float, float, float, float]]=(-180, -90, 180, 90), # TODO: Add polygon option - earthdata_username: Optional[str]="", - earthdata_password: Optional[str]="", - day_night_flag: Optional[str]=None, - identifier: Optional[str] = "02" -): - """ - Downloads MODIS satellite data for a specified time period and location. - - Args: - start_date (str): The start date of the data download in the format 'YYYY-MM-DD'. - end_date (str, optional): The end date of the data download in the format 'YYYY-MM-DD'. If not provided, the end date will be the same as the start date. - start_time (str, optional): The start time of the data download in the format 'HH:MM:SS'. Default is '00:00:00'. - end_time (str, optional): The end time of the data download in the format 'HH:MM:SS'. Default is '23:59:00'. - day_step (int, optional): The time step (in days) between downloads. This is to allow the user to download data every e.g. 2 days. If not provided, the default is daily downloads. - satellite (str): The satellite. Options are "Terra" and "Aqua", with "Terra" as default. - save_dir (str, optional): The directory where the downloaded files will be saved. Default is the current directory. - processing_level (str): The processing level of the data. Default is 'L1b'. - resolution (str): The resolution of the data. Options are "QKM" (250m), "HKM (500m), "1KM" (1000m), with "1KM" as default. Not all bands are measured at all resolutions. - bounding_box (tuple, optional): The region to be downloaded. - earthdata_username (str): Username associated with the NASA Earth Data login. Required for download. - earthdata_password (str): Password associated with the NASA Earth Data login. Required for download. - - Returns: - list: A list of file paths for the downloaded files. - - Examples: - # ========================= - # MODIS LEVEL 1B Test Cases - # ========================= - # one day - successfully downloaded 4 granules (all nighttime) - python scripts/modis-download.py 2018-10-01 --start-time 08:00:00 --end-time 8:10:00 --save-dir ./notebooks/modisdata/test_script/ - - # multiple days - finds 62 granules, stopped download for times sake but seemed to work - python scripts/modis-download.py 2018-10-01 --end-date 2018-10-9 --day-step 3 --start-time 08:00:00 --end-time 13:00:00 --save-dir ./notebooks/modisdata/test_script/ - - # test bounding box - successfully downloaded 4 files (all daytime) - python scripts/modis-download.py 2018-10-01 --start-time 08:00:00 --end-time 13:00:00 --save-dir ./notebooks/modisdata/test_script/ --bounding-box -10 -10 20 5 - - # test day/night flag - successfully downloaded 1 file (daytime only) - python scripts/modis-download.py 2018-10-15 --save-dir ./notebooks/modisdata/test_script/ --bounding-box -10 10 -5 15 --day-night-flag day - - # ========================= - # MODIS LEVEL 2 CLOUD MASK Test Cases - # ========================= - - # one day - successfully downloaded 4 granules (all nighttime) - python scripts/modis-download.py 2018-10-01 --start-time 08:00:00 --end-time 8:10:00 --save-dir ./notebooks/modisdata/ --processing-level L2 --identifier 35 - - # ==================== - # FAILURE TEST CASES - # ==================== - # bounding box input invalid - throws error as expected - python scripts/modis-download.py 2018-10-01 --bounding-box a b c d - - # end date before start date - throws error as expected - python scripts/modis-download.py 2018-10-01 --end-date 2018-09-01 - - # empty results - warns user as expected - python scripts/modis-download.py 2018-10-01 --start-time 07:00:00 --end-time 7:10:00 --save-dir ./notebooks/modisdata/test_script/ --bounding-box -10 -10 -5 -5 - - """ - # check if earthdata login is available - _check_earthdata_login(earthdata_username=earthdata_username, earthdata_password=earthdata_password) - - # check if netcdf4 backend is available - _check_netcdf4_backend() - - # run checks - # translate str inputs to modis specific names - _check_input_processing_level(processing_level=processing_level) - _check_identifier(identifier=identifier) - satellite_code = _check_satellite(satellite=satellite) - resolution_code = _check_resolution(resolution=resolution) - logger.info(f"Satellite: {satellite}") - # check data product - if processing_level == 'L1b': - data_product = f"{satellite_code}{identifier}{resolution_code}" - elif processing_level == 'L2': - # TODO: Implement other level-2 products or allow passing in data_product? - # NOTE: Resolution argument not needed for cloud mask download - data_product = f"{satellite_code}{identifier}_{processing_level}" - else: - raise ValueError("Incorrect processing level, downloader only implemented for 'L1b' and 'L2'") - - logger.info(f"Data Product: {data_product}") - _check_data_product_name(data_product=data_product) - - # check start/end dates/times - if end_date is None: - end_date = start_date - - # combine date and time information - start_datetime_str = start_date + ' ' + start_time - end_datetime_str = end_date + ' ' + end_time - _check_datetime_format(start_datetime_str=start_datetime_str, end_datetime_str=end_datetime_str) - # datetime conversion - start_datetime = datetime.strptime(start_datetime_str, "%Y-%m-%d %H:%M:%S") - end_datetime = datetime.strptime(end_datetime_str, "%Y-%m-%d %H:%M:%S") - _check_start_end_dates(start_datetime=start_datetime, end_datetime=end_datetime) - - # compile list of dates/times - day_delta = timedelta(days=day_step) - list_of_dates = np.arange(start_datetime, end_datetime, day_delta).astype(datetime) - - list_of_daily_windows = [get_daily_window(daily_start, end_time) for daily_start in list_of_dates] - - # check if save_dir is valid before attempting to download - _check_save_dir(save_dir=save_dir) - - # check that bounding box is valid - # TODO: Add option to add multiple location requests - # NOTE: earthaccess allows other ways to specify spatial extent, e.g. polygon, point - # NOTE: extend to allow these options - _check_bounding_box(bounding_box=bounding_box) - - # create dictionary of earthaccess search parameters - search_params = { - "short_name": data_product, - "bounding_box": bounding_box, - } - - # if day_night_flag was provided, check that day_night_flag is valid - if day_night_flag: - _check_day_night_flag(day_night_flag=day_night_flag) - # add day_night_flag to search parameters - search_params["day_night_flag"] = day_night_flag - - # TODO: remove - logging search_params for testing - logger.info(f"Search parameters: {search_params}") - - files = [] - - # create progress bar for dates - pbar_time = tqdm.tqdm(list_of_daily_windows) - - for itime in pbar_time: - pbar_time.set_description(f"Time - {itime[0]} to {itime[1]}") - success_flag = True - - # add daytime window to search parameters - search_params["temporal"] = itime - - # search for data - results_day = earthaccess.search_data(**search_params) - - # check if any results were returned - if not results_day: - # if not: log warning and continue to next date - success_flag = False - warn = f"No data found for {itime[0]} to {itime[1]} in the specified bounding box" - if day_night_flag: warn += f" for {day_night_flag}-time measurements only" - logger.warning(warn) - continue - - files_day = earthaccess.download(results_day, save_dir) - # TODO: can this fail? if yes, use try / except to prevent the programme from crashing - # TODO: check file sizes - if less than X MB (ca 70MB) the download failed - if success_flag: - files += files_day - - return files - -# start/end times are used as daily window -def get_daily_window(daily_start, end_time): - """computes tuple of start and end date/time for each day for earthaccess call""" - day = daily_start.strftime("%Y-%m-%d") - daily_end = day + ' ' + end_time - return (daily_start.strftime("%Y-%m-%d %H:%M:%S"), daily_end) - - -def _check_earthdata_login(earthdata_username: str, earthdata_password: str) -> bool: - """check if earthdata login is available in environment variables / as input arguments""" - if earthdata_username and earthdata_password: - os.environ["EARTHDATA_USERNAME"] = earthdata_username - os.environ["EARTHDATA_PASSWORD"] = earthdata_password - - if os.environ.get("EARTHDATA_USERNAME") is None or os.environ.get("EARTHDATA_PASSWORD") is None: - msg = "Please set your Earthdata credentials as environment variables using:" - msg += "\nexport EARTHDATA_USERNAME=" - msg += "\nexport EARTHDATA_PASSWORD=" - msg += "\nOr provide them as command line arguments using:" - msg += "\n--earthdata-username --earthdata-password " - raise ValueError(msg) - - # check if credentials are valid - auth_obj = earthaccess.login('environment') - - if auth_obj.authenticated: - return True - else: - msg = "Earthdata login failed." - msg += "\nPlease check your credentials and set them as environment variables using:" - msg += "\nexport EARTHDATA_USERNAME=" - msg += "\nexport EARTHDATA_PASSWORD=" - msg += "\nOr provide them as command line arguments using:" - msg += "\n--earthdata-username --earthdata-password " - raise ValueError(msg) - -def _check_netcdf4_backend() -> bool: - """check if xarray netcdf4 backend is available""" - if 'netcdf4' in xr.backends.list_engines().keys(): - return True - else: - msg = "Please install netcdf4 backend for xarray using one of the following commands:" - msg += "\npip install netCDF4" - msg += "\nconda install -c conda-forge netCDF4" - raise ValueError(msg) - -def _check_input_processing_level(processing_level: str) -> bool: - """checks processing level for MODIS data""" - if processing_level in ["L1b", "L2"]: - return True - else: - msg = "Unrecognized processing level" - msg += f"\nNeeds to be 'L1b' or 'L2'. Others are not yet implemented" - raise ValueError(msg) - -def _check_identifier(identifier: str) -> bool: - if identifier in ["02", "35"]: - return True - else: - msg = "Unrecognized data identifier" - msg += f"\nNeeds to be '02' or '35'. Others are not yet tested" - raise ValueError(msg) - -def _check_satellite(satellite: str) -> str: - if satellite == 'Aqua': - return 'MYD' - elif satellite == 'Terra': - return 'MOD' - else: - msg = "Unrecognized satellite" - msg += f"\nNeeds to be 'Aqua' or 'Terra'. Others are not yet implemented" - raise ValueError(msg) - -def _check_resolution(resolution: str) -> str: - if resolution in ["1KM", "1Km", "1km"]: - return "1KM" - elif resolution in ["500M", "500m"]: - return "HKM" - elif resolution in ["250M", "250m"]: - return "QKM" - else: - msg = "Unrecognized resolution" - msg += f"\nNeeds to be '1KM', '500M', '250M. Others are not available" - raise ValueError(msg) - -def _check_data_product_name(data_product: str) -> bool: - if data_product in ['MOD021KM', 'MOD02HKM', 'MOD02QKM', 'MYD021KM', 'MYD02HKM', 'MYD02QKM', - 'MOD35_L2', 'MYD35_L2']: - return True - else: - msg = "Unrecognized data product" - msg += f"\nOnly implemented for TERRA/AQUA MODIS and 1KM, 500M, 250M resolution (Level 1B), and cloud mask (Level 2)." - raise ValueError(msg) - -def _check_datetime_format(start_datetime_str: str, end_datetime_str: str) -> bool: - try: - datetime.strptime(start_datetime_str, "%Y-%m-%d %H:%M:%S") - datetime.strptime(end_datetime_str, "%Y-%m-%d %H:%M:%S") - return True - except Exception as e: - msg = "Please check date/time format" - msg += "\nExpected date format: %Y-%m-%d" - msg += "\nExpected time format: %H:%M:%S" - raise SyntaxError(msg) - -def _check_start_end_dates(start_datetime: datetime, end_datetime: datetime) -> bool: - """ check end_datetime is after start_datetime """ - if start_datetime < end_datetime: - return True - else: - msg = "Start datetime must be before end datetime\n" - msg += f"This does not hold for start = {str(start_datetime)} and end = {str(end_datetime)}" - raise ValueError(msg) - -def _check_bounding_box(bounding_box: List[float]) -> bool: - """ check if bounding box is valid """ - lower_left_lon, lower_left_lat , upper_right_lon, upper_right_lat = bounding_box - - # check that latitudes and longitudes are within valid range - if lower_left_lon < -180 or upper_right_lon > 180 or lower_left_lat < -90 or upper_right_lat > 90: - msg = "Bounding box must be between -180 and 180 for longitude and -90 and 90 for latitude" - msg += f"\nReceived: [lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat] = {bounding_box} " - raise ValueError(msg) - - # check that upper lat is above lower lat - if lower_left_lat > upper_right_lat: - msg = "The bounding box north value ({upper_right_lat}) must be greater than the south value ({lower_left_lat})" - msg = "Bounding box must be in the format [lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat]" - msg += f"\nReceived: {bounding_box}" - raise ValueError(msg) - - # warn if bounding box crosses the dateline - if lower_left_lon > upper_right_lon: - logger.warning(f"The bounding box crosses the dateline: it ranges from {lower_left_lon} to {upper_right_lon} degrees longitude") - - return True - - -def _check_save_dir(save_dir: str) -> bool: - """ check if save_dir exists """ - if os.path.isdir(save_dir): - return True - else: - try: - os.makedirs(save_dir) - return True - except: - msg = "Save directory does not exist" - msg += f"\nReceived: {save_dir}" - msg += "\nCould not create directory" - raise ValueError(msg) - -def _check_day_night_flag(day_night_flag: str) -> bool: - """ check if day_night_flag is valid """ - if day_night_flag in ["day", "night"]: - return True - else: - msg = "Unrecognized day/night flag" - msg += f"\nReceived: {day_night_flag}" - msg += f"\nIf provided, it needs to be 'day' or 'night'." - raise ValueError(msg) - - -if __name__ == '__main__': - typer.run(modis_download) - - """ - # one day - successfully downloaded 4 granules (all nighttime) - python scripts/modis-download.py 2018-10-01 --start-time 08:00:00 --end-time 8:10:00 --save-dir ./notebooks/modisdata/test_script/ - - # multiple days - finds 62 granules, stopped download for times sake but seemed to work - python scripts/modis-download.py 2018-10-01 --end-date 2018-10-9 --day-step 3 --start-time 08:00:00 --end-time 13:00:00 --save-dir ./notebooks/modisdata/test_script/ - - # test bounding box - successfully downloaded 4 files (all daytime) - python scripts/modis-download.py 2018-10-01 --start-time 08:00:00 --end-time 13:00:00 --save-dir ./notebooks/modisdata/test_script/ --bounding-box -10 -10 20 5 - - - # ==================== - # FAILURE TEST CASES - # ==================== - # bounding box input invalid - throws error as expected - python scripts/modis-download.py 2018-10-01 --bounding-box a b c d - - # end date before start date - throws error as expected - python scripts/modis-download.py 2018-10-01 --end-date 2018-09-01 - - # empty results - warns user as expected - python scripts/modis-download.py 2018-10-01 --start-time 07:00:00 --end-time 7:10:00 --save-dir ./notebooks/modisdata/test_script/ --bounding-box -10 -10 -5 -5 - - """