Skip to content

Commit

Permalink
Merge pull request #49 from spaceml-org/5-overpass-download
Browse files Browse the repository at this point in the history
5 overpass download
  • Loading branch information
annajungbluth authored Jul 3, 2024
2 parents 579af2c + d4400df commit 17bffb3
Show file tree
Hide file tree
Showing 21 changed files with 426 additions and 189 deletions.
4 changes: 2 additions & 2 deletions config/esl-3dclouds/geoprocess.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# PATH WITH RAW DATA
read_path: /mnt/disks/data/miniset/
read_path: /home/anna.jungbluth/data/

# PATH FOR SAVING GEOPROCESSED DATA
save_path: /mnt/disks/data/miniset-full-disk/
save_path: /home/anna.jungbluth/data/geoprocessing/

defaults:
- _self_
6 changes: 3 additions & 3 deletions config/esl-3dclouds/main.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
defaults:
- download
- geoprocess
- patch
- satellite: aqua
# - patch
- satellite: msg

stage: download
stage: geoprocess
8 changes: 4 additions & 4 deletions config/esl-3dclouds/satellite/msg.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
download:
_target_: rs_tools._src.data.msg.downloader_msg_(modis_overpass).download
_target_: rs_tools._src.data.msg.downloader_msg_modis_overpass.download
save_dir: ${save_dir}
start_date: ${period.start_date}
start_time: ${period.start_time}
Expand All @@ -8,10 +8,10 @@ download:

geoprocess:
_target_: rs_tools._src.geoprocessing.msg.geoprocessor_msg.geoprocess
read_path: ${read_path}/msg/raw
save_path: ${save_path}/msg/geoprocessed
read_path: ${read_path}
save_path: ${save_path}
resolution: null
region: "-70 -15 20 5"
region: null
resample_method: bilinear

# preprocess:
Expand Down
154 changes: 104 additions & 50 deletions rs_tools/_src/data/goes/download.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Optional, List, Union
import os
import warnings
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
Expand All @@ -22,15 +23,20 @@
'M': 1
}

# Ignore FutureWarning
warnings.simplefilter(action='ignore', category=FutureWarning)

# Your code here

def goes_download(
start_date: str,
start_date: Optional[str]='2018-10-01', # TODO: Wrap date/time parameters in dict to reduce arguments?
end_date: Optional[str]=None,
start_time: Optional[str]='00:00:00',
end_time: Optional[str]='23:59:00',
daily_window_t0: Optional[str]='00:00:00',
daily_window_t1: Optional[str]='23:59:00',
time_step: Optional[str]=None,
predefined_timestamps: Optional[List[str]]=None,
satellite_number: int=16,
save_dir: Optional[str] = ".",
instrument: str = "ABI",
Expand All @@ -44,13 +50,14 @@ def goes_download(
Downloads GOES satellite data for a specified time period and set of bands.
Args:
start_date (str): The start date of the data download in the format 'YYYY-MM-DD'.
start_date (str, optional): The start date of the data download in the format 'YYYY-MM-DD'.
end_date (str, optional): The end date of the data download in the format 'YYYY-MM-DD'. If not provided, the end date will be the same as the start date.
start_time (str, optional): The start time of the data download in the format 'HH:MM:SS'. Default is '00:00:00'.
end_time (str, optional): The end time of the data download in the format 'HH:MM:SS'. Default is '23:59:00'.
daily_window_t0 (str, optional): The start time of the daily window in the format 'HH:MM:SS'. Default is '00:00:00'. Used if e.g., only day/night measurements are required.
daily_window_t1 (str, optional): The end time of the daily window in the format 'HH:MM:SS'. Default is '23:59:00'. Used if e.g., only day/night measurements are required.
time_step (str, optional): The time step between each data download in the format 'HH:MM:SS'. If not provided, the default is 1 hour.
predefined_timestamps (list, optional): A list of timestamps to download. Expected format is datetime or str following 'YYYY-MM-DD HH:MM:SS'. If provided, start/end dates/times will be ignored.
satellite_number (int, optional): The satellite number. Default is 16.
save_dir (str, optional): The directory where the downloaded files will be saved. Default is the current directory.
instrument (str, optional): The instrument name. Default is 'ABI'.
Expand Down Expand Up @@ -106,54 +113,20 @@ def goes_download(
logger.info(f"Data Product: {data_product}")
_check_data_product_name(data_product=data_product)

# check start/end dates/times
if end_date is None:
end_date = start_date

# combine date and time information
start_datetime_str = start_date + ' ' + start_time
end_datetime_str = end_date + ' ' + end_time
_check_datetime_format(start_datetime_str, end_datetime_str)
# datetime conversion
start_datetime = datetime.strptime(start_datetime_str, "%Y-%m-%d %H:%M:%S")
end_datetime = datetime.strptime(end_datetime_str, "%Y-%m-%d %H:%M:%S")
_check_start_end_times(start_datetime=start_datetime, end_datetime=end_datetime)

# define time step for data query
if time_step is None:
time_step = '1:00:00'
logger.info("No timedelta specified. Default is 1 hour.")
_check_timedelta_format(time_delta=time_step)

# convert str to datetime object
hours, minutes, seconds = convert_str2time(time=time_step)
time_delta = timedelta(hours=hours, minutes=minutes, seconds=seconds)

_check_timedelta(time_delta=time_delta, domain=domain)

# Compile list of dates/times
list_of_dates = np.arange(start_datetime, end_datetime + time_delta, time_delta).astype(datetime)
print('Times to check: ',list_of_dates[0], list_of_dates[-1])

window_date = '1991-10-19' # Add arbitrary date to convert into proper datetime object
start_datetime_window_str = window_date + ' ' + str(daily_window_t0)
end_datetime_window_str = window_date + ' ' + str(daily_window_t1)
_check_start_end_times(start_datetime=start_datetime, end_datetime=end_datetime)
# datetime conversion
daily_window_t0_datetime = datetime.strptime(start_datetime_window_str, "%Y-%m-%d %H:%M:%S")
daily_window_t1_datetime = datetime.strptime(end_datetime_window_str, "%Y-%m-%d %H:%M:%S")
_check_start_end_times(start_datetime=daily_window_t0_datetime, end_datetime=daily_window_t1_datetime)

# filter function - check that query times fall within desired time window
def is_in_between(date):
return daily_window_t0_datetime.time() <= date.time() <= daily_window_t1_datetime.time()

# compile new list of dates within desired time window
list_of_dates = list(filter(is_in_between, list_of_dates))
if list_of_dates == []:
msg = f"No times within the specified time window {daily_window_t0_datetime.time()} - {daily_window_t1_datetime.time()} for time step {time_step}"
msg += f"\n adjust daily window times or time step"
raise ValueError(msg)
# TODO: Allow passing as argument?
timestamp_dict = {
'start_date': start_date,
'end_date': end_date,
'start_time': start_time,
'end_time': end_time,
'daily_window_t0': daily_window_t0,
'daily_window_t1': daily_window_t1,
'time_step': time_step,
'domain': domain,
}

# compile list of dates
list_of_dates = _compile_list_of_dates(timestamp_dict=timestamp_dict, predefined_timestamps=predefined_timestamps)

# check if save_dir is valid before attempting to download
_check_save_dir(save_dir=save_dir)
Expand Down Expand Up @@ -256,6 +229,87 @@ def _goes_level1_download(time,

return sub_files_list

def _compile_list_of_dates(timestamp_dict: dict, predefined_timestamps: List[str]) -> List[datetime]:
if predefined_timestamps is not None:
_check_predefined_timestamps(predefined_timestamps=predefined_timestamps)
if type(predefined_timestamps[0]) is datetime:
list_of_dates = predefined_timestamps
elif type(predefined_timestamps[0]) is str:
list_of_dates = [datetime.strptime(x, "%Y-%m-%d %H:%M:%S") for x in predefined_timestamps]
logger.info(f"Using predefined timestamps.")
elif timestamp_dict['start_date'] is not None:
# check start/end dates/times
if timestamp_dict['end_date'] is None:
end_date = timestamp_dict['start_date']
else:
end_date = timestamp_dict['end_date']
# combine date and time information
start_datetime_str = timestamp_dict['start_date'] + ' ' + timestamp_dict['start_time']
end_datetime_str = end_date + ' ' + timestamp_dict['end_time']
_check_datetime_format(start_datetime_str, end_datetime_str)
# datetime conversion
start_datetime = datetime.strptime(start_datetime_str, "%Y-%m-%d %H:%M:%S")
end_datetime = datetime.strptime(end_datetime_str, "%Y-%m-%d %H:%M:%S")
_check_start_end_times(start_datetime=start_datetime, end_datetime=end_datetime)

# define time step for data query
if timestamp_dict['time_step'] is None:
time_step = '1:00:00'
logger.info("No timedelta specified. Default is 1 hour.")
_check_timedelta_format(time_delta=time_step)

# convert str to datetime object
hours, minutes, seconds = convert_str2time(time=time_step)
time_delta = timedelta(hours=hours, minutes=minutes, seconds=seconds)

_check_timedelta(time_delta=time_delta, domain=timestamp_dict['domain'])

# Compile list of dates/times
list_of_dates = np.arange(start_datetime, end_datetime, time_delta).astype(datetime)
print('Times to check: ',list_of_dates[0], list_of_dates[-1])

window_date = '1991-10-19' # Add arbitrary date to convert into proper datetime object
start_datetime_window_str = window_date + ' ' + timestamp_dict['daily_window_t0']
end_datetime_window_str = window_date + ' ' + timestamp_dict['daily_window_t1']
_check_start_end_times(start_datetime=start_datetime, end_datetime=end_datetime)
# datetime conversion
daily_window_t0_datetime = datetime.strptime(start_datetime_window_str, "%Y-%m-%d %H:%M:%S")
daily_window_t1_datetime = datetime.strptime(end_datetime_window_str, "%Y-%m-%d %H:%M:%S")
_check_start_end_times(start_datetime=daily_window_t0_datetime, end_datetime=daily_window_t1_datetime)

# filter function - check that query times fall within desired time window
def is_in_between(date):
return daily_window_t0_datetime.time() <= date.time() <= daily_window_t1_datetime.time()

# compile new list of dates within desired time window
list_of_dates = list(filter(is_in_between, list_of_dates))
logger.info("Compiling timestamps from specific parameters.")

else:
msg = "Please provide either predefined timestamps or start date"
raise ValueError(msg)
return list_of_dates

def _check_predefined_timestamps(predefined_timestamps: List[str]) -> bool:
if type(predefined_timestamps) is not list:
msg = "Please provide predefined timestamps as a list"
raise ValueError(msg)
if type(predefined_timestamps[0]) is str: # Check type of first element
try:
for x in predefined_timestamps:
datetime.strptime(x, "%Y-%m-%d %H:%M:%S")
return True
except Exception as e:
msg = "Please check predefined timestamps"
msg += "\nExpected date format: %Y-%m-%d"
msg += "\nExpected time format: %H:%M:%S"
raise SyntaxError(msg)
elif type(predefined_timestamps[0]) is datetime: # Check type of first element
return True
else:
msg = "Please check predefined timestamps"
msg += "\nExpected either datetime objects or strings in the format %Y-%m-%d %H:%M:%S"
raise SyntaxError(msg)

def _check_datetime_format(start_datetime_str: str, end_datetime_str: str) -> bool:
try:
Expand Down
1 change: 0 additions & 1 deletion rs_tools/_src/data/msg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def from_filename(cls, file_name: str):
"""

file_name = Path(file_name)
print(file_name)
components = file_name.name.split('-')
save_path = str(file_name.parents[0])

Expand Down
Loading

0 comments on commit 17bffb3

Please sign in to comment.