From 95ef8f32705d130397afa09e86397bb160422dc2 Mon Sep 17 00:00:00 2001 From: annajungbluth Date: Thu, 26 Sep 2024 11:12:43 +0000 Subject: [PATCH] added functionality to skip processed files --- .../geoprocessing/goes/geoprocessor_goes16.py | 13 ++++++----- .../geoprocessing/modis/geoprocessor_modis.py | 22 ++++++++++--------- .../geoprocessing/msg/geoprocessor_msg.py | 11 +++++----- 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/rs_tools/_src/geoprocessing/goes/geoprocessor_goes16.py b/rs_tools/_src/geoprocessing/goes/geoprocessor_goes16.py index 38fd725..fde35d6 100644 --- a/rs_tools/_src/geoprocessing/goes/geoprocessor_goes16.py +++ b/rs_tools/_src/geoprocessing/goes/geoprocessor_goes16.py @@ -286,7 +286,7 @@ def preprocess_cloud_mask(self, files: List[str]) -> xr.Dataset: return ds - def preprocess_files(self): + def preprocess_files(self, skip_if_exists: bool = True): """ Preprocesses multiple files in read path and saves processed files to save path. """ @@ -297,11 +297,10 @@ def preprocess_files(self): for itime in pbar_time: - # TODO: Make it modular whether to overwrite or not - # skip if file already exists itime_name = format_goes_dates(itime) save_filename = Path(self.save_path).joinpath(f"{itime_name}_goes16.nc") - if os.path.exists(save_filename): + # skip if file already exists + if skip_if_exists and os.path.exists(save_filename): logger.info(f"File already exists. Skipping: {save_filename}") continue @@ -350,6 +349,7 @@ def geoprocess( save_path: str = "./", region: str = None, resample_method: str = "bilinear", + skip_if_exists: bool = True ): """ Geoprocesses GOES 16 files @@ -360,7 +360,8 @@ def geoprocess( save_path (str, optional): The path to save the geoprocessed files to. Defaults to "./". region (str, optional): The geographic region to extract ("lon_min, lat_min, lon_max, lat_max"). Defaults to None. resample_method (str, optional): The resampling method to use. Defaults to "bilinear". 
- + skip_if_exists (bool, optional): Whether to skip if the file already exists. Defaults to True. + Returns: None """ @@ -380,7 +381,7 @@ def geoprocess( resample_method=resample_method ) logger.info(f"GeoProcessing Files...") - goes16_geoprocessor.preprocess_files() + goes16_geoprocessor.preprocess_files(skip_if_exists=skip_if_exists) logger.info(f"Finished GOES 16 GeoProcessing Script...!") diff --git a/rs_tools/_src/geoprocessing/modis/geoprocessor_modis.py b/rs_tools/_src/geoprocessing/modis/geoprocessor_modis.py index 7f2d59e..07cf1bd 100644 --- a/rs_tools/_src/geoprocessing/modis/geoprocessor_modis.py +++ b/rs_tools/_src/geoprocessing/modis/geoprocessor_modis.py @@ -213,7 +213,7 @@ def preprocess_cloud_mask(self, files: List[str]) -> xr.Dataset: return ds - def preprocess_files(self): + def preprocess_files(self, skip_if_exists: bool = True): """ Preprocesses multiple files in read path and saves processed files to save path. """ @@ -224,6 +224,13 @@ def preprocess_files(self): for itime in pbar_time: + itime_name = format_modis_dates(itime) + save_filename = Path(self.save_path).joinpath(f"{itime_name}_{self.satellite}.nc") + # skip if file already exists + if skip_if_exists and os.path.exists(save_filename): + logger.info(f"File already exists. Skipping: {save_filename}") + continue + pbar_time.set_description(f"Processing: {itime}") # get files from unique times @@ -261,13 +268,6 @@ def preprocess_files(self): # check if save path exists, and create if not if not os.path.exists(self.save_path): os.makedirs(self.save_path) - - # remove file if it already exists - itime_name = format_modis_dates(itime) - save_filename = Path(self.save_path).joinpath(f"{itime_name}_{self.satellite}.nc") - if os.path.exists(save_filename): - logger.info(f"File already exists. 
Overwriting file: {save_filename}") - os.remove(save_filename) # save to netcdf ds.to_netcdf(save_filename, engine="netcdf4") @@ -275,7 +275,8 @@ def preprocess_files(self): def geoprocess( satellite: str, read_path: str = "./", - save_path: str = "./" + save_path: str = "./", + skip_if_exists: bool = True ): """ Geoprocesses MODIS files @@ -284,6 +285,7 @@ def geoprocess( satellite (str, optional): The satellite of the data to geoprocess. read_path (str, optional): The path to read the files from. Defaults to "./". save_path (str, optional): The path to save the geoprocessed files to. Defaults to "./". + skip_if_exists (bool, optional): Whether to skip if the file already exists. Defaults to True. Returns: None @@ -297,7 +299,7 @@ def geoprocess( save_path=save_path ) logger.info(f"GeoProcessing Files...") - modis_geoprocessor.preprocess_files() + modis_geoprocessor.preprocess_files(skip_if_exists=skip_if_exists) logger.info(f"Finished {satellite.upper()} GeoProcessing Script...!") diff --git a/rs_tools/_src/geoprocessing/msg/geoprocessor_msg.py b/rs_tools/_src/geoprocessing/msg/geoprocessor_msg.py index d971f77..0c737e1 100644 --- a/rs_tools/_src/geoprocessing/msg/geoprocessor_msg.py +++ b/rs_tools/_src/geoprocessing/msg/geoprocessor_msg.py @@ -275,7 +275,7 @@ def preprocess_cloud_mask(self, files: List[str]) -> xr.Dataset: return ds - def preprocess_files(self): + def preprocess_files(self, skip_if_exists: bool = True): """ Preprocesses multiple files in read path and saves processed files to save path. """ @@ -290,10 +290,9 @@ def preprocess_files(self): for itime in pbar_time: - # TODO: Make it modular whether to overwrite or not - # skip if file already exists save_filename = Path(self.save_path).joinpath(f"{itime}_msg.nc") - if os.path.exists(save_filename): + if skip_if_exists and os.path.exists(save_filename): + # skip if file already exists logger.info(f"File already exists. 
Skipping: {save_filename}") continue @@ -340,6 +339,7 @@ def geoprocess( save_path: str = "./", region: str = None, resample_method: str = "bilinear", + skip_if_exists: bool = True ): """ Geoprocesses MSG files @@ -350,6 +350,7 @@ def geoprocess( save_path (str, optional): The path to save the geoprocessed files to. Defaults to "./". region (str, optional): The geographic region to extract ("lon_min, lat_min, lon_max, lat_max"). Defaults to None. resample_method (str, optional): The resampling method to use. Defaults to "bilinear". + skip_if_exists (bool, optional): Whether to skip existing files. Defaults to True. Returns: None @@ -368,7 +369,7 @@ def geoprocess( resample_method=resample_method ) logger.info(f"GeoProcessing Files...") - msg_geoprocessor.preprocess_files() + msg_geoprocessor.preprocess_files(skip_if_exists=skip_if_exists) logger.info(f"Finished MSG GeoProcessing Script...!")