Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hydra integration #39

Merged
merged 38 commits into from
Apr 9, 2024
Merged
Changes from 1 commit
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
11ca362
added function to convert units
annajungbluth Mar 19, 2024
d762b2c
added function to convert units
annajungbluth Mar 19, 2024
e5ddbac
added goes16 downloader for pipeline
annajungbluth Mar 19, 2024
765517e
added downloader for terra
annajungbluth Mar 19, 2024
d7a3323
tested terra aqua downloader
annajungbluth Mar 19, 2024
637d3f8
tested goes downloader
annajungbluth Mar 19, 2024
23fcfb0
added and tested msg downloader
annajungbluth Mar 19, 2024
49a3900
First Merge to Hydra-Integration (#35)
jejjohnson Mar 20, 2024
e4925c7
added GOES filename parser
annajungbluth Mar 21, 2024
3849a45
added msg filename parser
annajungbluth Mar 21, 2024
968e9ef
added filename parser for msg
annajungbluth Mar 21, 2024
4402f04
started developing msg pipeline
annajungbluth Mar 21, 2024
0a72a6d
continued developing msg pipeline
annajungbluth Mar 21, 2024
cee8a13
continued working on msg pipeline
annajungbluth Mar 22, 2024
7fa2144
continued hydra integration and finished download components
annajungbluth Mar 22, 2024
6ecc687
added examples to main.py
annajungbluth Mar 22, 2024
75d0db6
added goes geoprocessor to repo, added docstrings
annajungbluth Mar 27, 2024
2828f05
deleted obsolete scripts
annajungbluth Mar 27, 2024
3089ef0
tested goes geoprocessor
annajungbluth Mar 27, 2024
0618df4
updated default args
annajungbluth Mar 27, 2024
a5c3928
notebooks/dev/goes/1.4-GOES-geoprocess-val.ipynb
annajungbluth Mar 27, 2024
cad391b
tested goes geoprocessor
annajungbluth Mar 27, 2024
ef9fa9a
renamed script for consistency
annajungbluth Mar 28, 2024
4ffdf1f
work in progress modis geoprocessing
annajungbluth Mar 28, 2024
2b33e92
merged with other branch
annajungbluth Mar 28, 2024
951d280
continued developing modis geoprocessor
annajungbluth Mar 28, 2024
ba94f25
tested modis geoprocessor
annajungbluth Mar 28, 2024
a04d71e
fixed errors, standardized code, and started with msg geoprocessor
annajungbluth Apr 1, 2024
def165a
fixed modis problem
annajungbluth Apr 1, 2024
075758d
fixed modis naming issue
annajungbluth Apr 2, 2024
34f04d9
fixed small errors and tested MSG geoprocessor
annajungbluth Apr 2, 2024
0729507
fixed default arguments
annajungbluth Apr 2, 2024
10a0314
finished hydra integration of geoprocessing
annajungbluth Apr 4, 2024
7d814d2
started developing prepatcher
annajungbluth Apr 4, 2024
5eb9175
work in progress patcher
annajungbluth Apr 4, 2024
7953222
fixed coordinate, dimension problem
annajungbluth Apr 5, 2024
31b0366
tested patcher and fixed small errors
annajungbluth Apr 5, 2024
4b95a40
integrated patcher into hydra pipeline
annajungbluth Apr 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
started developing prepatcher
annajungbluth committed Apr 4, 2024
commit 7d814d2daeaac74ad3946c25f2568b96c593dbb6
22 changes: 17 additions & 5 deletions notebooks/1.1-pipeline-goes16.ipynb
Original file line number Diff line number Diff line change
@@ -18,10 +18,22 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"id": "5d7511e4-80b3-4053-8c5c-bb430d8a0c14",
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'autoroot'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mautoroot\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mrs_tools\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_src\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mio\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_list_filenames\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mrioxarray\u001b[39;00m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'autoroot'"
]
}
],
"source": [
"import autoroot\n",
"from rs_tools._src.utils.io import get_list_filenames\n",
@@ -5913,9 +5925,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:rs_tools]",
"display_name": "Python 3",
"language": "python",
"name": "conda-env-rs_tools-py"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -5927,7 +5939,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
"version": "3.9.6"
}
},
"nbformat": 4,
700 changes: 700 additions & 0 deletions notebooks/dev/modis/1.2-MODIS-patch-val.ipynb

Large diffs are not rendered by default.

137 changes: 137 additions & 0 deletions rs_tools/_src/preprocessing/prepatcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import autoroot
import numpy as np
from xrpatcher._src.base import XRDAPatcher
import rioxarray
import os
from pathlib import Path
from dataclasses import dataclass
from typing import Optional, List, Union, Tuple
from tqdm import tqdm
from rs_tools import goes_download, modis_download, MODIS_VARIABLES, get_modis_channel_numbers
from rs_tools._src.utils.io import get_list_filenames
from rs_tools._src.geoprocessing.grid import create_latlon_grid
import typer
from loguru import logger
import xarray as xr
from satpy import Scene
import datetime
from rs_tools._src.data.modis import MODISFileName, MODIS_ID_TO_NAME, MODIS_NAME_TO_ID, get_modis_paired_files
import pandas as pd
from datetime import datetime

def _check_filetype(file_type: str) -> bool:
"""checks instrument for GOES data."""
if file_type in ["nc", "np"]:
return True
else:
msg = "Unrecognized file type"
msg += f"\nNeeds to be 'nc' or 'np'. Others are not yet tested"
raise ValueError(msg)

@dataclass(frozen=True)
class PrePatcher:
    """
    A class for preprocessing and saving patches from NetCDF files.

    Attributes:
        read_path (str): The path to the directory containing the NetCDF files.
        save_path (str): The path to save the patches.
        patch_size (int): The size of each patch (used for both x and y).
        stride_size (int): The stride size for generating patches (used for both x and y).
        save_filetype (str): The file type to save patches as. Options are [nc, np]

    Methods:
        nc_files(self) -> List[str]: Returns a list of all NetCDF filenames in the read_path directory.
        save_patches(self): Preprocesses and saves patches from the NetCDF files.
    """
    read_path: str
    save_path: str
    patch_size: int
    stride_size: int
    save_filetype: str

    @property
    def nc_files(self) -> List[str]:
        """
        Returns a list of all NetCDF filenames in the read_path directory.

        Returns:
            List[str]: A list of NetCDF filenames.
        """
        # get list of all filenames within the path
        return get_list_filenames(self.read_path, ".nc")

    def save_patches(self):
        """
        Preprocesses and saves patches from the NetCDF files.

        Every file returned by `nc_files` is opened as a DataArray, cut into
        patches of `patch_size` with stride `stride_size` along x and y, and
        each patch is written to `save_path`. The text before the first "_"
        in the file name is used as the prefix of the output names.

        Raises:
            ValueError: If `save_filetype` is not one of "nc" or "np".
        """
        # validate up front: an unsupported type would otherwise iterate over
        # every patch of every file and silently write nothing
        _check_filetype(file_type=self.save_filetype)

        out_dir = Path(self.save_path)
        pbar = tqdm(self.nc_files)

        for ifile in pbar:
            # extract & log timestamp
            itime = str(Path(ifile).name).split("_")[0]
            pbar.set_description(f"Processing: {itime}")

            # open dataset; the context manager closes the underlying file
            # once all of its patches have been written
            with xr.open_dataarray(ifile, engine="netcdf4") as da:
                # define patch parameters
                patches = dict(x=self.patch_size, y=self.patch_size)
                strides = dict(x=self.stride_size, y=self.stride_size)
                # start patching
                patcher = XRDAPatcher(da=da, patches=patches, strides=strides)

                # create save path if needed (no separate existence check,
                # so concurrent runs cannot race on the directory creation)
                os.makedirs(out_dir, exist_ok=True)

                for i, ipatch in tqdm(enumerate(patcher), total=len(patcher)):
                    if self.save_filetype == "nc":
                        ipatch.to_netcdf(out_dir.joinpath(f"{itime}_patch_{i}.nc"), engine="netcdf4")
                    elif self.save_filetype == "np":
                        # save as numpy files (np.save appends the .npy suffix)
                        # NOTE: assumes each patch exposes latitude, longitude and
                        # cloud_mask (e.g. as coordinates) - confirm against the geoprocessor output
                        # TODO: Simplify saving?
                        np.save(out_dir.joinpath(f"{itime}_radiance_patch_{i}"), ipatch.values)
                        np.save(out_dir.joinpath(f"{itime}_latitude_patch_{i}"), ipatch.latitude.values)
                        np.save(out_dir.joinpath(f"{itime}_longitude_patch_{i}"), ipatch.longitude.values)
                        np.save(out_dir.joinpath(f"{itime}_cloudmask_patch_{i}"), ipatch.cloud_mask.values)

def prepatch(
    read_path: str = "/Users/anna.jungbluth/Desktop/git/rs_tools/data/goes16/geoprocessed",
    save_path: str = "/Users/anna.jungbluth/Desktop/git/rs_tools/data/goes16/analysis",
    patch_size: int = 256,
    stride_size: int = 256,
    save_filetype: str = 'nc'
):
    """
    Cut geoprocessed satellite files into smaller patches and save them.

    Args:
        read_path (str, optional): Directory to read the input NetCDF files from.
            NOTE: the default is a developer-local goes16/geoprocessed directory;
            pass an explicit path on any other machine.
        save_path (str, optional): Directory to write the patches to.
            NOTE: the default is a developer-local goes16/analysis directory;
            pass an explicit path on any other machine.
        patch_size (int, optional): The size of each patch. Defaults to 256.
        stride_size (int, optional): The stride size for patch extraction. Defaults to 256.
        save_filetype (str, optional): The file type to save patches as.
            Options are [nc, np]. Defaults to 'nc'.

    Returns:
        None

    Raises:
        ValueError: If save_filetype is not one of the supported options.
    """
    # reject unsupported output formats before doing any work
    _check_filetype(file_type=save_filetype)

    logger.info("Initializing Prepatcher...")
    settings = dict(
        read_path=read_path,
        save_path=save_path,
        patch_size=patch_size,
        stride_size=stride_size,
        save_filetype=save_filetype,
    )
    worker = PrePatcher(**settings)

    logger.info(f"Patching Files...: {save_path}")
    worker.save_patches()

    logger.info("Finished Prepatching Script...!")

# Command-line entry point; typer builds the CLI options from prepatch's signature.
# Example:
#   python scripts/pipeline/prepatch.py --read-path "/path/to/netcdf/file" --save-path /path/to/save/patches
if __name__ == "__main__":
    typer.run(prepatch)