From 86bb37423855ea4d7ed75a482b5b15bec413b20b Mon Sep 17 00:00:00 2001 From: dcodrut Date: Sun, 19 Jan 2025 23:45:38 +0100 Subject: [PATCH] doc update --- torchgeo/datasets/dl4gam_alps.py | 80 +++++++++++++++++++------------- 1 file changed, 49 insertions(+), 31 deletions(-) diff --git a/torchgeo/datasets/dl4gam_alps.py b/torchgeo/datasets/dl4gam_alps.py index e2b2f912fd0..a476ef508f8 100644 --- a/torchgeo/datasets/dl4gam_alps.py +++ b/torchgeo/datasets/dl4gam_alps.py @@ -28,40 +28,50 @@ class DL4GAMAlps(NonGeoDataset): r"""A Multi-modal Dataset for Glacier Mapping (Segmentation) in the European Alps. - The dataset consists of Sentinel-2 images from 2015 (mainly), 2016 and 2017, and binary segmentation masks for - glaciers, based on an inventory built by glaciology experts - (`Paul et al. 2020 `_). + The dataset consists of Sentinel-2 images from 2015 (mainly), 2016 and 2017, and + binary segmentation masks for glaciers, based on an inventory built by glaciology + experts (`Paul et al. 2020 `_). - Given that glacier ice is not always visible in the images, due to seasonal snow, shadow/cloud cover and, most - importantly, debris cover, the dataset also includes additional features that can help in the segmentation task. + Given that glacier ice is not always visible in the images, due to seasonal snow, + shadow/cloud cover and, most importantly, debris cover, the dataset also includes + additional features that can help in the segmentation task. Dataset features: - * Sentinel-2 images (all bands, including cloud and shadow masks which can be used for loss masking) + * Sentinel-2 images (all bands, including cloud and shadow masks which can be used + for loss masking) * glacier mask (0: no glacier, 1: glacier) * debris mask (0: no debris, 1: debris) based on a mix of three publications (`Scherler et al. 2018 `_, `Herreid & Pellicciotti 2020 `_, - `Linsbauer et al. 2021 `_) - * DEM (Copernicus GLO-30) + five derived features (using `xDEM `_): slope, + `Linsbauer et al. 2021 + `_) + * DEM (Copernicus GLO-30) + five derived features + (using `xDEM `_): slope, aspect, terrain ruggedness index, planform and profile curvatures - * dh/dt (surface elevation change) map over 2010-2015 (`Hugonnet et al. 2021 `_) + * dh/dt (surface elevation change) map over 2010-2015 + (`Hugonnet et al. 2021 `_) * v (surface velocity) map over 2015 (`ITS_LIVE `_) Other specifications: - * temporal coverage: one acquisition per glacier, from either 2015 (mainly), 2016, or 2017 - * spatial coverage: only glaciers larger than 0.1 km\ :sup:`2`\ are considered (n=1593, after manual QC), totalling - ~1685 km\ :sup:`2`\ which represents ~93% of the total inventory area for this region + * temporal coverage: one acquisition per glacier, from either 2015 (mainly), 2016, + or 2017 + * spatial coverage: only glaciers larger than 0.1 km\ :sup:`2`\ are considered + (n=1593, after manual QC), totalling ~1685 km\ :sup:`2`\ which represents ~93% of + the total inventory area for this region * 2251 patches sampled with overlap from the 1593 glaciers; or 11440 for the `large` version, obtained with an increased sampling overlap * the dataset download size is 5.8 GB (11 GB when unarchived); or 29.5 GB (52 GB when unarchived) for the `large` version - * the dataset is provided at 10m GSD (after bilinearly resampling some of the Sentinel-2 bands and the additional - features which come at a lower resolution) - * the dataset provides fixed training, validation, and test geographical splits (70-10-20, by glacier area) - * five different splits are provided, according to a five-fold cross-validation scheme - * all the features/masks are stacked and provided as NetCDF files (one or more per glacier), structured as + * the dataset is provided at 10m GSD (after bilinearly resampling some of the + Sentinel-2 bands and the additional features which come at a lower resolution) + * the dataset provides fixed training, validation, and test geographical splits + (70-10-20, by glacier area) + * five different splits are provided, according to a five-fold cross-validation + scheme + * all the features/masks are stacked and provided as NetCDF files (one or more per + glacier), structured as `data/{glacier_id}/{glacier_id}_{patch_number}_{center_x}_{center_y}.nc` * data is projected and geocoded in local UTM zones @@ -75,13 +85,16 @@ class DL4GAMAlps(NonGeoDataset): This dataset requires the following additional libraries to be installed: - * `xarray `_ - * `netcdf4 `_ + * `xarray `_ + * `netcdf4 `_ .. versionadded:: 0.7 """ - r_url = 'https://huggingface.co/datasets/dcodrut/dl4gam_alps/resolve/7d20ca8a2b30c5518e086ffaa5ce37e6a66c42c1/data' + r_url = ( + 'https://huggingface.co/datasets/dcodrut/dl4gam_alps/resolve/' + '7d20ca8a2b30c5518e086ffaa5ce37e6a66c42c1/data' + ) download_metadata: ClassVar[dict[str, dict[str, str]]] = { 'dataset_small': { 'url': f'{r_url}/patches/inv_r_128_s_128.tar.gz', @@ -146,19 +159,21 @@ def __init__( Args: root: root directory where dataset can be found split: one of "train", "val", or "test" - cv_iter: one of 1, 2, 3, 4, 5 (for the five-fold geographical cross-validation scheme) + cv_iter: one of 1, 2, 3, 4, 5 (for the five-fold geographical + cross-validation scheme) version: one of "small" or "large" (controls the sampling overlap) bands: the Sentinel-2 bands to use as input (default: RGB + NIR + SWIR) - extra_features: additional features to include (default: None; see the class attribute for the available) - transforms: a function/transform that takes input sample and its target as entry and returns a transformed - version + extra_features: additional features to include (default: None; see the class + attribute for the available) + transforms: a function/transform that takes input sample and its target as + entry and returns a transformed version download: if True, download dataset and store it in the root directory checksum: if True, check the MD5 of the downloaded files (may be slow) Raises: - AssertionError: if the ``split``, ``cv_iter``, ``version``, ``bands`` or ``extra_features`` are invalid - DatasetNotFoundError: If dataset is not found and *download* is False. - DependencyNotFoundError: If xarray is not installed. + AssertionError: if any parameters are invalid. + DatasetNotFoundError: if dataset is not found and *download* is False. + DependencyNotFoundError: if xarray is not installed. """ lazy_import('xarray') @@ -212,7 +227,7 @@ def __len__(self) -> int: return len(self.fp_patches) def __getitem__(self, index: int) -> dict[str, Tensor]: - """It loads the netcdf file for the given index and returns the sample as a dict. + """Load the NetCDF file for the given index and return the sample as a dict. Args: index: index of the sample to return @@ -221,7 +236,8 @@ def __getitem__(self, index: int) -> dict[str, Tensor]: dict: a dictionary containing the sample with the following: * the Sentinel-2 image (selected bands) - * the glacier mask (binary mask with all the glaciers in the current patch) + * the glacier mask (binary mask with all the glaciers in the current + patch) * the debris mask * the cloud and shadow mask * the additional features (DEM, derived features, etc.) if required @@ -250,7 +266,8 @@ def __getitem__(self, index: int) -> dict[str, Tensor]: assert feature in nc, f'Feature {feature} not found in the netcdf file' vals = nc[feature].values.astype(np.float32) - # impute the missing values with the mean or zero (for dh/dt and surface velocity) + # impute the missing values with the mean + # or zero (for dh/dt and surface velocity) v_fill = 0.0 if feature in ('dhdt', 'v') else np.nanmean(vals) vals[np.isnan(vals)] = v_fill @@ -328,7 +345,8 @@ def plot( sample: a sample returned by :meth:`DL4GAMAlps.__getitem__` show_titles: flag indicating whether to show titles above each panel suptitle: optional string to use as a suptitle - clip_extrema: flag indicating whether to clip the lowest/highest 2.5% of the values for contrast enhancement + clip_extrema: flag indicating whether to clip the lowest/highest 2.5% of the + values for contrast enhancement Returns: a matplotlib Figure with the rendered sample