Skip to content

Commit

Permalink
Implement export data via cli (#627)
Browse files Browse the repository at this point in the history
  • Loading branch information
savente93 authored Nov 1, 2023
1 parent f111fdf commit 5447e4e
Show file tree
Hide file tree
Showing 10 changed files with 241 additions and 35 deletions.
2 changes: 1 addition & 1 deletion docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Added
-----
- Support for exporting data catalogs to STAC catalog formats. (#617)
- Support for reading data catalogs from STAC catalog formats. (#625)

- Support for exporting data from catalogs via the CLI. (#627)


Changed
Expand Down
4 changes: 2 additions & 2 deletions hydromt/cli/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import inspect
import logging
import typing
from typing import Dict, List, Union
from typing import Dict, List, Optional, Union

from hydromt.gis_utils import utm_crs

Expand Down Expand Up @@ -127,7 +127,7 @@ def get_predifined_catalogs() -> Dict:

def get_region(
region: dict,
data_libs: Union[List, str] = None,
data_libs: Optional[Union[List, str]] = None,
hydrography_fn: str = "merit_hydro",
basin_index_fn: str = "merit_hydro_index",
) -> str:
Expand Down
16 changes: 14 additions & 2 deletions hydromt/cli/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
from ast import literal_eval
from os.path import isfile
from pathlib import Path
from typing import Any, Dict, Union
from typing import Any, Dict, Optional, Union
from warnings import warn

import click
import yaml

from .. import config
from ..error import DeprecatedError
Expand Down Expand Up @@ -86,7 +87,9 @@ def parse_json(ctx, param, value: str) -> Dict[str, Any]:
### general parsing methods ##


def parse_config(path: Union[Path, str] = None, opt_cli: Dict = None) -> Dict:
def parse_config(
path: Optional[Union[Path, str]] = None, opt_cli: Optional[Dict] = None
) -> Dict:
"""Parse config from ini `path` and combine with command line options `opt_cli`."""
opt = {}
if path is not None and isfile(path):
Expand Down Expand Up @@ -114,3 +117,12 @@ def parse_config(path: Union[Path, str] = None, opt_cli: Dict = None) -> Dict:
for option, value in opt_cli[section].items():
opt[section].update({option: value})
return opt


def parse_export_config_yaml(ctx, param, value) -> Dict:
    """Load an export configuration YAML file given on the command line.

    The ``(ctx, param, value)`` signature is that of a click option callback;
    ``ctx`` and ``param`` are accepted but not used.

    Parameters
    ----------
    ctx, param :
        Unused.
    value :
        Path to the YAML file. Any falsy value means no file was given.

    Returns
    -------
    Dict
        The parsed content of the file, or an empty dict when no file was
        given or the file contains no document.
    """
    if not value:
        return {}
    # YAML is UTF-8 by specification; do not rely on the platform default
    # encoding (which differs on Windows).
    with open(value, "r", encoding="utf-8") as stream:
        # NOTE(review): FullLoader is kept for backward compatibility. If export
        # configs only ever hold plain mappings/lists/scalars, prefer
        # yaml.safe_load, since the file path comes straight from the user.
        yml = yaml.load(stream, Loader=yaml.FullLoader)
    # An empty document parses to None; normalise so callers always get a dict.
    return yml if yml is not None else {}
114 changes: 112 additions & 2 deletions hydromt/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@
"""command line interface for hydromt models."""

import logging
from json import loads as json_decode
from os.path import join
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

import click
import numpy as np

from hydromt.data_catalog import DataCatalog
from hydromt.typing import ExportConfigDict

from .. import __version__, log
from ..models import MODELS
from . import cli_utils
Expand Down Expand Up @@ -49,6 +55,10 @@ def print_models(ctx, param, value):
type=click.Path(resolve_path=True),
help="Path to hydroMT configuration file, for the model specific implementation.",
)
# Positional CLI argument: a directory path (not a file), resolved to an
# absolute path by click before it reaches the command.
export_dest_path = click.argument(
    "export_dest_path",
    type=click.Path(file_okay=False, dir_okay=True, resolve_path=True),
)
arg_root = click.argument(
"MODEL_ROOT",
type=click.Path(resolve_path=True, dir_okay=True, file_okay=False),
Expand Down Expand Up @@ -110,6 +120,14 @@ def print_models(ctx, param, value):
help="Flag: If provided cache tiled rasterdatasets",
)

# Option pointing at a YAML export configuration; the callback replaces the
# path with the parsed content of the file.
export_config_opt = click.option(
    "-f",
    "--export-config",
    # Validate the path up front so a missing file or a directory is reported
    # as a click usage error instead of an exception raised from the callback.
    type=click.Path(exists=True, dir_okay=False),
    callback=cli_utils.parse_export_config_yaml,
    help="read options from a config file for exporting. options from CLI will "
    "override these options",
)

## MAIN


Expand All @@ -131,8 +149,6 @@ def main(ctx, models): # , quiet, verbose):


## BUILD


@main.command(short_help="Build models")
@click.argument(
"MODEL",
Expand Down Expand Up @@ -318,6 +334,100 @@ def update(
logger.removeHandler(handler)


## Export
@main.command(
    short_help="Export data",
)
@click.option(
    "-t",
    "--target",
)
@region_opt
@export_dest_path
@export_config_opt
@data_opt
@deltares_data_opt
@overwrite_opt
@quiet_opt
@verbose_opt
@click.pass_context
def export(
    ctx: click.Context,
    export_dest_path: Path,
    target: Optional[Union[str, Path]],
    export_config: Optional[ExportConfigDict],
    region: Optional[Dict[Any, Any]],
    data: Optional[List[Path]],
    dd: bool,
    fo: bool,
    quiet: int,
    verbose: int,
):
    """Export the data from a catalog.
    Example usage:
    --------------
    export the data of a single source, in a particular region
    hydromt export -r "{'subbasin': [-7.24, 62.09], 'uparea': 50}" -t era5_hourly -d ../hydromt/data/catalogs/artifact_data.yml .
    export all data of a single source
    hydromt export --dd -t era5_hourly .
    export data as detailed in an export config yaml file
    hydromt export -f /path/to/export_config.yaml .
    """  # noqa: E501
    # Log to <export_dest_path>/hydromt.log; every -v lowers and every -q raises
    # the level by 10, with DEBUG (10) as the floor.
    log_level = max(10, 30 - 10 * (verbose - quiet))
    logger = log.setuplog(
        "export", join(export_dest_path, "hydromt.log"), log_level=log_level
    )
    logger.info(f"Output dir: {export_dest_path}")

    # data catalogs from cli
    data_libs = list(data) if data else []
    if dd and "deltares_data" not in data_libs:  # deltares_data from cli
        data_libs = ["deltares_data"] + data_libs  # prepend!

    time_tuple = None
    if export_config:
        # `args:` may be present but empty in the yaml, which parses to None.
        args = export_config.pop("args", None) or {}
        if "catalog" in args:
            data_libs = data_libs + args.pop("catalog")
        time_tuple = args.pop("time_tuple", None)
        # A region given on the command line wins over the one in the config.
        region = region or args.pop("region", None)
        if isinstance(region, str):
            region = json_decode(region)
    # NOTE: without an export config the region from the CLI is kept as-is;
    # it used to be reset to None here, silently dropping the -r option.

    # Only a bounding-box region can be handed to export_data directly.
    bbox = None
    if region:
        if isinstance(region, dict) and "bbox" in region:
            bbox = region["bbox"]
        else:
            # NOTE(review): other region kinds (e.g. subbasin) would first have
            # to be resolved to a bounding box; until then say so explicitly
            # instead of ignoring the option silently.
            logger.warning(
                f"Region {region} is not a 'bbox' region and is not applied "
                "to the export."
            )

    if target:
        export_targets = [{"source": target}]
    elif export_config:
        export_targets = export_config["sources"]
    else:
        export_targets = None

    # NOTE(review): `fo` (overwrite flag) and `ctx` are accepted but unused here.
    try:
        data_catalog = DataCatalog(data_libs=data_libs)
        data_catalog.export_data(
            export_dest_path,
            bbox=bbox,
            source_names=export_targets,
            time_tuple=time_tuple,
        )

    except Exception as e:
        logger.exception(e)  # catch and log errors
        raise
    finally:
        # Release the log file so repeated invocations do not stack handlers.
        for handler in logger.handlers[:]:
            handler.close()
            logger.removeHandler(handler)


## CLIP


Expand Down
17 changes: 13 additions & 4 deletions hydromt/data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from pystac import Catalog as StacCatalog
from pystac import CatalogType, MediaType

from hydromt.typing import ErrorHandleMethod, SourceSpecDict
from hydromt.typing import Bbox, ErrorHandleMethod, SourceSpecDict, TimeRange
from hydromt.utils import partition_dictionaries

from . import __version__
Expand Down Expand Up @@ -1130,8 +1130,8 @@ def to_dataframe(self, source_names: Optional[List] = None) -> pd.DataFrame:
def export_data(
self,
data_root: Union[Path, str],
bbox: List = None,
time_tuple: Tuple = None,
bbox: Optional[Bbox] = None,
time_tuple: Optional[TimeRange] = None,
source_names: Optional[List] = None,
unit_conversion: bool = True,
meta: Optional[Dict] = None,
Expand Down Expand Up @@ -1172,7 +1172,16 @@ def export_data(
source_vars = {}
if len(source_names) > 0:
sources = {}
for name in source_names:
for source in source_names:
# support both strings and SourceSpecDicts here
if isinstance(source, str):
name = source
elif isinstance(source, Dict):
name = source["source"]
else:
raise RuntimeError(
f"unknown source type: {source} of type {type(source).__name__}"
)
# deduce variables from name
if "[" in name:
variables = name.split("[")[-1].split("]")[0].split(",")
Expand Down
2 changes: 1 addition & 1 deletion hydromt/nodata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Functions to handle nodata values.""" ""
"""Functions to handle nodata values."""
from enum import Enum
from logging import Logger

Expand Down
7 changes: 6 additions & 1 deletion hydromt/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Tuple, TypedDict, Union
from typing import Any, Dict, List, Tuple, TypedDict, Union

GeoDataframeSource = Union[str, Path]
GeoDatasetSource = Union[str, Path]
Expand All @@ -24,6 +24,11 @@
},
)

# Typed layout of an export configuration: free-form "args" and "meta"
# mappings plus the list of source specifications under "sources".
ExportConfigDict = TypedDict(
    "ExportConfigDict",
    {
        "args": Dict[str, Any],
        "meta": Dict[str, Any],
        "sources": List[SourceSpecDict],
    },
)


class ErrorHandleMethod(Enum):
"""Strategies for error handling withing hydromt."""
Expand Down
10 changes: 10 additions & 0 deletions tests/data/export_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
args:
catalog:
- tests/data/test_sources.yml
region:
subbasin: [-7.24, 62.09]
uparea: 50

sources:
- source: hydro_lakes[precip]
- source: gtsmv3_eu_era5
3 changes: 2 additions & 1 deletion tests/data/test_sources.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
root: d:/hydromt_testdata
meta:
root: d:/hydromt_testdata
era5:
path: ERA5/daily/era5_{year}_daily.nc
data_type: RasterDataset
Expand Down
Loading

0 comments on commit 5447e4e

Please sign in to comment.