From 5447e4e25dd94342e1e524afcc0c9e91e65e31e9 Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Wed, 1 Nov 2023 15:18:54 +0100 Subject: [PATCH] Implement export data via cli (#627) --- docs/changelog.rst | 2 +- hydromt/cli/api.py | 4 +- hydromt/cli/cli_utils.py | 16 ++++- hydromt/cli/main.py | 114 ++++++++++++++++++++++++++++++++++- hydromt/data_catalog.py | 17 ++++-- hydromt/nodata.py | 2 +- hydromt/typing.py | 7 ++- tests/data/export_config.yml | 10 +++ tests/data/test_sources.yml | 3 +- tests/test_cli.py | 101 ++++++++++++++++++++++++------- 10 files changed, 241 insertions(+), 35 deletions(-) create mode 100644 tests/data/export_config.yml diff --git a/docs/changelog.rst b/docs/changelog.rst index 9ba8fa79e..3f5b0f008 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -15,7 +15,7 @@ Added ----- - Support for exporting data catalogs to STAC catalog formats. (#617) - Support for reading data catalogs from STAC catalog formats. (#625) - +- Support exporting data from catalogs from the CLI (#627) Changed diff --git a/hydromt/cli/api.py b/hydromt/cli/api.py index 8ed165a0f..66a652e3f 100644 --- a/hydromt/cli/api.py +++ b/hydromt/cli/api.py @@ -3,7 +3,7 @@ import inspect import logging import typing -from typing import Dict, List, Union +from typing import Dict, List, Optional, Union from hydromt.gis_utils import utm_crs @@ -127,7 +127,7 @@ def get_predifined_catalogs() -> Dict: def get_region( region: dict, - data_libs: Union[List, str] = None, + data_libs: Optional[Union[List, str]] = None, hydrography_fn: str = "merit_hydro", basin_index_fn: str = "merit_hydro_index", ) -> str: diff --git a/hydromt/cli/cli_utils.py b/hydromt/cli/cli_utils.py index 400881075..b691d222e 100644 --- a/hydromt/cli/cli_utils.py +++ b/hydromt/cli/cli_utils.py @@ -6,10 +6,11 @@ from ast import literal_eval from os.path import isfile from pathlib import Path -from typing import Any, Dict, Union +from typing import Any, Dict, Optional, Union from warnings import warn import click +import yaml from .. import config from ..error import DeprecatedError @@ -86,7 +87,9 @@ def parse_json(ctx, param, value: str) -> Dict[str, Any]: ### general parsing methods ## -def parse_config(path: Union[Path, str] = None, opt_cli: Dict = None) -> Dict: +def parse_config( + path: Optional[Union[Path, str]] = None, opt_cli: Optional[Dict] = None +) -> Dict: """Parse config from ini `path` and combine with command line options `opt_cli`.""" opt = {} if path is not None and isfile(path): @@ -114,3 +117,12 @@ def parse_config(path: Union[Path, str] = None, opt_cli: Dict = None) -> Dict: for option, value in opt_cli[section].items(): opt[section].update({option: value}) return opt + + +def parse_export_config_yaml(ctx, param, value) -> Dict: + if value: + with open(value, "r") as stream: + yml = yaml.load(stream, Loader=yaml.FullLoader) + return yml + else: + return {} diff --git a/hydromt/cli/main.py b/hydromt/cli/main.py index 3fbfed8f8..8b0430833 100644 --- a/hydromt/cli/main.py +++ b/hydromt/cli/main.py @@ -2,11 +2,17 @@ """command line interface for hydromt models.""" import logging +from json import loads as json_decode from os.path import join +from pathlib import Path +from typing import Any, Dict, List, Optional, Union import click import numpy as np +from hydromt.data_catalog import DataCatalog +from hydromt.typing import ExportConfigDict + from .. import __version__, log from ..models import MODELS from . import cli_utils @@ -49,6 +55,10 @@ def print_models(ctx, param, value): type=click.Path(resolve_path=True), help="Path to hydroMT configuration file, for the model specific implementation.", ) +export_dest_path = click.argument( + "export_dest_path", + type=click.Path(resolve_path=True, dir_okay=True, file_okay=False), +) arg_root = click.argument( "MODEL_ROOT", type=click.Path(resolve_path=True, dir_okay=True, file_okay=False), @@ -110,6 +120,14 @@ def print_models(ctx, param, value): help="Flag: If provided cache tiled rasterdatasets", ) +export_config_opt = click.option( + "-f", + "--export-config", + callback=cli_utils.parse_export_config_yaml, + help="read options from a config file for exporting. options from CLI will " + "override these options", +) + ## MAIN @@ -131,8 +149,6 @@ def main(ctx, models): # , quiet, verbose): ## BUILD - - @main.command(short_help="Build models") @click.argument( "MODEL", @@ -318,6 +334,100 @@ def update( logger.removeHandler(handler) +## Export +@main.command( + short_help="Export data", +) +@click.option( + "-t", + "--target", +) +@region_opt +@export_dest_path +@export_config_opt +@data_opt +@deltares_data_opt +@overwrite_opt +@quiet_opt +@verbose_opt +@click.pass_context +def export( + ctx: click.Context, + export_dest_path: Path, + target: Optional[Union[str, Path]], + export_config: Optional[ExportConfigDict], + region: Optional[Dict[Any, Any]], + data: Optional[List[Path]], + dd: bool, + fo: bool, + quiet: int, + verbose: int, +): + """Export the data from a catalog. + + Example usage: + -------------- + + export the data of in a single source, in a pertcular region + hydromt export -r "{'subbasin': [-7.24, 62.09], 'uparea': 50}" -t era5_hourly -d ../hydromt/data/catalogs/artifact_data.yml . + + export all data of in a single source + hydromt export --dd -t era5_hourly . + + export data as detailed in an export config yaml file + hydromt export -f /path/to/export_config.yaml . + """ # noqa: E501 + # logger + log_level = max(10, 30 - 10 * (verbose - quiet)) + logger = log.setuplog( + "export", join(export_dest_path, "hydromt.log"), log_level=log_level + ) + logger.info(f"Output dir: {export_dest_path}") + + if data: + data_libs = list(data) # add data catalogs from cli + else: + data_libs = [] + + if dd and "deltares_data" not in data_libs: # deltares_data from cli + data_libs = ["deltares_data"] + data_libs # prepend! + + if export_config: + args = export_config.pop("args", {}) + if "catalog" in args.keys(): + data_libs = data_libs + args.pop("catalog") + time_tuple = args.pop("time_tuple", None) + region = region or args.pop("region", None) + if isinstance(region, str): + region = json_decode(region) + else: + time_tuple = None + region = None + + if target: + export_targets = [{"source": target}] + elif export_config: + export_targets = export_config["sources"] + else: + export_targets = None + + try: + data_catalog = DataCatalog(data_libs=data_libs) + data_catalog.export_data( + export_dest_path, + source_names=export_targets, + time_tuple=time_tuple, + ) + + except Exception as e: + logger.exception(e) # catch and log errors + raise + finally: + for handler in logger.handlers[:]: + handler.close() + logger.removeHandler(handler) + + ## CLIP diff --git a/hydromt/data_catalog.py b/hydromt/data_catalog.py index 977e904cb..92fc0d683 100644 --- a/hydromt/data_catalog.py +++ b/hydromt/data_catalog.py @@ -34,7 +34,7 @@ from pystac import Catalog as StacCatalog from pystac import CatalogType, MediaType -from hydromt.typing import ErrorHandleMethod, SourceSpecDict +from hydromt.typing import Bbox, ErrorHandleMethod, SourceSpecDict, TimeRange from hydromt.utils import partition_dictionaries from . import __version__ @@ -1130,8 +1130,8 @@ def to_dataframe(self, source_names: Optional[List] = None) -> pd.DataFrame: def export_data( self, data_root: Union[Path, str], - bbox: List = None, - time_tuple: Tuple = None, + bbox: Optional[Bbox] = None, + time_tuple: Optional[TimeRange] = None, source_names: Optional[List] = None, unit_conversion: bool = True, meta: Optional[Dict] = None, @@ -1172,7 +1172,16 @@ def export_data( source_vars = {} if len(source_names) > 0: sources = {} - for name in source_names: + for source in source_names: + # support both strings and SourceSpecDicts here + if isinstance(source, str): + name = source + elif isinstance(source, Dict): + name = source["source"] + else: + raise RuntimeError( + f"unknown source type: {source} of type {type(source).__name__}" + ) # deduce variables from name if "[" in name: variables = name.split("[")[-1].split("]")[0].split(",") diff --git a/hydromt/nodata.py b/hydromt/nodata.py index 779f01308..ccf04f2f2 100644 --- a/hydromt/nodata.py +++ b/hydromt/nodata.py @@ -1,4 +1,4 @@ -"""Functions to handle nodata values.""" "" +"""Functions to handle nodata values.""" from enum import Enum from logging import Logger diff --git a/hydromt/typing.py b/hydromt/typing.py index 50527c777..c13542f2f 100644 --- a/hydromt/typing.py +++ b/hydromt/typing.py @@ -3,7 +3,7 @@ from datetime import datetime from enum import Enum from pathlib import Path -from typing import Tuple, TypedDict, Union +from typing import Any, Dict, List, Tuple, TypedDict, Union GeoDataframeSource = Union[str, Path] GeoDatasetSource = Union[str, Path] @@ -24,6 +24,11 @@ }, ) +ExportConfigDict = TypedDict( + "ExportConfigDict", + {"args": Dict[str, Any], "meta": Dict[str, Any], "sources": List[SourceSpecDict]}, +) + class ErrorHandleMethod(Enum): """Strategies for error handling withing hydromt.""" diff --git a/tests/data/export_config.yml b/tests/data/export_config.yml new file mode 100644 index 000000000..5f5afbd87 --- /dev/null +++ b/tests/data/export_config.yml @@ -0,0 +1,10 @@ +args: + catalog: + - tests/data/test_sources.yml + region: + subbasin: [-7.24, 62.09] + uparea: 50 + +sources: + - source: hydro_lakes[precip] + - source: gtsmv3_eu_era5 diff --git a/tests/data/test_sources.yml b/tests/data/test_sources.yml index 45d343c72..79b7685de 100644 --- a/tests/data/test_sources.yml +++ b/tests/data/test_sources.yml @@ -1,5 +1,6 @@ --- -root: d:/hydromt_testdata +meta: + root: d:/hydromt_testdata era5: path: ERA5/daily/era5_{year}_daily.nc data_type: RasterDataset diff --git a/tests/test_cli.py b/tests/test_cli.py index cd57e2d07..ee851c669 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -10,34 +10,46 @@ from hydromt.cli.main import main as hydromt_cli -def test_cli(tmpdir): +def test_cli_verison(tmpdir): r = CliRunner().invoke(hydromt_cli, "--version") assert r.exit_code == 0 assert r.output.split()[-1] == __version__ + +def test_cli_models(tmpdir): r = CliRunner().invoke(hydromt_cli, "--models") assert r.exit_code == 0 assert r.output.startswith("model plugins") + +def test_cli_help(tmpdir): r = CliRunner().invoke(hydromt_cli, "--help") assert r.exit_code == 0 # NOTE: when called from CliRunner we get "Usage: main" instead of "Usage: hydromt" assert r.output.startswith("Usage: main [OPTIONS] COMMAND [ARGS]...") + +def test_cli_build_help(tmpdir): r = CliRunner().invoke(hydromt_cli, ["build", "--help"]) assert r.exit_code == 0 assert r.output.startswith("Usage: main build [OPTIONS] MODEL MODEL_ROOT") + +def test_cli_update_help(tmpdir): r = CliRunner().invoke(hydromt_cli, ["update", "--help"]) assert r.exit_code == 0 assert r.output.startswith("Usage: main update [OPTIONS] MODEL MODEL_ROOT") + +def test_cli_clip_help(tmpdir): r = CliRunner().invoke(hydromt_cli, ["clip", "--help"]) assert r.exit_code == 0 assert r.output.startswith( "Usage: main clip [OPTIONS] MODEL MODEL_ROOT MODEL_DESTINATION REGION" ) + +def test_cli_build_grid_model(tmpdir): root = str(tmpdir.join("grid_model_region")) cmd = [ "build", @@ -49,49 +61,96 @@ def test_cli(tmpdir): "setup_grid.res=0.05", "-vv", ] - r = CliRunner().invoke(hydromt_cli, cmd) + _ = CliRunner().invoke(hydromt_cli, cmd) # test force overwrite - r = CliRunner().invoke(hydromt_cli, cmd) with pytest.raises(IOError, match="Model dir already exists"): - raise r.exception + _ = CliRunner().invoke(hydromt_cli, cmd, catch_exceptions=False) r = CliRunner().invoke(hydromt_cli, cmd + ["--fo"]) assert r.exit_code == 0 - root = str(tmpdir.join("empty_region")) - r = CliRunner().invoke( - hydromt_cli, - ["build", "grid_model", root, "-vv"], - ) - assert r.exit_code == 0 +def test_cli_build_unknown_model(tmpdir): + with pytest.raises(ValueError, match="Unknown model"): + _ = CliRunner().invoke( + hydromt_cli, + [ + "build", + "test_model", + str(tmpdir), + "-r", + "{'subbasin': [-7.24, 62.09], 'strord': 4}", + ], + catch_exceptions=False, + ) + + +def test_cli_update_unknown_model(tmpdir): + with pytest.raises(ValueError, match="Unknown model"): + _ = CliRunner().invoke( + hydromt_cli, + [ + "update", + "test_model", + str(tmpdir), + "-c", + "component", + "--opt", + "key=value", + ], + catch_exceptions=False, + ) + + +def test_cli_clip_unknown_model(tmpdir): + with pytest.raises(NotImplementedError): + _ = CliRunner().invoke( + hydromt_cli, + ["clip", "test_model", str(tmpdir), str(tmpdir), "{'bbox': [1,2,3,4]}"], + catch_exceptions=False, + ) + + +def test_export_cli_deltared_data(tmpdir): r = CliRunner().invoke( hydromt_cli, [ - "build", - "test_model", + "export", str(tmpdir), + "-t", + "hydro_lakes", "-r", - "{'subbasin': [-7.24, 62.09], 'strord': 4}", + "{'subbasin': [-7.24, 62.09], 'uparea': 50}", + "--dd", ], ) - with pytest.raises(ValueError, match="Unknown model"): - raise r.exception + assert r.exit_code == 0, r.output + + +def test_export_cli_catalog(tmpdir): r = CliRunner().invoke( hydromt_cli, - ["update", "test_model", str(tmpdir), "-c", "component", "--opt", "key=value"], + [ + "export", + str(tmpdir), + "-t", + "hydro_lakes", + "-d", + "tests/data/test_sources.yml", + ], ) - with pytest.raises(ValueError, match="Unknown model"): - raise r.exception + assert r.exit_code == 0, r.output + +def test_export_cli_config_file(tmpdir): r = CliRunner().invoke( hydromt_cli, - ["clip", "test_model", str(tmpdir), str(tmpdir), "{'bbox': [1,2,3,4]}"], + ["export", str(tmpdir), "-f", "tests/data/export_config.yml"], + catch_exceptions=False, ) - with pytest.raises(NotImplementedError): - raise r.exception + assert r.exit_code == 0, r.output def test_api_datasets():