diff --git a/data/catalogs/test_data_catalog.py b/data/catalogs/test_data_catalog.py
index f691b2a73..77dc4524e 100644
--- a/data/catalogs/test_data_catalog.py
+++ b/data/catalogs/test_data_catalog.py
@@ -75,7 +75,7 @@ def test_data_catalog(args, datacatalog):
logger.info("Checking paths of data catalog sources")
for source_name, source in datacatalog.__iter__():
logger.info(f"Checking paths of {source_name}")
- if isinstance(source.driver.metadata_resolver, RasterTindexResolver):
+ if isinstance(source.uri_resolver, RasterTindexResolver):
if not exists(source.full_uri):
error_count += 1
logger.error(
@@ -84,7 +84,7 @@ def test_data_catalog(args, datacatalog):
continue
else:
- paths = source.driver.metadata_resolver.resolve(
+                paths = source.uri_resolver.resolve(
source.full_uri, source.driver.filesystem
)
for path in paths:
diff --git a/docs/api/api.rst b/docs/api/api.rst
index d241d399e..cb5cb730e 100644
--- a/docs/api/api.rst
+++ b/docs/api/api.rst
@@ -11,12 +11,13 @@ API reference
:maxdepth: 2
cli
+ data_adapter
data_catalog
data_source
drivers
gis
- io
model
stats
plugin
utils
+ uri_resolvers
diff --git a/docs/api/data_adapter.rst b/docs/api/data_adapter.rst
new file mode 100644
index 000000000..1c32e26a9
--- /dev/null
+++ b/docs/api/data_adapter.rst
@@ -0,0 +1,49 @@
+.. currentmodule:: hydromt.data_catalog.adapters
+
+DataAdapter
+===========
+
+RasterDataset
+-------------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ RasterDatasetAdapter
+ RasterDatasetAdapter.transform
+
+GeoDataset
+----------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ GeoDatasetAdapter
+ GeoDatasetAdapter.transform
+
+GeoDataFrame
+------------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ GeoDataFrameAdapter
+ GeoDataFrameAdapter.transform
+
+DataFrame
+---------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ DataFrameAdapter
+ DataFrameAdapter.transform
+
+Dataset
+-------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ DatasetAdapter
+ DatasetAdapter.transform
diff --git a/docs/api/data_catalog.rst b/docs/api/data_catalog.rst
index 794b64944..4296f3711 100644
--- a/docs/api/data_catalog.rst
+++ b/docs/api/data_catalog.rst
@@ -1,9 +1,5 @@
.. currentmodule:: hydromt.data_catalog
-====
-Data
-====
-
.. _api_data_catalog:
Data catalog
@@ -20,7 +16,6 @@ General
DataCatalog.sources
DataCatalog.predefined_catalogs
DataCatalog.to_dict
- DataCatalog.to_dataframe
DataCatalog.to_yml
DataCatalog.export_data
DataCatalog.get_source_bbox
@@ -63,244 +58,3 @@ Predefined data catalog
PredefinedCatalog.get_catalog_file
predefined_catalog.create_registry_file
-
-
-DataSource
-==========
-
-General
--------
-
-.. autosummary::
- :toctree: ../_generated
-
- sources.DataSource
- sources.DataSource.summary
-
-RasterDataset
--------------
-
-.. autosummary::
- :toctree: ../_generated
-
- sources.RasterDatasetSource
- sources.RasterDatasetSource.read_data
- sources.RasterDatasetSource.to_stac_catalog
- sources.RasterDatasetSource.get_bbox
- sources.RasterDatasetSource.get_time_range
- sources.RasterDatasetSource.detect_bbox
- sources.RasterDatasetSource.detect_time_range
-
-GeoDataFrame
-------------
-
-.. autosummary::
- :toctree: ../_generated
-
- sources.GeoDataFrameSource
- sources.GeoDataFrameSource.read_data
- sources.GeoDataFrameSource.to_stac_catalog
- sources.GeoDataFrameSource.get_bbox
- sources.GeoDataFrameSource.detect_bbox
-
-DataFrame
----------
-
-.. autosummary::
- :toctree: ../_generated
-
- sources.DataFrameSource
- sources.DataFrameSource.read_data
- sources.DataFrameSource.to_stac_catalog
-
-GeoDataset
-------------
-
-.. autosummary::
- :toctree: ../_generated
-
- sources.GeoDatasetSource
- sources.GeoDatasetSource.read_data
- sources.GeoDatasetSource.to_stac_catalog
- sources.GeoDatasetSource.get_bbox
- sources.GeoDatasetSource.detect_bbox
-
-URIResolver
-================
-
-General
--------
-
-.. autosummary::
- :toctree: ../_generated
-
- uri_resolvers.URIResolver
- uri_resolvers.URIResolver.resolve
-
-ConventionResolver
-------------------
-
-.. autosummary::
- :toctree: ../_generated
-
- uri_resolvers.ConventionResolver
- uri_resolvers.ConventionResolver.resolve
-
-RasterTindexResolver
---------------------
-.. autosummary::
- :toctree: ../_generated
-
- uri_resolvers.RasterTindexResolver
- uri_resolvers.RasterTindexResolver.resolve
-
-Driver
-======
-
-General
--------
-
-.. autosummary::
- :toctree: ../_generated
-
- drivers.base_driver.BaseDriver
-
-RasterDataset
--------------
-
-.. autosummary::
- :toctree: ../_generated
-
- drivers.raster.raster_dataset_driver.RasterDatasetDriver
- drivers.raster.raster_dataset_driver.RasterDatasetDriver.read
- drivers.raster.raster_dataset_driver.RasterDatasetDriver.write
-
-RasterDatasetXarrayDriver
--------------------------
-
-.. autosummary::
- :toctree: ../_generated
-
- drivers.raster.raster_xarray_driver.RasterDatasetXarrayDriver
- drivers.raster.raster_xarray_driver.RasterDatasetXarrayDriver.read
- drivers.raster.raster_xarray_driver.RasterDatasetXarrayDriver.write
-
-RasterioDriver
---------------
-
-.. autosummary::
- :toctree: ../_generated
-
- drivers.raster.rasterio_driver.RasterioDriver
- drivers.raster.rasterio_driver.RasterioDriver.read
- drivers.raster.rasterio_driver.RasterioDriver.write
-
-GeoDataFrame
-------------
-
-.. autosummary::
- :toctree: ../_generated
-
- drivers.geodataframe.geodataframe_driver.GeoDataFrameDriver
- drivers.geodataframe.geodataframe_driver.GeoDataFrameDriver.read
- drivers.geodataframe.geodataframe_driver.GeoDataFrameDriver.write
-
-PyogrioDriver
--------------
-
-.. autosummary::
- :toctree: ../_generated
-
- drivers.geodataframe.pyogrio_driver.PyogrioDriver
- drivers.geodataframe.pyogrio_driver.PyogrioDriver.read
- drivers.geodataframe.pyogrio_driver.PyogrioDriver.write
-
-GeoDataFrameTableDriver
------------------------
-
-.. autosummary::
- :toctree: ../_generated
-
- drivers.geodataframe.table_driver.GeoDataFrameTableDriver
- drivers.geodataframe.table_driver.GeoDataFrameTableDriver.read
- drivers.geodataframe.table_driver.GeoDataFrameTableDriver.write
-
-DataFrame
----------
-
-.. autosummary::
- :toctree: ../_generated
-
- drivers.dataframe.dataframe_driver.DataFrameDriver
- drivers.dataframe.dataframe_driver.DataFrameDriver.read
- drivers.dataframe.dataframe_driver.DataFrameDriver.write
-
-PandasDriver
-------------
-
-.. autosummary::
- :toctree: ../_generated
-
- drivers.dataframe.pandas_driver.PandasDriver
- drivers.dataframe.pandas_driver.PandasDriver.read
- drivers.dataframe.pandas_driver.PandasDriver.write
-
-GeoDataFrame
-------------
-
-.. autosummary::
- :toctree: ../_generated
-
- drivers.geodataset.geodataset_driver.GeoDatasetDriver
- drivers.geodataset.geodataset_driver.GeoDatasetDriver.read
- drivers.geodataset.geodataset_driver.GeoDatasetDriver.write
-
-DataAdapter
-===========
-
-General
--------
-
-RasterDataset
--------------
-
-.. autosummary::
- :toctree: ../_generated
-
- adapters.RasterDatasetAdapter
- adapters.RasterDatasetAdapter.transform
-
-GeoDataset
-----------
-
-.. autosummary::
- :toctree: ../_generated
-
- adapters.GeoDatasetAdapter
- adapters.GeoDatasetAdapter.transform
-
-GeoDataFrame
-------------
-
-.. autosummary::
- :toctree: ../_generated
-
- adapters.GeoDataFrameAdapter
- adapters.GeoDataFrameAdapter.transform
-
-DataFrame
----------
-
-.. autosummary::
- :toctree: ../_generated
-
- adapters.dataframe.DataFrameAdapter
- adapters.dataframe.DataFrameAdapter.transform
-
-Dataset
--------
-
-.. autosummary::
- :toctree: ../_generated
-
- adapters.DatasetAdapter
diff --git a/docs/api/data_source.rst b/docs/api/data_source.rst
index 15ea2e999..4ce956d5a 100644
--- a/docs/api/data_source.rst
+++ b/docs/api/data_source.rst
@@ -1,5 +1,64 @@
+.. currentmodule:: hydromt.data_catalog.sources
+
.. _data_source:
============
Data sources
============
+
+General
+-------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ DataSource
+ DataSource.summary
+
+RasterDataset
+-------------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ RasterDatasetSource
+ RasterDatasetSource.read_data
+ RasterDatasetSource.to_stac_catalog
+ RasterDatasetSource.get_bbox
+ RasterDatasetSource.get_time_range
+ RasterDatasetSource.detect_bbox
+ RasterDatasetSource.detect_time_range
+
+GeoDataFrame
+------------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ GeoDataFrameSource
+ GeoDataFrameSource.read_data
+ GeoDataFrameSource.to_stac_catalog
+ GeoDataFrameSource.get_bbox
+ GeoDataFrameSource.detect_bbox
+
+DataFrame
+---------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ DataFrameSource
+ DataFrameSource.read_data
+ DataFrameSource.to_stac_catalog
+
+GeoDataset
+------------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ GeoDatasetSource
+ GeoDatasetSource.read_data
+ GeoDatasetSource.to_stac_catalog
+ GeoDatasetSource.get_bbox
+ GeoDatasetSource.detect_bbox
diff --git a/docs/api/drivers.rst b/docs/api/drivers.rst
index 9a696c5fa..26a23f3bc 100644
--- a/docs/api/drivers.rst
+++ b/docs/api/drivers.rst
@@ -1,5 +1,145 @@
+.. currentmodule:: hydromt.data_catalog.drivers
+
.. _drivers:
=======
Drivers
=======
+
+Base
+----
+
+.. autosummary::
+ :toctree: ../_generated
+
+ BaseDriver
+
+RasterDataset
+-------------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ RasterDatasetDriver
+ RasterDatasetDriver.read
+ RasterDatasetDriver.write
+
+RasterDatasetXarrayDriver
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autosummary::
+ :toctree: ../_generated
+
+ RasterDatasetXarrayDriver
+ RasterDatasetXarrayDriver.read
+ RasterDatasetXarrayDriver.write
+
+RasterioDriver
+^^^^^^^^^^^^^^
+
+.. autosummary::
+ :toctree: ../_generated
+
+ RasterioDriver
+ RasterioDriver.read
+ RasterioDriver.write
+
+GeoDataFrame
+------------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ GeoDataFrameDriver
+ GeoDataFrameDriver.read
+ GeoDataFrameDriver.write
+
+PyogrioDriver
+^^^^^^^^^^^^^
+
+.. autosummary::
+ :toctree: ../_generated
+
+ PyogrioDriver
+ PyogrioDriver.read
+ PyogrioDriver.write
+
+GeoDataFrameTableDriver
+^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autosummary::
+ :toctree: ../_generated
+
+ GeoDataFrameTableDriver
+ GeoDataFrameTableDriver.read
+ GeoDataFrameTableDriver.write
+
+DataFrame
+---------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ DataFrameDriver
+ DataFrameDriver.read
+ DataFrameDriver.write
+
+PandasDriver
+^^^^^^^^^^^^
+
+.. autosummary::
+ :toctree: ../_generated
+
+ PandasDriver
+ PandasDriver.read
+ PandasDriver.write
+
+GeoDataset
+----------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ GeoDatasetDriver
+ GeoDatasetDriver.read
+ GeoDatasetDriver.write
+
+GeoDatasetXarrayDriver
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. autosummary::
+ :toctree: ../_generated
+
+ GeoDatasetXarrayDriver
+ GeoDatasetXarrayDriver.read
+ GeoDatasetXarrayDriver.write
+
+GeoDatasetVectorDriver
+^^^^^^^^^^^^^^^^^^^^^^
+
+.. autosummary::
+ :toctree: ../_generated
+
+ GeoDatasetVectorDriver
+ GeoDatasetVectorDriver.read
+ GeoDatasetVectorDriver.write
+
+Dataset
+-------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ DatasetDriver
+ DatasetDriver.read
+ DatasetDriver.write
+
+DatasetXarrayDriver
+^^^^^^^^^^^^^^^^^^^
+
+.. autosummary::
+ :toctree: ../_generated
+
+ DatasetXarrayDriver
+ DatasetXarrayDriver.read
+ DatasetXarrayDriver.write
diff --git a/docs/api/gis.rst b/docs/api/gis.rst
index 3004ab31e..a733aab02 100644
--- a/docs/api/gis.rst
+++ b/docs/api/gis.rst
@@ -285,47 +285,3 @@ visit the `pyflwdir docs. `_
flw.outlet_map
flw.clip_basins
flw.dem_adjust
-
-.. _gis_utils_api:
-
-GIS utility methods
-===================
-
-Raster
-------
-
-.. autosummary::
- :toctree: ../_generated
-
- create_vrt.create_vrt
- raster_utils.spread2d
- raster_utils.reggrid_area
- raster_utils.cellarea
- raster_utils.cellres
- raster_utils.meridian_offset
- raster_utils.affine_to_coords
- raster_utils.affine_to_meshgrid
-
-Vector
-------
-
-.. autosummary::
- :toctree: ../_generated
-
- vector_utils.filter_gdf
- vector_utils.nearest
- vector_utils.nearest_merge
-
-
-General
--------
-
-.. autosummary::
- :toctree: ../_generated
-
- gis_utils.parse_crs
- gis_utils.utm_crs
- gis_utils.bbox_from_file_and_filters
- gis_utils.parse_geom_bbox_buffer
- gis_utils.to_geographic_bbox
- gis_utils.axes_attrs
diff --git a/docs/api/io.rst b/docs/api/io.rst
deleted file mode 100644
index d2b33848f..000000000
--- a/docs/api/io.rst
+++ /dev/null
@@ -1,39 +0,0 @@
-.. currentmodule:: hydromt.io
-
-=======================
-Reading/writing methods
-=======================
-
-.. _open_methods:
-
-Reading methods
-===============
-
-.. autosummary::
- :toctree: ../_generated
-
- configread
- open_geodataset
- open_mfcsv
- open_mfraster
- open_raster
- open_raster_from_tindex
- open_timeseries_from_table
- open_vector
- open_vector_from_table
- read_nc
- read_toml
- read_yaml
-
-Writing methods
-===============
-
-.. autosummary::
- :toctree: ../_generated
-
- netcdf_writer
- write_nc
- write_toml
- write_xy
- write_yaml
- zarr_writer
diff --git a/docs/api/uri_resolvers.rst b/docs/api/uri_resolvers.rst
new file mode 100644
index 000000000..ca5015d4e
--- /dev/null
+++ b/docs/api/uri_resolvers.rst
@@ -0,0 +1,32 @@
+===========
+URIResolver
+===========
+
+.. currentmodule:: hydromt.data_catalog.uri_resolvers
+
+General
+-------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ URIResolver
+ URIResolver.resolve
+
+ConventionResolver
+------------------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ ConventionResolver
+ ConventionResolver.resolve
+
+RasterTindexResolver
+--------------------
+
+.. autosummary::
+ :toctree: ../_generated
+
+ RasterTindexResolver
+ RasterTindexResolver.resolve
diff --git a/docs/assets/data_types/csv_dataframe.yml b/docs/assets/data_types/csv_dataframe.yml
new file mode 100644
index 000000000..532caca01
--- /dev/null
+++ b/docs/assets/data_types/csv_dataframe.yml
@@ -0,0 +1,11 @@
+observations:
+ uri: data/lulc/globcover_mapping.csv
+ data_type: DataFrame
+ driver:
+ name: pandas
+ options:
+ header: null # null translates to None in Python -> no header
+ index_col: 0
+ parse_dates: false
+ metadata:
+ category: parameter_mapping
diff --git a/docs/assets/data_types/csv_geodataframe.yml b/docs/assets/data_types/csv_geodataframe.yml
new file mode 100644
index 000000000..6fa6c1c6c
--- /dev/null
+++ b/docs/assets/data_types/csv_geodataframe.yml
@@ -0,0 +1,6 @@
+stations:
+ uri: /path/to/stations.csv
+ data_type: GeoDataFrame
+ driver: geodataframe_table
+ metadata:
+ crs: 4326
diff --git a/docs/assets/data_types/csv_geodataset.yml b/docs/assets/data_types/csv_geodataset.yml
new file mode 100644
index 000000000..212a9861f
--- /dev/null
+++ b/docs/assets/data_types/csv_geodataset.yml
@@ -0,0 +1,9 @@
+waterlevels_txt:
+ uri: /path/to/stations.csv
+ data_type: GeoDataset
+ driver:
+ name: geodataset_vector
+ options:
+ data_path: /path/to/stations_data.csv
+ metadata:
+ crs: 4326
diff --git a/docs/assets/data_types/gpkg_geodataframe.yml b/docs/assets/data_types/gpkg_geodataframe.yml
new file mode 100644
index 000000000..1cbc6d962
--- /dev/null
+++ b/docs/assets/data_types/gpkg_geodataframe.yml
@@ -0,0 +1,15 @@
+GDP_world:
+ uri: base/emissions/GDP-countries/World_countries_GDPpcPPP.gpkg
+ data_type: GeoDataFrame
+ driver:
+ name: pyogrio
+ options:
+ layer: GDP
+ data_adapter:
+ rename:
+ GDP: gdp
+ unit_mult:
+ gdp: 0.001
+ metadata:
+ category: socio-economic
+ source_version: 1.0
diff --git a/docs/assets/data_types/netcdf_dataset.yml b/docs/assets/data_types/netcdf_dataset.yml
new file mode 100644
index 000000000..8b279d735
--- /dev/null
+++ b/docs/assets/data_types/netcdf_dataset.yml
@@ -0,0 +1,4 @@
+timeseries_dataset:
+ uri: /path/to/timeseries.netcdf
+ data_type: Dataset
+ driver: dataset_xarray
diff --git a/docs/assets/data_types/netcdf_geodataset.yml b/docs/assets/data_types/netcdf_geodataset.yml
new file mode 100644
index 000000000..ae3a13cb6
--- /dev/null
+++ b/docs/assets/data_types/netcdf_geodataset.yml
@@ -0,0 +1,22 @@
+gtsmv3_eu_era5:
+ uri: reanalysis-waterlevel-{year}-m{month:02d}.nc
+ data_type: GeoDataset
+ driver:
+ name: geodataset_xarray
+ options:
+ chunks: {stations: 100, time: 1500}
+ combine: by_coords
+ decode_times: true
+ parallel: true
+ data_adapter:
+ rename:
+ station_x_coordinate: lon
+ station_y_coordinate: lat
+ stations: index
+ metadata:
+ crs: 4326
+ category: ocean
+ paper_doi: 10.24381/cds.8c59054f
+ paper_ref: Copernicus Climate Change Service 2019
+ source_license: https://cds.climate.copernicus.eu/cdsapp/#!/terms/licence-to-use-copernicus-products
+ source_url: https://cds.climate.copernicus.eu/cdsapp#!/dataset/10.24381/cds.8c59054f?tab=overview
diff --git a/docs/assets/data_types/netcdf_raster_dataset.yml b/docs/assets/data_types/netcdf_raster_dataset.yml
new file mode 100644
index 000000000..3d3d70832
--- /dev/null
+++ b/docs/assets/data_types/netcdf_raster_dataset.yml
@@ -0,0 +1,26 @@
+
+era5_hourly:
+ uri: forcing/ERA5/org/era5_{variable}_{year}_hourly.nc
+ data_type: RasterDataset
+ driver:
+ name: raster_xarray
+ options:
+ chunks: {latitude: 125, longitude: 120, time: 50}
+ combine: by_coords
+ decode_times: true
+ parallel: true
+ metadata:
+ crs: 4326
+ category: meteo
+ paper_doi: 10.1002/qj.3803
+ paper_ref: Hersbach et al. (2019)
+ source_license: https://cds.climate.copernicus.eu/cdsapp/#!/terms/licence-to-use-copernicus-products
+ source_url: https://doi.org/10.24381/cds.bd0915c6
+ data_adapter:
+ rename:
+ t2m: temp
+ tp: precip
+ unit_add:
+ temp: -273.15
+ unit_mult:
+ precip: 1000
diff --git a/docs/assets/data_types/single_variable_geotiff_raster.yml b/docs/assets/data_types/single_variable_geotiff_raster.yml
new file mode 100644
index 000000000..cbdec0b15
--- /dev/null
+++ b/docs/assets/data_types/single_variable_geotiff_raster.yml
@@ -0,0 +1,15 @@
+globcover:
+ uri: base/landcover/globcover/GLOBCOVER_200901_200912_300x300m.tif
+ data_type: RasterDataset
+ driver:
+ name: rasterio
+ options:
+ chunks:
+ x: 3600
+ y: 3600
+ metadata:
+ category: landuse
+ source_url: http://due.esrin.esa.int/page_globcover.php
+ source_license: CC-BY-3.0
+ paper_ref: Arino et al (2012)
+ paper_doi: 10.1594/PANGAEA.787668
diff --git a/docs/assets/data_types/tiled_raster_dataset.yml b/docs/assets/data_types/tiled_raster_dataset.yml
new file mode 100644
index 000000000..866025681
--- /dev/null
+++ b/docs/assets/data_types/tiled_raster_dataset.yml
@@ -0,0 +1,23 @@
+grwl_mask:
+ uri: static_data/base/grwl/tindex.gpkg
+ data_type: RasterDataset
+ uri_resolver:
+ name: raster_tindex
+ options:
+ tileindex: location
+ driver:
+ name: rasterio
+ options:
+ chunks:
+ x: 3000
+ y: 3000
+ mosaic_kwargs:
+ method: nearest
+ metadata:
+ nodata: 0
+ category: hydrography
+ paper_doi: 10.1126/science.aat0636
+ paper_ref: Allen and Pavelsky (2018)
+ source_license: CC BY 4.0
+ source_url: https://doi.org/10.5281/zenodo.1297434
+ source_version: 1.01
diff --git a/docs/assets/data_types/vrt_raster_dataset.yml b/docs/assets/data_types/vrt_raster_dataset.yml
new file mode 100644
index 000000000..0d3d37f7f
--- /dev/null
+++ b/docs/assets/data_types/vrt_raster_dataset.yml
@@ -0,0 +1,24 @@
+merit_hydro:
+ uri: base/merit_hydro/{variable}.vrt
+ data_type: RasterDataset
+ driver:
+ name: rasterio
+ options:
+ chunks:
+ x: 6000
+ y: 6000
+ data_adapter:
+ rename:
+ dir: flwdir
+ bas: basins
+ upa: uparea
+ elv: elevtn
+ sto: strord
+ metadata:
+ crs: 4326
+ category: topography
+ source_version: 1.0
+ paper_doi: 10.1029/2019WR024873
+ paper_ref: Dai Yamazaki
+ source_url: http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_Hydro
+ source_license: CC-BY-NC 4.0 or ODbL 1.0
diff --git a/docs/conf.py b/docs/conf.py
index 871bacb02..60cded4fb 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -57,8 +57,8 @@ def write_panel(f, name, content="", level=0, item="dropdown"):
f.write("\n")
-def write_nested_dropdown(name, data_cat, note="", categories=[]):
- df = data_cat.to_dataframe().sort_index().drop_duplicates("uri")
+def write_nested_dropdown(name, data_cat: hydromt.DataCatalog, note="", categories=[]):
+ df = data_cat._to_dataframe().sort_index().drop_duplicates("uri")
with open(f"_generated/{name}.rst", mode="w") as f:
write_panel(f, name, note, level=0)
write_panel(f, "", level=1, item="tab-set")
diff --git a/docs/guides/advanced_user/data_prepare_cat.rst b/docs/guides/advanced_user/data_prepare_cat.rst
index c936f0315..31ea0af0e 100644
--- a/docs/guides/advanced_user/data_prepare_cat.rst
+++ b/docs/guides/advanced_user/data_prepare_cat.rst
@@ -29,8 +29,8 @@ shown keys is highly recommended. The ``rename``, ``nodata``, ``unit_add`` and
``unit_mult`` options are set per variable (or attribute table column in case of a
GeoDataFrame).
-.. include:: ../../assets/example_catalog.yml
- :code: yaml
+.. literalinclude:: ../../assets/example_catalog.yml
+ :language: yaml
.. testsetup:: *
diff --git a/docs/guides/advanced_user/data_sources.rst b/docs/guides/advanced_user/data_sources.rst
deleted file mode 100644
index 56b3ea200..000000000
--- a/docs/guides/advanced_user/data_sources.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-.. _data_sources:
-
-Data sources
-============
diff --git a/docs/guides/advanced_user/data_types.rst b/docs/guides/advanced_user/data_types.rst
index 837ac3cd6..fe63a5138 100644
--- a/docs/guides/advanced_user/data_types.rst
+++ b/docs/guides/advanced_user/data_types.rst
@@ -1,5 +1,7 @@
.. _data_types:
+.. currentmodule:: hydromt.data_catalog.drivers
+
Supported data types
====================
@@ -11,15 +13,16 @@ HydroMT currently supports the following data types:
- :ref:`Dataset `: non-spatial n-dimensional data
- :ref:`DataFrame `: 2D tabular data
-Internally the RasterDataset, GeoDataset, and Dataset are represented by :py:class:`xarray.Dataset` objects,
-the GeoDataFrame by :py:class:`geopandas.GeoDataFrame`, and the DataFrame by
-:py:class:`pandas.DataFrame`. We use drivers, typically from third-party packages and sometimes
-wrapped in HydroMT functions, to parse many different file formats to this standardized internal
-data representation.
+Internally the RasterDataset, GeoDataset, and Dataset are represented by
+:py:class:`xarray.Dataset` objects, the GeoDataFrame by
+:py:class:`geopandas.GeoDataFrame`, and the DataFrame by :py:class:`pandas.DataFrame`.
+We use drivers, typically from third-party packages and sometimes wrapped in HydroMT
+functions, to parse many different file formats to this standardized internal data
+representation.
.. note::
- Please contact us through the issue list if you would like to add other drivers.
+  It is also possible to create your own driver. See :ref:`Custom Driver`.
.. _dimensions:
@@ -44,26 +47,23 @@ Raster data (RasterDataset)
.. _raster_formats:
.. list-table::
- :widths: 17, 25, 28, 30
+ :widths: 17, 25, 30
:header-rows: 1
* - Driver
- File formats
- - Method
- Comments
- * - ``raster``
+ * - :py:class:`raster `
- GeoTIFF, ArcASCII, VRT, etc. (see `GDAL formats `_)
- - :py:meth:`~hydromt.io.open_mfraster`
- Based on :py:func:`xarray.open_rasterio`
and :py:func:`rasterio.open`
- * - ``raster_tindex``
+ * - :py:class:`raster ` with the
+ :py:class:`raster_tindex ` resolver
- raster tile index file (see `gdaltindex `_)
- - :py:meth:`~hydromt.io.open_raster_from_tindex`
- - Options to merge tiles via ``mosaic_kwargs``.
- * - ``netcdf`` or ``zarr``
+ - Options to merge tiles via `options -> mosaic_kwargs`.
+ * - :py:class:`raster_xarray `
- NetCDF and Zarr
- - :py:func:`xarray.open_mfdataset`, :py:func:`xarray.open_zarr`
- - required y and x dimensions_
+ - required y and x dimensions
.. _GeoTiff:
@@ -73,24 +73,24 @@ Raster data (RasterDataset)
Single raster files are parsed to a **RasterDataset** based on the **raster** driver.
This driver supports 2D raster for which the dimensions are names "x" and "y".
A potential third dimension is called "dim0".
-The variable name is based on the filename, in this case "GLOBCOVER_200901_200912_300x300m".
-The ``chunks`` key-word argument is passed to :py:meth:`~hydromt.io.open_mfraster`
+The variable name is based on the filename, in this case `"GLOBCOVER_200901_200912_300x300m"`.
+The `chunks` keyword argument is passed to :py:meth:`~hydromt.io.open_mfraster`
and allows lazy reading of the data.
-.. code-block:: yaml
-
- globcover:
- path: base/landcover/globcover/GLOBCOVER_200901_200912_300x300m.tif
- data_type: RasterDataset
- driver: raster
- driver_kwargs:
- chunks: {x: 3600, y: 3600}
- meta:
- category: landuse
- source_url: http://due.esrin.esa.int/page_globcover.php
- source_license: CC-BY-3.0
- paper_ref: Arino et al (2012)
- paper_doi: 10.1594/PANGAEA.787668
+.. literalinclude:: ../../assets/data_types/single_variable_geotiff_raster.yml
+ :language: yaml
+
+.. testsetup:: *
+
+ from hydromt import DataCatalog
+
+.. testcode:: geotiff
+ :hide:
+
+ catalog_path = "docs/assets/data_types/single_variable_geotiff_raster.yml"
+
+ catalog = DataCatalog(fallback_lib=None) # do not read default catalog
+ catalog.from_yml(catalog_path)
.. _VRT:
@@ -100,45 +100,35 @@ Multi-variable Virtual Raster Tileset (VRT)
Multiple raster layers from different files are parsed using the **raster** driver.
Each raster becomes a variable in the resulting RasterDataset based on its filename.
The path to multiple files can be set using a sting glob or several keys,
-see description of the ``path`` argument in the :ref:`yaml file description `.
+see description of the `uri` argument in the :ref:`yaml file description `.
Note that the rasters should have identical grids.
-Here multiple .vrt files (dir.vrt, bas.vrt, etc.) are combined based on their variable name
-into a single dataset with variables flwdir, basins, etc.
-Other multiple file raster datasets (e.g. GeoTIFF files) can be read in the same way.
-VRT files are useful for large raster datasets which are often tiled and can be combined using
+Here multiple .vrt files (dir.vrt, bas.vrt, etc.) are combined based on their variable
+name into a single dataset with variables flwdir, basins, etc. Other multiple file
+raster datasets (e.g. GeoTIFF files) can be read in the same way. VRT files are useful
+for large raster datasets which are often tiled and can be combined using
`gdalbuildvrt. `_
-.. code-block:: yaml
-
- merit_hydro:
- path: base/merit_hydro/{variable}.vrt
- data_type: RasterDataset
- driver: raster
- crs: 4326
- driver_kwargs:
- chunks: {x: 6000, y: 6000}
- rename:
- dir: flwdir
- bas: basins
- upa: uparea
- elv: elevtn
- sto: strord
- meta:
- category: topography
- source_version: 1.0
- paper_doi: 10.1029/2019WR024873
- paper_ref: Dai Yamazaki
- source_url: http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_Hydro
- source_license: CC-BY-NC 4.0 or ODbL 1.0
+.. literalinclude:: ../../assets/data_types/vrt_raster_dataset.yml
+ :language: yaml
+
+.. testcode:: geotiff
+ :hide:
+
+ catalog_path = "docs/assets/data_types/vrt_raster_dataset.yml"
+
+ catalog = DataCatalog(fallback_lib=None) # do not read default catalog
+ catalog.from_yml(catalog_path)
.. _Tile:
Tiled raster dataset
^^^^^^^^^^^^^^^^^^^^
-Tiled index datasets are parsed using the **raster_tindex** driver.
+Tiled index datasets are parsed using the
+:py:class:`raster_tindex `
+:py:class:`~hydromt.data_catalog.uri_resolvers.uri_resolver.URIResolver`.
This data format is used to combine raster tiles with different CRS projections.
A polygon vector file (e.g. GeoPackage) is used to make a tile index with the spatial
footprints of each tile. When reading a spatial slice of this data the files with
@@ -146,34 +136,26 @@ intersecting footprints will be merged together in the CRS of the most central t
Use `gdaltindex `_ to build an excepted tile index file.
Here a GeoPackage with the tile index referring to individual GeoTiff raster tiles is used.
-The ``mosaic_kwargs`` are passed to :py:meth:`~hydromt.io.open_raster_from_tindex` to
-set the resampling ``method``. The name of the column in the tile index attribute table ``tileindex``
-which contains the raster tile file names is set in the ``driver_kwargs`` (to be directly passed as an argument to
-:py:meth:`~hydromt.io.open_raster_from_tindex`).
-
-.. code-block:: yaml
-
- grwl_mask:
- path: static_data/base/grwl/tindex.gpkg
- data_type: RasterDataset
- driver: raster_tindex
- nodata: 0
- driver_kwargs:
- chunks: {x: 3000, y: 3000}
- mosaic_kwargs: {method: nearest}
- tileindex: location
- meta:
- category: hydrography
- paper_doi: 10.1126/science.aat0636
- paper_ref: Allen and Pavelsky (2018)
- source_license: CC BY 4.0
- source_url: https://doi.org/10.5281/zenodo.1297434
- source_version: 1.01
+The `mosaic_kwargs` are passed to :py:func:`hydromt.gis.merge` to
+set the resampling `method`. The name of the column in the tile index attribute table
+`tileindex` which contains the raster tile file names is set in the `driver.options`.
+
+.. literalinclude:: ../../assets/data_types/tiled_raster_dataset.yml
+ :language: yaml
+
+.. testcode:: geotiff
+ :hide:
+
+ catalog_path = "docs/assets/data_types/tiled_raster_dataset.yml"
+
+ catalog = DataCatalog(fallback_lib=None) # do not read default catalog
+ catalog.from_yml(catalog_path)
.. NOTE::
- Tiled raster datasets are not read lazily as different tiles have to be merged together based on
- their values. For fast access to large raster datasets, other formats might be more suitable.
+ Tiled raster datasets are not read lazily as different tiles have to be merged
+ together based on their values. For fast access to large raster datasets, other
+ formats might be more suitable.
.. _NC_raster:
@@ -199,54 +181,34 @@ See list of recognized dimensions_ names.
To read a raster dataset from a multiple file netcdf archive the following data entry
-is used, where the ``driver_kwargs`` are passed to :py:func:`xarray.open_mfdataset`
+is used, where the `options` are passed to :py:func:`xarray.open_mfdataset`
(or :py:func:`xarray.open_zarr` for zarr data).
-In case the CRS cannot be inferred from the netcdf data it should be defined with the ``crs`` option here.
+In case the CRS cannot be inferred from the netcdf metadata it should be defined with
+the `crs` key in `metadata` here.
The path to multiple files can be set using a sting glob or several keys,
-see description of the ``path`` argument in the :ref:`yaml file description `.
+see description of the `uri` argument in the :ref:`yaml file description `.
In this example additional renaming and unit conversion preprocessing steps are added to
unify the data to match the HydroMT naming and unit :ref:`terminology `.
-.. code-block:: yaml
-
- era5_hourly:
- path: forcing/ERA5/org/era5_{variable}_{year}_hourly.nc
- data_type: RasterDataset
- driver: netcdf
- crs: 4326
- driver_kwargs:
- chunks: {latitude: 125, longitude: 120, time: 50}
- combine: by_coords
- decode_times: true
- parallel: true
- meta:
- category: meteo
- paper_doi: 10.1002/qj.3803
- paper_ref: Hersbach et al. (2019)
- source_license: https://cds.climate.copernicus.eu/cdsapp/#!/terms/licence-to-use-copernicus-products
- source_url: https://doi.org/10.24381/cds.bd0915c6
- rename:
- t2m: temp
- tp: precip
- unit_add:
- temp: -273.15
- unit_mult:
- precip: 1000
-
+.. literalinclude:: ../../assets/data_types/netcdf_raster_dataset.yml
+ :language: yaml
-Preprocess functions when combining multiple files
-""""""""""""""""""""""""""""""""""""""""""""""""""
+.. testcode:: geotiff
+ :hide:
-In :py:func:`xarray.open_mfdataset`, xarray allows for a *preprocess* function to be run before merging several
-netcdf files together. In hydroMT, some preprocess functions are available and can be passed through the ``driver_kwargs``
-options in the same way as any xr.open_mfdataset options. These preprocess functions are:
+ catalog_path = "docs/assets/data_types/netcdf_raster_dataset.yml"
-- **round_latlon**: round x and y dimensions to 5 decimals to avoid merging problems in xarray due to small differences
- in x, y values in the different netcdf files of the same data source.
-- **to_datetimeindex**: force parsing the time dimension to a datetime index.
-- **remove_duplicates**: remove time duplicates
+ catalog = DataCatalog(fallback_lib=None) # do not read default catalog
+ catalog.from_yml(catalog_path)
+Preprocess functions when combining multiple files
+""""""""""""""""""""""""""""""""""""""""""""""""""
+In :py:func:`xarray.open_mfdataset`, xarray allows for a **preprocess** function to be
+run before merging several netcdf files together. In hydroMT, some preprocess functions
+are available and can be passed through the options in the same way as any
+xr.open_mfdataset options. These preprocess functions are found in
+:py:mod:`hydromt.data_catalog.preprocessing`.
.. _GeoDataFrame:
@@ -259,23 +221,18 @@ Vector data (GeoDataFrame)
.. _vector_formats:
.. list-table::
- :widths: 17, 25, 28, 30
+ :widths: 17, 25, 30
:header-rows: 1
* - Driver
- File formats
- - Method
- Comments
- * - ``vector``
+ * - :py:class:`pyogrio `
- ESRI Shapefile, GeoPackage, GeoJSON, etc.
- - :py:meth:`~hydromt.io.open_vector`
- - Point, Line and Polygon geometries. Uses :py:func:`geopandas.read_file`
- * - ``vector_table``
+ - Point, Line and Polygon geometries. Uses :py:func:`pyogrio.read_dataframe`
+ * - :py:class:`geodataframe_table `
- CSV, XY, PARQUET and EXCEL.
- - :py:meth:`~hydromt.io.open_vector`
- - Point geometries only. Uses :py:meth:`~hydromt.io.open_vector_from_table`
-
-
+ - Point geometries only.
.. _GPKG_vector:
@@ -288,21 +245,20 @@ spatial index for fast filtering of the data based on spatial location. An examp
shown below. Note that the rename, ``unit_mult``, ``unit_add`` and ``nodata`` options refer to
columns of the attribute table in case of a GeoDataFrame.
-.. code-block:: yaml
-
- GDP_world:
- path: base/emissions/GDP-countries/World_countries_GDPpcPPP.gpkg
- data_type: GeoDataFrame
- driver: vector
- driver_kwargs:
- layer: GDP
- rename:
- GDP: gdp
- unit_mult:
- gdp: 0.001
- meta:
- category: socio-economic
- source_version: 1.0
+.. literalinclude:: ../../assets/data_types/gpkg_geodataframe.yml
+ :language: yaml
+
+.. testsetup:: *
+
+ from hydromt import DataCatalog
+
+.. testcode:: geotiff
+ :hide:
+
+ catalog_path = "docs/assets/data_types/gpkg_geodataframe.yml"
+
+ catalog = DataCatalog(fallback_lib=None) # do not read default catalog
+ catalog.from_yml(catalog_path)
.. _textdelimited_vector:
@@ -336,28 +292,30 @@ of the GeoDataFrame attribute table.
...
As the CRS of the coordinates cannot be inferred from the data it must be set in the
-data entry in the yaml file as shown in the example below. The internal data format
-is based on the file extension unless the ``driver_kwargs`` ``driver`` option is set.
-See :py:meth:`~hydromt.io.open_vector` and :py:func:`~hydromt.io.open_vector_from_table` for more
-options.
+data entry in the yaml file as shown in the example below.
+
+.. literalinclude:: ../../assets/data_types/csv_geodataframe.yml
+ :language: yaml
+
+.. testsetup:: *
+
+ from hydromt import DataCatalog
-.. code-block:: yaml
+.. testcode:: geotiff
+ :hide:
- stations:
- path: /path/to/stations.csv
- data_type: GeoDataFrame
- driver: vector_table
- crs: 4326
- driver_kwargs:
- driver: csv
+ catalog_path = "docs/assets/data_types/csv_geodataframe.yml"
+
+ catalog = DataCatalog(fallback_lib=None) # do not read default catalog
+ catalog.from_yml(catalog_path)
.. _binary_vector:
-HydroMT also supports reading and writing vector data in binary format. Currently only parquet is
-supported, but others could be added if desired. The structure of the files should be the same as
-the text format files described above but writing according to the parquet file spec. Since this is
-a binary format, not examples are provided, but for example pandas can write the same data structure
-to parquet as it can csv.
+HydroMT also supports reading and writing vector data in binary format. Currently only
+parquet is supported, but others could be added if desired. The structure of the files
+should be the same as the text format files described above but writing according to the
+parquet file spec. Since this is a binary format, no examples are provided, but for
+example pandas can write the same data structure to parquet as it can csv.
.. _GeoDataset:
@@ -371,20 +329,18 @@ Geospatial point time-series (GeoDataset)
.. _geo_formats:
.. list-table::
- :widths: 17, 25, 28, 30
+ :widths: 17, 25, 30
:header-rows: 1
* - Driver
- File formats
- - Method
- Comments
- * - ``vector``
- - Combined point location (e.g. CSV or GeoJSON) and text delimited time-series (e.g. CSV) data.
- - :py:meth:`~hydromt.io.open_geodataset`
- - Uses :py:meth:`~hydromt.io.open_vector`, :py:meth:`~hydromt.io.open_timeseries_from_table`
- * - ``netcdf`` or ``zarr``
+ * - :py:class:`geodataset_vector `
+ - Combined point location (e.g. CSV or GeoJSON) and text delimited time-series
+ (e.g. CSV) data.
+ -
+ * - :py:class:`geodataset_xarray `
- NetCDF and Zarr
- - :py:func:`xarray.open_mfdataset`, :py:func:`xarray.open_zarr`
- required time and index dimensions_ and x- and y coordinates.
@@ -411,67 +367,63 @@ on a list of recognized dimensions_ names.
waterlevel (time, stations)
To read a point time-series dataset from a multiple file netcdf archive the following data entry
-is used, where the ``driver_kwargs`` are passed to :py:func:`xarray.open_mfdataset`
+is used, where the options are passed to :py:func:`xarray.open_mfdataset`
(or :py:func:`xarray.open_zarr` for zarr data).
In case the CRS cannot be inferred from the netcdf data it is defined here.
The path to multiple files can be set using a sting glob or several keys,
-see description of the ``path`` argument in the :ref:`yaml file description `.
+see description of the `uri` argument in the :ref:`yaml file description `.
In this example additional renaming and unit conversion preprocessing steps are added to
unify the data to match the HydroMT naming and unit :ref:`terminology `.
-.. code-block:: yaml
-
- gtsmv3_eu_era5:
- path: reanalysis-waterlevel-{year}-m{month:02d}.nc
- data_type: GeoDataset
- driver: netcdf
- crs: 4326
- driver_kwargs:
- chunks: {stations: 100, time: 1500}
- combine: by_coords
- decode_times: true
- parallel: true
- rename:
- station_x_coordinate: lon
- station_y_coordinate: lat
- stations: index
- meta:
- category: ocean
- paper_doi: 10.24381/cds.8c59054f
- paper_ref: Copernicus Climate Change Service 2019
- source_license: https://cds.climate.copernicus.eu/cdsapp/#!/terms/licence-to-use-copernicus-products
- source_url: https://cds.climate.copernicus.eu/cdsapp#!/dataset/10.24381/cds.8c59054f?tab=overview
+.. literalinclude:: ../../assets/data_types/netcdf_geodataset.yml
+ :language: yaml
+
+.. testsetup:: *
+
+ from hydromt import DataCatalog
+
+.. testcode:: geotiff
+ :hide:
+
+ catalog_path = "docs/assets/data_types/netcdf_geodataset.yml"
+
+ catalog = DataCatalog(fallback_lib=None) # do not read default catalog
+ catalog.from_yml(catalog_path)
.. _CSV_point:
CSV point time-series data
^^^^^^^^^^^^^^^^^^^^^^^^^^
-Point time-series data where the geospatial point geometries and time-series are saved in
-separate (text) files are parsed to **GeoDataset** using the **vector** driver.
-The GeoDataset must at least contain a location index with point geometries which is referred to by the ``path`` argument
-The path may refer to both GIS vector data such as GeoJSON with only Point geometries
-or tabulated point vector data such as csv files, see earlier examples for GeoDataFrame datasets.
-Finally, certain binary formats such as parquet are also supported.
-In addition a tabulated time-series text file can be passed to be used as a variable of the GeoDataset.
-This data is added by a second file which is referred to using the ``data_path`` key-word argument.
-The index of the time-series (in the columns header) and point locations must match.
-For more options see the :py:meth:`~hydromt.io.open_geodataset` method.
-
-.. code-block:: yaml
-
- waterlevels_txt:
- path: /path/to/stations.csv
- data_type: GeoDataset
- driver: vector
- crs: 4326
- driver_kwargs:
- data_path: /path/to/stations_data.csv
+Point time-series data where the geospatial point geometries and time-series are saved
+in separate (text) files are parsed to **GeoDataset** using the **vector** driver. The
+GeoDataset must at least contain a location index with point geometries which is
+referred to by the `uri` argument. The path may refer to both GIS vector data such as
+GeoJSON with only Point geometries or tabulated point vector data such as csv files, see
+earlier examples for GeoDataFrame datasets. Finally, certain binary formats such as
+parquet are also supported. In addition a tabulated time-series text file can be passed
+to be used as a variable of the GeoDataset. This data is added by a second file which is
+referred to using the `data_path` option. The index of the time-series (in the columns
+header) and point locations must match.
+
+.. literalinclude:: ../../assets/data_types/csv_geodataset.yml
+ :language: yaml
+
+.. testsetup:: *
+
+ from hydromt import DataCatalog
+
+.. testcode:: geotiff
+ :hide:
+
+ catalog_path = "docs/assets/data_types/csv_geodataset.yml"
+
+ catalog = DataCatalog(fallback_lib=None) # do not read default catalog
+ catalog.from_yml(catalog_path)
*Tabulated time series text file*
-This data is read using the :py:meth:`~hydromt.io.open_timeseries_from_table` method. To
-read the time stamps the :py:func:`pandas.to_datetime` method is used.
+To read the time stamps the :py:func:`pandas.to_datetime` method is used.
.. code-block:: console
@@ -485,38 +437,44 @@ read the time stamps the :py:func:`pandas.to_datetime` method is used.
NetCDF time-series dataset (Dataset)
------------------------------------
+
.. _dataset_formats:
.. list-table::
- :widths: 17, 25, 28, 30
+ :widths: 17, 25, 30
:header-rows: 1
* - Driver
- File formats
- - Method
- Comments
- * - ``netcdf`` or ``zarr``
+ * - :py:class:`dataset_xarray `
- NetCDF and Zarr
- - :py:func:`xarray.open_mfdataset`, :py:func:`xarray.open_zarr`
- required time and index dimensions_.
.. _NC_timeseries:
-
Netcdf time-series dataset
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+NetCDF and zarr timeseries data are parsed to **Dataset** with the
+:py:class:`~dataset.xarray_driver.DatasetXarrayDriver`.
+The resulting dataset is similar to the **GeoDataset** except that it lacks a spatial
+dimension.
-NetCDF and zarr timeseries data are parsed to **Dataset** with the **netcdf** and **zarr** drivers.
-The resulting dataset is similar to the **GeoDataset** except that it lacks a spatial dimension.
+.. literalinclude:: ../../assets/data_types/netcdf_dataset.yml
+ :language: yaml
-.. code-block:: yaml
+.. testsetup:: *
- timeseries_dataset:
- path: /path/to/timeseries.netcdf
- data_type: Dataset
- driver: netcdf
+ from hydromt import DataCatalog
+.. testcode:: geotiff
+ :hide:
+ catalog_path = "docs/assets/data_types/netcdf_dataset.yml"
+
+ catalog = DataCatalog(fallback_lib=None) # do not read default catalog
+ catalog.from_yml(catalog_path)
.. _DataFrame:
@@ -526,30 +484,15 @@ The resulting dataset is similar to the **GeoDataset** except that it lacks a sp
.. _dataframe_formats:
.. list-table::
- :widths: 17, 25, 28, 30
+ :widths: 17, 25, 30
:header-rows: 1
* - Driver
- File formats
- - Method
- Comments
- * - ``csv``
- - Comma-separated files (or using another delimiter)
- - :py:func:`pandas.read_csv`
- - See :py:func:`pandas.read_csv` for all
- * - ``excel``
- - Excel files
- - :py:func:`pandas.read_excel`
- - If required, provide a sheet name through driver_kwargs
- * - ``parquet``
- - Binary encoded columnar data format
- - :py:func:`pandas.read_parquet`
- -
- * - ``fwf``
- - Fixed width delimited text files
- - :py:func:`pandas.read_fwf`
- - The formatting of these files can either be inferred or defined by the user, both through the driver_kwargs.
-
+ * - :py:class:`csv `
+ - any file readable by pandas
+ - Provide a sheet name or formatting through options
.. note::
@@ -559,24 +502,28 @@ The resulting dataset is similar to the **GeoDataset** except that it lacks a sp
Supported files
^^^^^^^^^^^^^^^
-The DataFrameAdapter is quite flexible in supporting different types of tabular data formats. All drivers allow for flexible reading of
-files: for example both mapping tables and time series data are supported. Please note that for timeseries, the driver_kwargs need to be used to
-set the correct column for indexing, and formatting and parsing of datetime-strings. See the relevant pandas function for which arguments
-can be used. Also note that the **csv** driver is not restricted to comma-separated files, as the delimiter can be given to the reader
-through the driver_kwargs.
-
-.. code-block:: yaml
-
- observations:
- path: data/lulc/globcover_mapping.csv
- data_type: DataFrame
- driver: csv
- meta:
- category: parameter_mapping
- driver_kwargs:
- header: null # null translates to None in Python -> no header
- index_col: 0
- parse_dates: false
+The DataFrameAdapter is quite flexible in supporting different types of tabular data
+formats. The driver allows for flexible reading of files: for example both mapping
+tables and time series data are supported. Please note that for timeseries, the
+`options` need to be used to set the correct column for indexing, and formatting and
+parsing of datetime-strings. See the relevant pandas function for which arguments can be
+used. Also note that the driver is not restricted to comma-separated files, as
+the delimiter can be given to the reader through the `options`.
+
+.. literalinclude:: ../../assets/data_types/csv_dataframe.yml
+ :language: yaml
+
+.. testsetup:: *
+
+ from hydromt import DataCatalog
+
+.. testcode:: geotiff
+ :hide:
+
+ catalog_path = "docs/assets/data_types/csv_dataframe.yml"
+
+ catalog = DataCatalog(fallback_lib=None) # do not read default catalog
+ catalog.from_yml(catalog_path)
.. note::
The yml-parser does not correctly parses `None` arguments. When this is required, the `null` argument should be used instead.
diff --git a/docs/guides/advanced_user/index.rst b/docs/guides/advanced_user/index.rst
index b78790f37..28dedd088 100644
--- a/docs/guides/advanced_user/index.rst
+++ b/docs/guides/advanced_user/index.rst
@@ -5,7 +5,6 @@ Advanced user guide
architecture
data_prepare_cat
- data_sources
data_types
hydromt_python
methods_stats
diff --git a/docs/guides/core_dev/documentation.rst b/docs/guides/core_dev/documentation.rst
new file mode 100644
index 000000000..bcf9aa327
--- /dev/null
+++ b/docs/guides/core_dev/documentation.rst
@@ -0,0 +1,13 @@
+.. _contribute_documentation:
+
+Adding Documentation
+====================
+
+There are a few guidelines when adding new documentation, or when refactoring the
+current documentation.
+
+- We use the `numpy docstring format `.
+- Code examples or example ``yaml`` files should be tested using the sphinx extension
+ ``doctest``.
+- New APIs should be added to the ``docs/api`` folder. The builtin ``autosummary``
+ and ``toctree`` are used to keep track.
diff --git a/docs/guides/core_dev/index.rst b/docs/guides/core_dev/index.rst
index 86b698fda..60a7918d6 100644
--- a/docs/guides/core_dev/index.rst
+++ b/docs/guides/core_dev/index.rst
@@ -4,4 +4,5 @@ Core developer guide
.. toctree::
contributing
+ documentation
dev_install
diff --git a/hydromt/_io/readers.py b/hydromt/_io/readers.py
index 3b84d0c57..aa964d342 100644
--- a/hydromt/_io/readers.py
+++ b/hydromt/_io/readers.py
@@ -29,8 +29,8 @@
from hydromt._utils.path import _make_config_paths_abs
from hydromt._utils.uris import _is_valid_url
from hydromt.gis import _gis_utils, _vector_utils, raster, vector
-from hydromt.gis._raster_merge import _merge
from hydromt.gis.raster import GEO_MAP_COORD
+from hydromt.gis.raster_merge import merge
if TYPE_CHECKING:
from hydromt._validators.model_config import HydromtModelStep
@@ -360,7 +360,7 @@ def _open_mfraster(
da = da.sortby(concat_dim).transpose(concat_dim, ...)
da.attrs.update(da_lst[0].attrs)
else:
- da = _merge(da_lst, **mosaic_kwargs) # spatial merge
+ da = merge(da_lst, **mosaic_kwargs) # spatial merge
da.attrs.update({"source_file": "; ".join(file_attrs)})
ds = da.to_dataset() # dataset for consistency
else:
diff --git a/hydromt/data_catalog/data_catalog.py b/hydromt/data_catalog/data_catalog.py
index ae64614cf..2e72215b4 100644
--- a/hydromt/data_catalog/data_catalog.py
+++ b/hydromt/data_catalog/data_catalog.py
@@ -665,7 +665,8 @@ def from_yml(
A yaml data entry is provided below, where all the text between <>
should be filled by the user. Multiple data sources of the same
data type should be grouped. Currently the following data types are supported:
- {'RasterDataset', 'GeoDataset', 'GeoDataFrame'}. See the specific data adapters
+ {'RasterDataset', 'GeoDataset', 'GeoDataFrame', 'DataFrame', 'Dataset'}. See the
+ specific data adapters
for more information about the required and optional arguments.
.. code-block:: yaml
@@ -677,22 +678,12 @@ def from_yml(
name:
sha256: # only if the root is an archive
:
- path:
+ uri:
data_type:
driver:
- filesystem:
- driver_kwargs:
- :
- nodata:
- :
- rename:
- :
- :
- unit_add:
- :
- unit_mult:
- :
- meta:
+ data_adapter:
+ uri_resolver:
+ metadata:
source_url:
source_version:
source_licence:
@@ -801,13 +792,9 @@ def from_dict(
"path": ,
"data_type": ,
"driver": ,
- "filesystem": ,
- "driver_kwargs": {: },
- "nodata": ,
- "rename": {: },
- "unit_add": {: },
- "unit_mult": {: },
- "meta": {...},
+ "data_adapter": ,
+ "uri_resolver": ,
+ "metadata": {...},
"placeholders": {: },
}
: {
diff --git a/hydromt/data_catalog/drivers/raster/rasterio_driver.py b/hydromt/data_catalog/drivers/raster/rasterio_driver.py
index 5e7ed6592..b5e596c82 100644
--- a/hydromt/data_catalog/drivers/raster/rasterio_driver.py
+++ b/hydromt/data_catalog/drivers/raster/rasterio_driver.py
@@ -56,6 +56,7 @@ def read(
{"time_range": time_range},
)
kwargs: Dict[str, Any] = {}
+ mosaic_kwargs: Dict[str, Any] = self.options.get("mosaic_kwargs", {})
# get source-specific options
cache_root: str = str(
@@ -78,7 +79,11 @@ def read(
uris = uris_cached
if mask is not None:
- kwargs.update({"mosaic_kwargs": {"mask": mask}})
+ mosaic_kwargs.update({"mask": mask})
+
+ # get mosaic kwargs
+ if mosaic_kwargs:
+ kwargs.update({"mosaic_kwargs": mosaic_kwargs})
if np.issubdtype(type(metadata.nodata), np.number):
kwargs.update(nodata=metadata.nodata)
diff --git a/hydromt/data_catalog/sources/factory.py b/hydromt/data_catalog/sources/factory.py
index dd6868212..84bd5d2d4 100644
--- a/hydromt/data_catalog/sources/factory.py
+++ b/hydromt/data_catalog/sources/factory.py
@@ -6,6 +6,7 @@
from hydromt._typing.type_def import DataType
from hydromt.data_catalog.sources import (
DataFrameSource,
+ DatasetSource,
DataSource,
GeoDataFrameSource,
GeoDatasetSource,
@@ -15,6 +16,7 @@
# Map DataType to DataSource, need to add here when implementing a new Type
available_sources: Dict[DataType, DataSource] = {
"DataFrame": DataFrameSource,
+ "Dataset": DatasetSource,
"RasterDataset": RasterDatasetSource,
"GeoDataFrame": GeoDataFrameSource,
"GeoDataset": GeoDatasetSource,
diff --git a/hydromt/data_catalog/uri_resolvers/convention_resolver.py b/hydromt/data_catalog/uri_resolvers/convention_resolver.py
index 4171eeb60..b0d2570cb 100644
--- a/hydromt/data_catalog/uri_resolvers/convention_resolver.py
+++ b/hydromt/data_catalog/uri_resolvers/convention_resolver.py
@@ -87,7 +87,6 @@ def resolve(
variables: Optional[List[str]] = None,
metadata: Optional[SourceMetadata] = None,
handle_nodata: NoDataStrategy = NoDataStrategy.RAISE,
- options: Optional[Dict[str, Any]] = None,
) -> List[str]:
"""Resolve the placeholders in the URI using naming conventions.
@@ -107,8 +106,6 @@ def resolve(
DataSource metadata.
handle_nodata : NoDataStrategy, optional
how to react when no data is found, by default NoDataStrategy.RAISE
- options : Optional[Dict[str, Any]], optional
- extra options for this resolver, by default None
Returns
-------
diff --git a/hydromt/data_catalog/uri_resolvers/raster_tindex_resolver.py b/hydromt/data_catalog/uri_resolvers/raster_tindex_resolver.py
index 3e6856e65..fe0cae660 100644
--- a/hydromt/data_catalog/uri_resolvers/raster_tindex_resolver.py
+++ b/hydromt/data_catalog/uri_resolvers/raster_tindex_resolver.py
@@ -3,7 +3,7 @@
from logging import Logger, getLogger
from os.path import abspath, dirname, join
from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import List, Optional, Union
import geopandas as gpd
@@ -34,7 +34,6 @@ def resolve(
variables: Union[int, tuple[float, str], None] = None,
metadata: Optional[SourceMetadata],
handle_nodata: NoDataStrategy = NoDataStrategy.RAISE,
- options: Optional[Dict[str, Any]] = None,
) -> List[str]:
"""Resolve URIs of a raster tindex file.
@@ -54,8 +53,6 @@ def resolve(
DataSource metadata.
handle_nodata : NoDataStrategy, optional
how to react when no data is found, by default NoDataStrategy.RAISE
- options : Optional[Dict[str, Any]], optional
- extra options for this resolver, by default None
Returns
-------
@@ -71,7 +68,7 @@ def resolve(
raise ValueError(f"Resolver {self.name} needs a mask")
gdf = gpd.read_file(uri)
gdf = gdf.iloc[gdf.sindex.query(mask.to_crs(gdf.crs).union_all())]
- tileindex: Optional[str] = options.get("tileindex")
+ tileindex: Optional[str] = self.options.get("tileindex")
if tileindex is None:
raise ValueError(
f"{self.__class__.__name__} needs options specifying 'tileindex'"
diff --git a/hydromt/data_catalog/uri_resolvers/uri_resolver.py b/hydromt/data_catalog/uri_resolvers/uri_resolver.py
index f064ef12a..f9eafc9f4 100644
--- a/hydromt/data_catalog/uri_resolvers/uri_resolver.py
+++ b/hydromt/data_catalog/uri_resolvers/uri_resolver.py
@@ -19,6 +19,7 @@ class URIResolver(AbstractBaseModel, ABC):
model_config = ConfigDict(extra="forbid")
filesystem: FS = Field(default_factory=LocalFileSystem)
+ options: Dict[str, Any] = Field(default_factory=dict)
@abstractmethod
def resolve(
@@ -31,7 +32,6 @@ def resolve(
zoom_level: Optional[Zoom] = None,
metadata: Optional[SourceMetadata] = None,
handle_nodata: NoDataStrategy = NoDataStrategy.RAISE,
- options: Optional[Dict[str, Any]] = None,
) -> List[str]:
"""Resolve a single uri to multiple uris.
@@ -51,8 +51,6 @@ def resolve(
Metadata of DataSource.
handle_nodata : NoDataStrategy, optional
how to react when no data is found, by default NoDataStrategy.RAISE
- options : Optional[Dict[str, Any]], optional
- extra options for this resolver, by default None
Returns
-------
diff --git a/hydromt/gis/_raster_merge.py b/hydromt/gis/raster_merge.py
similarity index 99%
rename from hydromt/gis/_raster_merge.py
rename to hydromt/gis/raster_merge.py
index a479e10f6..26782efd7 100644
--- a/hydromt/gis/_raster_merge.py
+++ b/hydromt/gis/raster_merge.py
@@ -6,10 +6,10 @@
from hydromt.gis.raster import full_from_transform
-__all__ = ["_merge"]
+__all__ = ["merge"]
-def _merge(
+def merge(
data_arrays,
dst_crs=None,
dst_bounds=None,
diff --git a/tests/data_catalog/drivers/raster/test_rasterio_driver.py b/tests/data_catalog/drivers/raster/test_rasterio_driver.py
index 2cff2539c..b656dbd10 100644
--- a/tests/data_catalog/drivers/raster/test_rasterio_driver.py
+++ b/tests/data_catalog/drivers/raster/test_rasterio_driver.py
@@ -2,6 +2,7 @@
from os.path import join
from pathlib import Path
from typing import Tuple
+from unittest.mock import MagicMock, patch
import numpy as np
import pytest
@@ -63,6 +64,13 @@ def test_sets_nodata(self, rioda: xr.DataArray, tmp_path: Path):
)
assert ds["test_sets_nodata"].raster.nodata == 42
+ @patch("hydromt.data_catalog.drivers.raster.rasterio_driver._open_mfraster")
+ def test_sets_mosaic_kwargs(self, fake_open_mfraster: MagicMock):
+ uris = ["test", "test2"]
+ mosaic_kwargs = {"mykwarg": 0}
+ RasterioDriver(options={"mosaic_kwargs": mosaic_kwargs}).read(uris=uris)
+ fake_open_mfraster.assert_called_once_with(uris, mosaic_kwargs=mosaic_kwargs)
+
class TestOpenMFRaster:
@pytest.fixture()
diff --git a/tests/data_catalog/uri_resolvers/test_raster_tindex_resolver.py b/tests/data_catalog/uri_resolvers/test_raster_tindex_resolver.py
index 4a34b39f0..92178117e 100644
--- a/tests/data_catalog/uri_resolvers/test_raster_tindex_resolver.py
+++ b/tests/data_catalog/uri_resolvers/test_raster_tindex_resolver.py
@@ -70,12 +70,13 @@ def test_resolves_correctly(self, raster_tindex):
geom = gpd.GeoDataFrame(geometry=[box(-78, 0.0005, -65, 4)], crs=4326)
metadata = SourceMetadata()
options = {"tileindex": "location"}
- resolver = RasterTindexResolver(filesystem=AbstractFileSystem())
+ resolver = RasterTindexResolver(
+ filesystem=AbstractFileSystem(), options=options
+ )
paths = resolver.resolve(
uri=raster_tindex,
metadata=metadata,
mask=geom,
- options=options,
)
assert len(paths) == 2
assert (
@@ -92,7 +93,6 @@ def test_resolves_correctly(self, raster_tindex):
uri=raster_tindex,
metadata=metadata,
mask=geom,
- options=options,
)
assert len(paths) == 1
path = str(Path(join(dirname(raster_tindex), "GRWL_mask_V01.01/NA19.tif")))
@@ -110,13 +110,14 @@ def test_raises_no_tileindex(self, raster_tindex):
uri=raster_tindex,
metadata=metadata,
mask=geom,
- options={},
)
def test_raises_missing_tileindex(self, raster_tindex):
- resolver = RasterTindexResolver(filesystem=AbstractFileSystem())
- metadata = SourceMetadata()
options = {"tileindex": "file"}
+ resolver = RasterTindexResolver(
+ filesystem=AbstractFileSystem(), options=options
+ )
+ metadata = SourceMetadata()
geom = gpd.GeoDataFrame(geometry=[box(-78, 0.0005, -65, 4)], crs=4326)
with pytest.raises(
IOError,
@@ -126,18 +127,18 @@ def test_raises_missing_tileindex(self, raster_tindex):
uri=raster_tindex,
metadata=metadata,
mask=geom,
- options=options,
)
def test_raises_no_intersecting_files(self, raster_tindex):
- resolver = RasterTindexResolver(filesystem=AbstractFileSystem())
- metadata = SourceMetadata()
options = {"tileindex": "file"}
+ resolver = RasterTindexResolver(
+ filesystem=AbstractFileSystem(), options=options
+ )
+ metadata = SourceMetadata()
geom = gpd.GeoDataFrame(geometry=[box(4, 52, 5, 53)], crs=4326)
with pytest.raises(NoDataException, match="found no intersecting tiles."):
resolver.resolve(
uri=raster_tindex,
metadata=metadata,
mask=geom,
- options=options,
)