diff --git a/.github/workflows/pytest-ci.yml b/.github/workflows/pytest-ci.yml new file mode 100644 index 00000000..07612002 --- /dev/null +++ b/.github/workflows/pytest-ci.yml @@ -0,0 +1,46 @@ +# Workflow to run the pytest test suite. + +name: pytest CI + +on: + pull_request: + branches: + - master + push: + branches: + - master + +jobs: + test: + runs-on: ubuntu-latest + + steps: + # Checkout the repository under $GITHUB_WORKSPACE + - uses: actions/checkout@v2 + + # initialize conda + - name: Conda setup + uses: s-weigand/setup-conda@v1 + with: + update-conda: true + python-version: 3.7 + + # cache the conda installation to speedup the CI runs + - uses: actions/cache@v2 + id: cache + with: + path: /usr/share/miniconda/envs/neuralhydrology + key: ${{ runner.os }}-conda-cache-${{ hashFiles('environments/environment_cuda10_2.yml') }} + + # on cache miss, create the env from scratch + - name: Conda environment creation + if: steps.cache.outputs.cache-hit != 'true' + run: | + conda env create -f environments/environment_cuda10_2.yml + source activate neuralhydrology + + # Run the tests + - name: Testing with pytest + run: | + source activate neuralhydrology + pytest --cov=neuralhydrology diff --git a/.gitignore b/.gitignore index c1832336..3a8ddfb2 100644 --- a/.gitignore +++ b/.gitignore @@ -12,8 +12,8 @@ dist/* neuralhydrology.egg-info/* .vscode/* .idea/* -runs/* +runs/ configs/* -.ipynb_checkpoints/* +.ipynb_checkpoints/ data/* docs/build/* diff --git a/README.md b/README.md index 3b0cbf55..141196ce 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,8 @@ this code in our day-to-day research and will continue to integrate our new rese **Note:** We will gradually add more examples/documentation over the next couple of days/weeks. 
-- Documentation: [neuralhydrology.readthedocs.io](neuralhydrology.readthedocs.io) -- Research Blog: [neuralhydrology.github.io](neuralhydrology.github.io) +- Documentation: [neuralhydrology.readthedocs.io](https://neuralhydrology.readthedocs.io) +- Research Blog: [neuralhydrology.github.io](https://neuralhydrology.github.io) - Bug reports/Feature requests [https://github.com/neuralhydrology/neuralhydrology/issues](https://github.com/neuralhydrology/neuralhydrology/issues) # Getting started @@ -115,4 +115,4 @@ use the ``nh-results-ensemble`` script:: # Contact If you have any questions regarding the usage of this repository, feature requests or comments, please open an issue. -You can also reach out to Frederik Kratzert (kratzert(at)ml.jku.at) by email. \ No newline at end of file +You can also reach out to Frederik Kratzert (kratzert(at)ml.jku.at) by email. diff --git a/docs/source/api/neuralhydrology.rst b/docs/source/api/neuralhydrology.rst index 21746964..eddeb281 100644 --- a/docs/source/api/neuralhydrology.rst +++ b/docs/source/api/neuralhydrology.rst @@ -1,5 +1,5 @@ -neuralhydrology -=============== +neuralhydrology API +=================== .. automodule:: neuralhydrology :members: diff --git a/docs/source/conf.py b/docs/source/conf.py index 75568bd9..044745aa 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -10,18 +10,22 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
# +import datetime import os import sys sys.path.insert(0, os.path.abspath('.')) sys.path.insert(0, os.path.abspath('../../')) # -- Project information ----------------------------------------------------- +about = {} +with open('../../neuralhydrology/__about__.py', "r") as fp: + exec(fp.read(), about) -project = 'NeuralHydrology' -copyright = '2020, Frederik Kratzert' +project = 'neuralHydrology' +copyright = f'{datetime.datetime.now().year}, Frederik Kratzert' author = 'Frederik Kratzert' # The full version, including alpha/beta/rc tags -release = '0.9.0-beta' +release = about["__version__"] # -- General configuration --------------------------------------------------- @@ -31,7 +35,9 @@ extensions = [ 'sphinx.ext.autodoc', # autodocument 'sphinx.ext.napoleon', # google and numpy doc string support - 'sphinx.ext.mathjax' # latex rendering of equations using MathJax + 'sphinx.ext.mathjax', # latex rendering of equations using MathJax + 'nbsphinx', # for direct embedding of jupyter notebooks into sphinx docs + 'nbsphinx_link' # to be able to include notebooks from outside of the docs folder ] # Add any paths that contain templates here, relative to this directory. @@ -40,7 +46,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] +exclude_patterns = ['**.ipynb_checkpoints'] # -- Options for HTML output ------------------------------------------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index c57b6085..0e023dc4 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -3,13 +3,17 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to NeuralHydrology's documentation! +Welcome to neuralHydrology's documentation! =========================================== +The documentation is still work-in-progress. 
Stay tuned for a lot of updates during the next days/weeks, as well as +a handful of tutorials. + .. toctree:: - :maxdepth: 5 + :maxdepth: 2 :caption: Contents: usage/quickstart usage/models + tutorials/index api/neuralhydrology diff --git a/docs/source/tutorials/index.rst b/docs/source/tutorials/index.rst new file mode 100644 index 00000000..5f6e8275 --- /dev/null +++ b/docs/source/tutorials/index.rst @@ -0,0 +1,11 @@ +Tutorials +--------- + +We will gradually add more tutorials over the next couple of weeks, highlighting some of +the functionality of this Python package. + +.. toctree:: + :maxdepth: 1 + :caption: Contents: + + introduction diff --git a/docs/source/tutorials/introduction.nblink b/docs/source/tutorials/introduction.nblink new file mode 100644 index 00000000..bd966504 --- /dev/null +++ b/docs/source/tutorials/introduction.nblink @@ -0,0 +1,3 @@ +{ + "path": "../../../examples/01-Introduction/Introduction.ipynb" +} \ No newline at end of file diff --git a/docs/source/usage/models.rst b/docs/source/usage/models.rst index 567b77bb..e1095ad4 100644 --- a/docs/source/usage/models.rst +++ b/docs/source/usage/models.rst @@ -1,28 +1,29 @@ -Models -====== +Modelzoo +======== -The following section gives an overview over all implemented models. +The following section gives an overview of all implemented models. See `Implementing a new model`_ for details +on how to add your own model to the neuralHydrology package. + +BaseModel +--------- +Abstract base class from which all models derive. Do not use this class for model training. CudaLSTM -------- :py:class:`neuralhydrology.modelzoo.cudalstm.CudaLSTM` is a network using the standard PyTorch LSTM implementation. -All features (``x_d``, ``x_s``, ``x_one_hot``) are concatenated and passed at each time step. -Initial forget gate bias can be set (in config.yml) and will be set during model initialization. +All features (``x_d``, ``x_s``, ``x_one_hot``) are concatenated and passed to the network at each time step. 
+The initial forget gate bias can be defined in config.yml (``initial_forget_bias``) and will be set accordingly during +model initialization. EA-LSTM ------- -:py:class:`neuralhydrology.modelzoo.ealstm.EALSTM` is an implementation of the Entity-Aware LSTM, as used -in the 2019 HESS paper. The static features (``x_s`` and/or ``x_one_hot``) are used to compute the input gate -activations, while ``x_d`` is used in all other gates of the network. -Initial forget gate bias can be set, and if ``embedding_hiddens`` is passed, the input gate consists of the so-defined +:py:class:`neuralhydrology.modelzoo.ealstm.EALSTM` is an implementation of the Entity-Aware LSTM, as introduced in +`Kratzert et al. "Towards learning universal, regional, and local hydrological behaviors via machine learning applied to large-sample datasets" <https://doi.org/10.5194/hess-23-5089-2019>`__. +The static features (``x_s`` and/or ``x_one_hot``) are used to compute the input gate activations, while the dynamic +inputs ``x_d`` are used in all other gates of the network. +The initial forget gate bias can be defined in config.yml (``initial_forget_bias``). If ``embedding_hiddens`` is passed, the input gate consists of the so-defined FC network and not a single linear layer. -LSTM ----- -:py:class:`neuralhydrology.modelzoo.lstm.LSTM` is an own LSTM implementation. -Momentarily, the only advantage compared to CudaLSTM is the return of the entire cell state array. -This class will be most likely adapted/changed in the near future to provide much more flexibility for various settings. - EmbCudaLSTM ----------- :py:class:`neuralhydrology.modelzoo.embcudalstm.EmbCudaLSTM` is similar to `CudaLSTM`_, @@ -31,6 +32,26 @@ with the only difference that static inputs (``x_s`` and/or ``x_one_hot``) are p at each time step. +LSTM +---- +:py:class:`neuralhydrology.modelzoo.lstm.LSTM` is a PyTorch port of the CudaLSTM that returns all gate and state +activations for all time steps. This class is implemented for exploratory reasons. 
You can use the method +``model.copy_weights()`` to copy the weights of a ``CudaLSTM`` model into an ``LSTM`` model. This allows you to use the fast +CUDA implementation for training, and only use this class for inference with more detailed outputs. + +MultiFreqLSTM +------------- +:py:class:`neuralhydrology.modelzoo.multifreqlstm.MultiFreqLSTM` is a newly proposed model by Gauch et al. (pre-print +published soon). This model allows training on more than one temporal frequency (e.g. daily and hourly inputs) and +returns multi-frequency model predictions accordingly. A more detailed tutorial will follow shortly. + +ODELSTM +------- +:py:class:`neuralhydrology.modelzoo.odelstm.ODELSTM` is a PyTorch implementation of the ODE-LSTM proposed by +`Lechner and Hasani <https://arxiv.org/abs/2006.04418>`_. This model can be used with unevenly sampled inputs and can +be queried to return predictions for any arbitrary time step. + + Implementing a new model ------------------------ The listing below shows the skeleton of a template model you can use to start implementing your own model. @@ -38,7 +59,7 @@ Once you have implemented your model, make sure to modify :py:func:`neuralhydrol Furthermore, make sure to select a *unique* model abbreviation that will be used to specify the model in the config.yml files. -:: +.. code-block:: python from typing import Dict @@ -105,4 +126,3 @@ files. # Implement forward pass here # ############################### pass - diff --git a/docs/source/usage/quickstart.rst b/docs/source/usage/quickstart.rst index 25c396c5..18a018a8 100644 --- a/docs/source/usage/quickstart.rst +++ b/docs/source/usage/quickstart.rst @@ -3,18 +3,17 @@ Quick Start Installation ------------ -The neuralhydrology project is available on PyPI. -Hence, installation is as easy as:: +For now, download or clone the repository to your local machine and install a local, editable copy. 
+This is a good idea if you want to edit the ``neuralhydrology`` code (e.g., adding new models or datasets). - pip install neuralhydrology +.. code-block:: bash -Alternatively, you can clone the repository and install the local, editable copy. This is a good idea if you want to -edit the ``neuralhydrology`` code (e.g., adding new models or datasets).:: - - git clone https://github.com/kratzert/lstm_based_hydrology.git - cd lstm_based_hydrology + git clone https://github.com/neuralhydrology/neuralhydrology.git + cd neuralhydrology pip install -e . +Besides adding the package to your Python environment, it will also add three bash scripts: +``nh-run``, ``nh-run-scheduler`` and ``nh-results-ensemble``. For details, see below. Data ---- @@ -35,7 +34,7 @@ To train a model, prepare a configuration file, then run:: If you want to train multiple models, you can make use of the ``nh-run-scheduler`` command. Place all configs in a folder, then run:: - nh-run-scheduler --config-dir /path/to/config_dir/ --runs-per-gpu X --gpu-ids Y + nh-run-scheduler train --config-dir /path/to/config_dir/ --runs-per-gpu X --gpu-ids Y With X, you can specify how many models should be trained on parallel on a single GPU. With Y, you can specify which GPUs to use for training (use the id as specified in ``nvidia-smi``). @@ -52,7 +51,7 @@ the weights of the last epoch are used. To evaluate all runs in a specific directory you can, similarly to training, run:: - nh-run-scheduler --mode evaluate --run-dir /path/to/config_dir/ --runs-per-gpu X --gpu-ids Y + nh-run-scheduler evaluate --run-dir /path/to/config_dir/ --runs-per-gpu X --gpu-ids Y To merge the predictons of a number of runs (stored in ``$DIR1``, ...) 
into one averaged ensemble, diff --git a/environments/environment_cpu.yml b/environments/environment_cpu.yml index cb1e768b..ce9d16af 100644 --- a/environments/environment_cpu.yml +++ b/environments/environment_cpu.yml @@ -27,4 +27,5 @@ dependencies: - pip: - tensorboard - sphinx-rtd-theme - + - nbsphinx + - nbsphinx-link \ No newline at end of file diff --git a/environments/environment_cuda10_2.yml b/environments/environment_cuda10_2.yml index 0b2f5a8f..1e6ab168 100644 --- a/environments/environment_cuda10_2.yml +++ b/environments/environment_cuda10_2.yml @@ -27,4 +27,5 @@ dependencies: - pip: - tensorboard - sphinx-rtd-theme - + - nbsphinx + - nbsphinx-link \ No newline at end of file diff --git a/environments/environment_cuda9_2.yml b/environments/environment_cuda9_2.yml index 5e06c5ff..54020dbd 100644 --- a/environments/environment_cuda9_2.yml +++ b/environments/environment_cuda9_2.yml @@ -27,4 +27,5 @@ dependencies: - pip: - tensorboard - sphinx-rtd-theme - + - nbsphinx + - nbsphinx-link \ No newline at end of file diff --git a/examples/01-Introduction/1_basin.txt b/examples/01-Introduction/1_basin.txt new file mode 100644 index 00000000..94860eb2 --- /dev/null +++ b/examples/01-Introduction/1_basin.txt @@ -0,0 +1 @@ +01022500 diff --git a/examples/01-Introduction/1_basin.yml b/examples/01-Introduction/1_basin.yml new file mode 100644 index 00000000..0b5e3e28 --- /dev/null +++ b/examples/01-Introduction/1_basin.yml @@ -0,0 +1,145 @@ +# --- Experiment configurations -------------------------------------------------------------------- + +# experiment name, used as folder name +experiment_name: test_run + +# files to specify training, validation and test basins (relative to code root or absolute path) +train_basin_file: 1_basin.txt +validation_basin_file: 1_basin.txt +test_basin_file: 1_basin.txt + +# training, validation and test time periods (format = 'dd/mm/yyyy') +train_start_date: '01/10/1999' +train_end_date: '30/09/2008' +validation_start_date: 
'01/10/1980' +validation_end_date: '30/09/1989' +test_start_date: '01/10/1989' +test_end_date: '30/09/1999' + +# which GPU (id) to use [in format of cuda:0, cuda:1 etc, or cpu or None] +device: cuda:0 + +# --- Validation configuration --------------------------------------------------------------------- + +# specify after how many epochs to perform validation +validate_every: 3 + +# specify how many random basins to use for validation +validate_n_random_basins: 1 + +# specify which metrics to calculate during validation (see neuralhydrology.evaluation.metrics) +# this can either be a list or a dictionary. If a dictionary is used, the inner keys must match the name of the +# target_variable specified below. Using dicts allows for different metrics per target variable. +metrics: +- NSE + +# --- Model configuration -------------------------------------------------------------------------- + +# base model type [lstm, ealstm, cudalstm, embcudalstm, multifreqlstm] +# (has to match the if statement in modelzoo/__init__.py) +model: cudalstm + +# prediction head [regression]. Define the head specific parameters below +head: regression + +# ----> Regression settings <---- +output_activation: linear + +# ----> General settings <---- + +# Number of cell states of the LSTM +hidden_size: 20 + +# Initial bias value of the forget gate +initial_forget_bias: 3 + +# Dropout applied to the output of the LSTM +output_dropout: 0.4 + +# --- Training configuration ----------------------------------------------------------------------- + +# specify optimizer [Adam] +optimizer: Adam + +# specify loss [MSE, NSE, RMSE] +loss: MSE + +# specify learning rates to use starting at specific epochs (0 is the initial learning rate) +learning_rate: + 0: 1e-2 + 30: 5e-3 + 40: 1e-3 + +# Mini-batch size +batch_size: 256 + +# Number of training epochs +epochs: 50 + +# If a value, clips the gradients during training to that norm. 
+clip_gradient_norm: 1 + +# Defines which time steps are used to calculate the loss. Can't be larger than seq_length. +# If use_frequencies is used, this needs to be a dict mapping each frequency to a predict_last_n-value, else an int. +predict_last_n: 1 + +# Length of the input sequence +# If use_frequencies is used, this needs to be a dict mapping each frequency to a seq_length, else an int. +seq_length: 365 + +# Number of parallel workers used in the data pipeline +num_workers: 8 + +# Log the training loss every n steps +log_interval: 5 + +# If true, writes logging results into tensorboard file +log_tensorboard: True + +# If a value and greater than 0, logs n random basins as figures during validation +log_n_figures: 1 + +# Save model weights every n epochs +save_weights_every: 1 + +# --- Data configurations -------------------------------------------------------------------------- + +# which data set to use [camels_us, camels_gb, global, hourly_camels_us] +dataset: camels_us + +# Path to data set root +data_dir: /data/Hydrology/CAMELS_US + +# Forcing product [daymet, maurer, maurer_extended, nldas, nldas_extended, nldas_hourly] +# can be either a list of forcings or a single forcing product +forcings: +- maurer_extended +- daymet +- nldas_extended + +dynamic_inputs: +- PRCP(mm/day)_nldas_extended +- SRAD(W/m2)_nldas_extended +- Tmax(C)_nldas_extended +- Tmin(C)_nldas_extended +- Vp(Pa)_nldas_extended +- prcp(mm/day)_maurer_extended +- srad(W/m2)_maurer_extended +- tmax(C)_maurer_extended +- tmin(C)_maurer_extended +- vp(Pa)_maurer_extended +- prcp(mm/day)_daymet +- srad(W/m2)_daymet +- tmax(C)_daymet +- tmin(C)_daymet +- vp(Pa)_daymet + +# which columns to use as target +target_variables: +- QObs(mm/d) + +# clip negative predictions to zero for all variables listed below. Should be a list, even for single variables. 
+clip_target_to_zero: +- QObs(mm/d) + +zero_center_target: True diff --git a/examples/01-Introduction/Introduction.ipynb b/examples/01-Introduction/Introduction.ipynb new file mode 100644 index 00000000..aa2525cf --- /dev/null +++ b/examples/01-Introduction/Introduction.ipynb @@ -0,0 +1,891 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction to neuralHydrology\n", + "The Python package `neuralHydrology` was developed with a strong focus on research. The main application area is hydrology; however, in principle the code can be used with any data. To allow fast iteration of research ideas, we tried to develop the package as modular as possible so that new models, new data sets, new loss functions, new regularizations, new metrics etc. can be integrated with minor effort.\n", + "\n", + "There are two different ways to use this package:\n", + "\n", + "1. From the terminal, making use of some high-level entry points (such as `nh-run` and `nh-run-scheduler`)\n", + "2. From any other Python file or Jupyter Notebook, using neuralHydrology's API\n", + "\n", + "In this tutorial, we will give a very short overview of the two different modes.\n", + "\n", + "Both approaches require a **configuration file**. These are `.yml` files which define the entire run configuration (such as data set, basins, data periods, model specifications, etc.). A full list of config arguments is listed in the [Wiki on GitHub](https://github.com/neuralhydrology/neuralhydrology/wiki/Config-arguments) and we highly recommend checking this page and reading the documentation carefully. There is a lot that you can do with this Python package and we can't cover everything in tutorials.\n", + "\n", + "For every run that you start, a new folder will be created. 
This folder is used to store the model and optimizer checkpoints, train data means/stds (needed for scaling during inference), tensorboard log file (can be used to monitor and compare training runs visually), validation results (optionally) and training progress figures (optionally, e.g., model predictions and observations for _n_ random basins). During inference, the evaluation results will also be stored in this directory (e.g., test period results).\n", + "\n", + "\n", + "### TensorBoard logging\n", + "By default, the training progress is logged in TensorBoard files (add `log_tensorboard: False` to the config to disable TensorBoard logging). If you installed a Python environment from one of our environment files, you have TensorBoard already installed. If not, you can install TensorBoard with:\n", + "\n", + "```\n", + "pip install tensorboard\n", + "``` \n", + "\n", + "To start the TensorBoard dashboard, run:\n", + "\n", + "```\n", + "tensorboard --logdir /path/to/run-dir\n", + "```\n", + "\n", + "You can also visualize multiple runs at once if you point the `--logdir` to the parent directory (useful for model intercomparison)\n", + "\n", + "### File logging\n", + "In addition to TensorBoard, you will always find a file called `output.log` in the run directory. This file is a dump of the console output you see during training and evaluation.\n", + "\n", + "\n", + "## Using `neuralHydrology` from the Terminal\n", + "\n", + "### nh-run\n", + "\n", + "\n", + "Given a run configuration file, you can use the bash command `nh-run` to train/evaluate a model. To train a model, use\n", + "\n", + "\n", + "```bash\n", + "nh-run train --config-file path/to/config.yml\n", + "```\n", + "\n", + "to evaluate the model after training, use\n", + "\n", + "```bash\n", + "nh-run evaluate --run-dir path/to/run-directory\n", + "```\n", + "\n", + "### nh-run-scheduler\n", + "\n", + "If you want to train/evaluate multiple models on different GPUs, you can use the `nh-run-scheduler`. 
This tool automatically distributes runs across GPUs and starts a new one whenever a run finishes.\n", + "\n", + "Calling `nh-run-scheduler` in `train` mode will train one model for each `.yml` file in a directory (or its sub-directories).\n", + "\n", + "```bash\n", + "nh-run-scheduler train --directory /path/to/config-dir --runs-per-gpu 2 --gpu-ids 0 1 2 3 \n", + "```\n", + "Use `--runs-per-gpu` to define the number of models that are simultaneously trained on a _single_ GPU (2 in this case) and `--gpu-ids` to define which GPUs will be used (numbers are ids according to nvidia-smi). In this example, 8 models will train simultaneously on 4 different GPUs.\n", + "\n", + "Calling `nh-run-scheduler` in `evaluate` mode will evaluate all models in all run directories in a given root directory.\n", + "\n", + "```bash\n", + "nh-run-scheduler evaluate --directory /path/to/parent-run-dir/ --runs-per-gpu 2 --gpu-ids 0 1 2 3 \n", + "```\n", + "\n", + "## API usage\n", + "\n", + "Besides the command line tools, you can also use the neuralHydrology package just like any other Python package by importing its modules, classes, or functions.\n", + "\n", + "This can be helpful for exploratory studies with trained models, but also if you want to use some of the functions or classes within a different codebase. \n", + "\n", + "Look at the [API Documentation](https://neuralhydrology.readthedocs.io/en/latest/api/neuralhydrology.html) for a full list of functions/classes you could use.\n", + "\n", + "The following example shows how to train and evaluate a model via the API." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "from pathlib import Path\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from neuralhydrology.evaluation import metrics\n", + "from neuralhydrology.nh_run import start_run, eval_run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train a model for a single config file" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2020-10-05 12:52:52,508: Logging to /home/frederik/Projects/neuralhydrology/examples/01-Introduction/runs/test_run_0510_125252/output.log initialized.\n", + "2020-10-05 12:52:52,510: ### Folder structure created at /home/frederik/Projects/neuralhydrology/examples/01-Introduction/runs/test_run_0510_125252\n", + "2020-10-05 12:52:52,510: ### Run configurations for test_run\n", + "2020-10-05 12:52:52,511: experiment_name: test_run\n", + "2020-10-05 12:52:52,512: train_basin_file: 1_basin.txt\n", + "2020-10-05 12:52:52,513: validation_basin_file: 1_basin.txt\n", + "2020-10-05 12:52:52,514: test_basin_file: 1_basin.txt\n", + "2020-10-05 12:52:52,514: train_start_date: 1999-10-01 00:00:00\n", + "2020-10-05 12:52:52,515: train_end_date: 2008-09-30 00:00:00\n", + "2020-10-05 12:52:52,516: validation_start_date: 1980-10-01 00:00:00\n", + "2020-10-05 12:52:52,517: validation_end_date: 1989-09-30 00:00:00\n", + "2020-10-05 12:52:52,517: test_start_date: 1989-10-01 00:00:00\n", + "2020-10-05 12:52:52,518: test_end_date: 1999-09-30 00:00:00\n", + "2020-10-05 12:52:52,519: device: cuda:0\n", + "2020-10-05 12:52:52,520: validate_every: 3\n", + "2020-10-05 12:52:52,520: validate_n_random_basins: 1\n", + "2020-10-05 12:52:52,521: metrics: ['NSE']\n", + "2020-10-05 12:52:52,522: model: cudalstm\n", + "2020-10-05 12:52:52,524: head: regression\n", + "2020-10-05 
12:52:52,524: output_activation: linear\n", + "2020-10-05 12:52:52,525: hidden_size: 20\n", + "2020-10-05 12:52:52,525: initial_forget_bias: 3\n", + "2020-10-05 12:52:52,526: output_dropout: 0.4\n", + "2020-10-05 12:52:52,527: optimizer: Adam\n", + "2020-10-05 12:52:52,527: loss: MSE\n", + "2020-10-05 12:52:52,528: learning_rate: {0: 0.01, 30: 0.005, 40: 0.001}\n", + "2020-10-05 12:52:52,529: batch_size: 256\n", + "2020-10-05 12:52:52,529: epochs: 50\n", + "2020-10-05 12:52:52,530: clip_gradient_norm: 1\n", + "2020-10-05 12:52:52,530: predict_last_n: 1\n", + "2020-10-05 12:52:52,531: seq_length: 365\n", + "2020-10-05 12:52:52,533: num_workers: 8\n", + "2020-10-05 12:52:52,534: log_interval: 5\n", + "2020-10-05 12:52:52,535: log_tensorboard: True\n", + "2020-10-05 12:52:52,536: log_n_figures: 1\n", + "2020-10-05 12:52:52,537: save_weights_every: 1\n", + "2020-10-05 12:52:52,537: dataset: camels_us\n", + "2020-10-05 12:52:52,538: data_dir: /data/Hydrology/CAMELS_US\n", + "2020-10-05 12:52:52,539: forcings: ['maurer_extended', 'daymet', 'nldas_extended']\n", + "2020-10-05 12:52:52,539: dynamic_inputs: ['PRCP(mm/day)_nldas_extended', 'SRAD(W/m2)_nldas_extended', 'Tmax(C)_nldas_extended', 'Tmin(C)_nldas_extended', 'Vp(Pa)_nldas_extended', 'prcp(mm/day)_maurer_extended', 'srad(W/m2)_maurer_extended', 'tmax(C)_maurer_extended', 'tmin(C)_maurer_extended', 'vp(Pa)_maurer_extended', 'prcp(mm/day)_daymet', 'srad(W/m2)_daymet', 'tmax(C)_daymet', 'tmin(C)_daymet', 'vp(Pa)_daymet']\n", + "2020-10-05 12:52:52,540: target_variables: ['QObs(mm/d)']\n", + "2020-10-05 12:52:52,541: clip_target_to_zero: ['QObs(mm/d)']\n", + "2020-10-05 12:52:52,542: zero_center_target: True\n", + "2020-10-05 12:52:52,543: number_of_basins: 1\n", + "2020-10-05 12:52:52,544: run_dir: /home/frederik/Projects/neuralhydrology/examples/01-Introduction/runs/test_run_0510_125252\n", + "2020-10-05 12:52:52,544: train_dir: 
/home/frederik/Projects/neuralhydrology/examples/01-Introduction/runs/test_run_0510_125252/train_data\n", + "2020-10-05 12:52:52,545: img_log_dir: /home/frederik/Projects/neuralhydrology/examples/01-Introduction/runs/test_run_0510_125252/img_log\n", + "2020-10-05 12:52:52,586: ### Device cuda:0 will be used for training\n", + "2020-10-05 12:52:55,028: Loading basin data into xarray data set.\n", + "100%|██████████| 1/1 [00:00<00:00, 5.97it/s]\n", + "2020-10-05 12:52:55,213: Create lookup table and convert to pytorch tensor\n", + "100%|██████████| 1/1 [00:01<00:00, 1.29s/it]\n", + "# Epoch 1: 100%|██████████| 13/13 [00:00<00:00, 21.75it/s, Loss: 0.3369]\n", + "2020-10-05 12:52:57,229: Epoch 1 average loss: 0.40770340997439164\n", + "# Epoch 2: 100%|██████████| 13/13 [00:00<00:00, 21.06it/s, Loss: 0.1233]\n", + "2020-10-05 12:52:57,852: Epoch 2 average loss: 0.2696097624989656\n", + "# Epoch 3: 100%|██████████| 13/13 [00:00<00:00, 20.21it/s, Loss: 0.1440]\n", + "2020-10-05 12:52:58,503: Epoch 3 average loss: 0.2056583693394294\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 1.15it/s]\n", + "2020-10-05 12:52:59,650: -- Median validation metrics:NSE: 0.62918\n", + "# Epoch 4: 100%|██████████| 13/13 [00:00<00:00, 21.25it/s, Loss: 0.1681]\n", + "2020-10-05 12:53:00,265: Epoch 4 average loss: 0.16614325917684114\n", + "# Epoch 5: 100%|██████████| 13/13 [00:00<00:00, 20.18it/s, Loss: 0.0893]\n", + "2020-10-05 12:53:00,916: Epoch 5 average loss: 0.1314379280576339\n", + "# Epoch 6: 100%|██████████| 13/13 [00:00<00:00, 20.25it/s, Loss: 0.1715]\n", + "2020-10-05 12:53:01,565: Epoch 6 average loss: 0.11709093875609912\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 3.77it/s]\n", + "2020-10-05 12:53:02,108: -- Median validation metrics:NSE: 0.70598\n", + "# Epoch 7: 100%|██████████| 13/13 [00:00<00:00, 19.89it/s, Loss: 0.0765]\n", + "2020-10-05 12:53:02,766: Epoch 7 average loss: 0.10338054998562886\n", + "# Epoch 8: 100%|██████████| 13/13 [00:00<00:00, 
19.77it/s, Loss: 0.0803]\n", + "2020-10-05 12:53:03,430: Epoch 8 average loss: 0.09337395773484157\n", + "# Epoch 9: 100%|██████████| 13/13 [00:00<00:00, 18.54it/s, Loss: 0.0631]\n", + "2020-10-05 12:53:04,137: Epoch 9 average loss: 0.09041231240217502\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 3.74it/s]\n", + "2020-10-05 12:53:04,691: -- Median validation metrics:NSE: 0.72472\n", + "# Epoch 10: 100%|██████████| 13/13 [00:01<00:00, 11.72it/s, Loss: 0.1419]\n", + "2020-10-05 12:53:05,804: Epoch 10 average loss: 0.08599556208803104\n", + "# Epoch 11: 100%|██████████| 13/13 [00:00<00:00, 20.62it/s, Loss: 0.1633]\n", + "2020-10-05 12:53:06,442: Epoch 11 average loss: 0.08094931651766483\n", + "# Epoch 12: 100%|██████████| 13/13 [00:00<00:00, 20.40it/s, Loss: 0.0485]\n", + "2020-10-05 12:53:07,085: Epoch 12 average loss: 0.0698587424479998\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 4.31it/s]\n", + "2020-10-05 12:53:07,590: -- Median validation metrics:NSE: 0.77600\n", + "# Epoch 13: 100%|██████████| 13/13 [00:00<00:00, 21.13it/s, Loss: 0.0707]\n", + "2020-10-05 12:53:08,210: Epoch 13 average loss: 0.07356188331659023\n", + "# Epoch 14: 100%|██████████| 13/13 [00:00<00:00, 22.66it/s, Loss: 0.0825]\n", + "2020-10-05 12:53:08,788: Epoch 14 average loss: 0.07214784335631591\n", + "# Epoch 15: 100%|██████████| 13/13 [00:00<00:00, 21.50it/s, Loss: 0.0473]\n", + "2020-10-05 12:53:09,399: Epoch 15 average loss: 0.06782861340504426\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 3.59it/s]\n", + "2020-10-05 12:53:09,974: -- Median validation metrics:NSE: 0.79518\n", + "# Epoch 16: 100%|██████████| 13/13 [00:00<00:00, 22.83it/s, Loss: 0.0356]\n", + "2020-10-05 12:53:10,548: Epoch 16 average loss: 0.06452611088752747\n", + "# Epoch 17: 100%|██████████| 13/13 [00:00<00:00, 18.94it/s, Loss: 0.0343]\n", + "2020-10-05 12:53:11,239: Epoch 17 average loss: 0.06379958213521884\n", + "# Epoch 18: 100%|██████████| 13/13 [00:00<00:00, 21.93it/s, Loss: 
0.0528]\n", + "2020-10-05 12:53:11,838: Epoch 18 average loss: 0.06280213909653518\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 4.13it/s]\n", + "2020-10-05 12:53:12,351: -- Median validation metrics:NSE: 0.80859\n", + "# Epoch 19: 100%|██████████| 13/13 [00:00<00:00, 21.07it/s, Loss: 0.0701]\n", + "2020-10-05 12:53:12,972: Epoch 19 average loss: 0.0555433054956106\n", + "# Epoch 20: 100%|██████████| 13/13 [00:00<00:00, 21.90it/s, Loss: 0.0627]\n", + "2020-10-05 12:53:13,571: Epoch 20 average loss: 0.062321179188214816\n", + "# Epoch 21: 100%|██████████| 13/13 [00:00<00:00, 21.03it/s, Loss: 0.0453]\n", + "2020-10-05 12:53:14,194: Epoch 21 average loss: 0.05481961842339773\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 4.32it/s]\n", + "2020-10-05 12:53:14,698: -- Median validation metrics:NSE: 0.81559\n", + "# Epoch 22: 100%|██████████| 13/13 [00:00<00:00, 20.03it/s, Loss: 0.1060]\n", + "2020-10-05 12:53:15,351: Epoch 22 average loss: 0.06080526944536429\n", + "# Epoch 23: 100%|██████████| 13/13 [00:00<00:00, 20.79it/s, Loss: 0.0510]\n", + "2020-10-05 12:53:15,983: Epoch 23 average loss: 0.0530552279490691\n", + "# Epoch 24: 100%|██████████| 13/13 [00:00<00:00, 21.31it/s, Loss: 0.0569]\n", + "2020-10-05 12:53:16,598: Epoch 24 average loss: 0.05936390132858203\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 2.85it/s]\n", + "2020-10-05 12:53:17,205: -- Median validation metrics:NSE: 0.79431\n", + "# Epoch 25: 100%|██████████| 13/13 [00:00<00:00, 21.29it/s, Loss: 0.0278]\n", + "2020-10-05 12:53:17,821: Epoch 25 average loss: 0.05501310441356439\n", + "# Epoch 26: 100%|██████████| 13/13 [00:00<00:00, 21.44it/s, Loss: 0.0452]\n", + "2020-10-05 12:53:18,432: Epoch 26 average loss: 0.05229091515334753\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# Epoch 27: 100%|██████████| 13/13 [00:00<00:00, 22.24it/s, Loss: 0.0588]\n", + "2020-10-05 12:53:19,021: Epoch 27 average loss: 0.04917494379557096\n", + "# Validation: 
100%|██████████| 1/1 [00:00<00:00, 4.42it/s]\n", + "2020-10-05 12:53:19,514: -- Median validation metrics:NSE: 0.81387\n", + "# Epoch 28: 100%|██████████| 13/13 [00:00<00:00, 20.37it/s, Loss: 0.0417]\n", + "2020-10-05 12:53:20,157: Epoch 28 average loss: 0.046723469805258974\n", + "# Epoch 29: 100%|██████████| 13/13 [00:00<00:00, 19.72it/s, Loss: 0.0399]\n", + "2020-10-05 12:53:20,822: Epoch 29 average loss: 0.04707713339191217\n", + "2020-10-05 12:53:20,826: Setting learning rate to 0.005\n", + "# Epoch 30: 100%|██████████| 13/13 [00:00<00:00, 20.35it/s, Loss: 0.0569]\n", + "2020-10-05 12:53:21,468: Epoch 30 average loss: 0.04919698060705112\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 3.89it/s]\n", + "2020-10-05 12:53:22,026: -- Median validation metrics:NSE: 0.81469\n", + "# Epoch 31: 100%|██████████| 13/13 [00:00<00:00, 18.51it/s, Loss: 0.0412]\n", + "2020-10-05 12:53:22,732: Epoch 31 average loss: 0.046678220136807516\n", + "# Epoch 32: 100%|██████████| 13/13 [00:00<00:00, 19.51it/s, Loss: 0.0385]\n", + "2020-10-05 12:53:23,405: Epoch 32 average loss: 0.04431860951276926\n", + "# Epoch 33: 100%|██████████| 13/13 [00:00<00:00, 18.42it/s, Loss: 0.0381]\n", + "2020-10-05 12:53:24,116: Epoch 33 average loss: 0.04529069263774615\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 3.29it/s]\n", + "2020-10-05 12:53:24,701: -- Median validation metrics:NSE: 0.82519\n", + "# Epoch 34: 100%|██████████| 13/13 [00:00<00:00, 19.21it/s, Loss: 0.0354]\n", + "2020-10-05 12:53:25,382: Epoch 34 average loss: 0.047089104182445086\n", + "# Epoch 35: 100%|██████████| 13/13 [00:00<00:00, 20.25it/s, Loss: 0.0414]\n", + "2020-10-05 12:53:26,030: Epoch 35 average loss: 0.03992394959697357\n", + "# Epoch 36: 100%|██████████| 13/13 [00:00<00:00, 20.30it/s, Loss: 0.0488]\n", + "2020-10-05 12:53:26,678: Epoch 36 average loss: 0.04217390658763739\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 4.24it/s]\n", + "2020-10-05 12:53:27,180: -- Median validation 
metrics:NSE: 0.82312\n", + "# Epoch 37: 100%|██████████| 13/13 [00:00<00:00, 20.71it/s, Loss: 0.0486]\n", + "2020-10-05 12:53:27,811: Epoch 37 average loss: 0.04324197224699534\n", + "# Epoch 38: 100%|██████████| 13/13 [00:00<00:00, 20.47it/s, Loss: 0.0845]\n", + "2020-10-05 12:53:28,451: Epoch 38 average loss: 0.042667378599827104\n", + "# Epoch 39: 100%|██████████| 13/13 [00:00<00:00, 20.77it/s, Loss: 0.0399]\n", + "2020-10-05 12:53:29,082: Epoch 39 average loss: 0.043971521636614434\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 2.85it/s]\n", + "2020-10-05 12:53:29,695: -- Median validation metrics:NSE: 0.81838\n", + "2020-10-05 12:53:29,696: Setting learning rate to 0.001\n", + "# Epoch 40: 100%|██████████| 13/13 [00:00<00:00, 20.51it/s, Loss: 0.0629]\n", + "2020-10-05 12:53:30,334: Epoch 40 average loss: 0.047052909405185625\n", + "# Epoch 41: 100%|██████████| 13/13 [00:00<00:00, 21.16it/s, Loss: 0.0358]\n", + "2020-10-05 12:53:30,955: Epoch 41 average loss: 0.037803018608918555\n", + "# Epoch 42: 100%|██████████| 13/13 [00:00<00:00, 21.40it/s, Loss: 0.0514]\n", + "2020-10-05 12:53:31,568: Epoch 42 average loss: 0.04351525338223347\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 4.34it/s]\n", + "2020-10-05 12:53:32,075: -- Median validation metrics:NSE: 0.82342\n", + "# Epoch 43: 100%|██████████| 13/13 [00:00<00:00, 21.82it/s, Loss: 0.0397]\n", + "2020-10-05 12:53:32,675: Epoch 43 average loss: 0.03911813692404674\n", + "# Epoch 44: 100%|██████████| 13/13 [00:00<00:00, 18.98it/s, Loss: 0.0404]\n", + "2020-10-05 12:53:33,365: Epoch 44 average loss: 0.042462579905986786\n", + "# Epoch 45: 100%|██████████| 13/13 [00:00<00:00, 19.18it/s, Loss: 0.0339]\n", + "2020-10-05 12:53:34,051: Epoch 45 average loss: 0.04013453395320819\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 3.41it/s]\n", + "2020-10-05 12:53:34,639: -- Median validation metrics:NSE: 0.83013\n", + "# Epoch 46: 100%|██████████| 13/13 [00:00<00:00, 18.73it/s, Loss: 0.0397]\n", + 
"2020-10-05 12:53:35,337: Epoch 46 average loss: 0.04252039125332466\n", + "# Epoch 47: 100%|██████████| 13/13 [00:00<00:00, 19.62it/s, Loss: 0.0451]\n", + "2020-10-05 12:53:36,005: Epoch 47 average loss: 0.03530547395348549\n", + "# Epoch 48: 100%|██████████| 13/13 [00:00<00:00, 21.38it/s, Loss: 0.0298]\n", + "2020-10-05 12:53:36,621: Epoch 48 average loss: 0.039933502387541994\n", + "# Validation: 100%|██████████| 1/1 [00:00<00:00, 4.36it/s]\n", + "2020-10-05 12:53:37,112: -- Median validation metrics:NSE: 0.82898\n", + "# Epoch 49: 100%|██████████| 13/13 [00:00<00:00, 21.71it/s, Loss: 0.0408]\n", + "2020-10-05 12:53:37,714: Epoch 49 average loss: 0.04054238203053291\n", + "# Epoch 50: 100%|██████████| 13/13 [00:00<00:00, 21.83it/s, Loss: 0.0302]\n", + "2020-10-05 12:53:38,315: Epoch 50 average loss: 0.04161823354661465\n" + ] + } + ], + "source": [ + "start_run(config_file=Path(\"1_basin.yml\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Evaluate run on test set\n", + "The run directory that needs to be specified for evaluation is printed in the output log above." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2020-10-05 12:54:54,703: Using the model weights from /home/frederik/Projects/neuralhydrology/examples/01-Introduction/runs/test_run_0510_125252/model_epoch050.pt\n", + "# Evaluation: 100%|██████████| 1/1 [00:00<00:00, 1.79it/s]\n", + "2020-10-05 12:54:55,269: Stored results at /home/frederik/Projects/neuralhydrology/examples/01-Introduction/runs/test_run_0510_125252/test/model_epoch050/test_results.p\n" + ] + } + ], + "source": [ + "run_dir = Path(\"/home/frederik/Projects/neuralhydrology/examples/01-Introduction/runs/test_run_0510_125252\")\n", + "eval_run(run_dir=run_dir, period=\"test\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load and inspect model predictions\n", + "Next, we load the results file and compare the model predictions with observations. The results file is always a pickled dictionary with one key per basin (even for a single basin). The next-lower dictionary level is the temporal resolution of the predictions. In this case, we trained a model only on daily data ('1D'). Within the temporal resolution, the next-lower dictionary level contains the key `xr` (an xarray Dataset that contains observations and predictions), as well as one key for each metric that was specified in the config file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['01022500'])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with open(run_dir / \"test\" / \"model_epoch050\" / \"test_results.p\", \"rb\") as fp:\n", + " results = pickle.load(fp)\n", + " \n", + "results.keys()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The data variables in the xarray Dataset are named according to the name of the target variables, with suffix `_obs` for the observations and suffix `_sim` for the simulations." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:         (date: 3652, time_step: 1)\n",
+       "Coordinates:\n",
+       "  * date            (date) datetime64[ns] 1989-10-01 1989-10-02 ... 1999-09-30\n",
+       "  * time_step       (time_step) timedelta64[ns] 00:00:00\n",
+       "Data variables:\n",
+       "    QObs(mm/d)_obs  (date, time_step) float32 0.6203073 0.5536971 ... 0.9991529\n",
+       "    QObs(mm/d)_sim  (date, time_step) float32 0.6279986 0.6001396 ... 1.966821
" + ], + "text/plain": [ + "\n", + "Dimensions: (date: 3652, time_step: 1)\n", + "Coordinates:\n", + " * date (date) datetime64[ns] 1989-10-01 1989-10-02 ... 1999-09-30\n", + " * time_step (time_step) timedelta64[ns] 00:00:00\n", + "Data variables:\n", + " QObs(mm/d)_obs (date, time_step) float32 0.6203073 0.5536971 ... 0.9991529\n", + " QObs(mm/d)_sim (date, time_step) float32 0.6279986 0.6001396 ... 1.966821" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results['01022500']['1D']['xr']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's plot the model predictions vs. the observations" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Test period - NSE 0.791')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# extract observations and simulations\n", + "qobs = results['01022500']['1D']['xr']['QObs(mm/d)_obs']\n", + "qsim = results['01022500']['1D']['xr']['QObs(mm/d)_sim']\n", + "\n", + "fig, ax = plt.subplots(figsize=(16,10))\n", + "ax.plot(qobs['date'], qobs)\n", + "ax.plot(qsim['date'], qsim)\n", + "ax.set_ylabel(\"Discharge (mm/d)\")\n", + "ax.set_title(f\"Test period - NSE {results['01022500']['1D']['NSE']:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we are going to compute all metrics that are implemented in the neuralHydrology package. You will find additional hydrological signatures implemented in `neuralhydrology.evaluation.signatures`." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NSE: 0.791\n", + "MSE: 1.028\n", + "RMSE: 1.014\n", + "KGE: 0.864\n", + "Alpha-NSE: 0.929\n", + "Beta-NSE: 0.036\n", + "Pearson-r: 0.891\n", + "FHV: -8.793\n", + "FMS: -5.994\n", + "FLV: -876.161\n", + "Peak-Timing: 0.087\n" + ] + } + ], + "source": [ + "values = metrics.calculate_all_metrics(qobs.isel(time_step=-1), qsim.isel(time_step=-1))\n", + "for key, val in values.items():\n", + " print(f\"{key}: {val:.3f}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/neuralhydrology/__about__.py b/neuralhydrology/__about__.py index ea68199a..3f013dc7 100644 --- a/neuralhydrology/__about__.py +++ b/neuralhydrology/__about__.py @@ 
-1 +1 @@ -__version__ = "0.9.0-beta" +__version__ = "0.9.1-beta" diff --git a/neuralhydrology/data/utils.py b/neuralhydrology/data/utils.py index b0e3b398..9e7e508a 100644 --- a/neuralhydrology/data/utils.py +++ b/neuralhydrology/data/utils.py @@ -4,6 +4,8 @@ import numpy as np import pandas as pd import xarray +from xarray.core.dataarray import DataArray +from xarray.core.dataset import Dataset ######################################################################################################################## # CAMELS US utility functions # @@ -512,3 +514,30 @@ def infer_frequency(index: Union[pd.DatetimeIndex, np.ndarray]) -> str: if pd.to_timedelta(native_frequency) == pd.to_timedelta(0): raise ValueError('Inferred dataset frequency is zero.') return native_frequency + + +def infer_datetime_coord(xr: Union[DataArray, Dataset]) -> str: + """Checks for coordinate with 'date' in its name and returns the name. + + Parameters + ---------- + xr : Union[DataArray, Dataset] + Array to infer coordinate name of. + + Returns + ------- + str + Name of datetime coordinate name. + + Raises + ------ + RuntimeError + If none or multiple coordinates with 'date' in its name are found. + """ + candidates = [c for c in list(xr.coords) if "date" in c] + if len(candidates) > 1: + raise RuntimeError("Found multiple coordinates with 'date' in its name.") + if not candidates: + raise RuntimeError("Did not find any coordinate with 'date' in its name") + + return candidates[0] diff --git a/neuralhydrology/evaluation/metrics.py b/neuralhydrology/evaluation/metrics.py index 833a83cf..32e62944 100644 --- a/neuralhydrology/evaluation/metrics.py +++ b/neuralhydrology/evaluation/metrics.py @@ -5,6 +5,8 @@ from scipy import stats, signal from xarray.core.dataarray import DataArray +from neuralhydrology.data import utils + def get_available_metrics() -> List[str]: """Get list of available metrics. 
@@ -513,7 +515,11 @@ def fdc_flv(obs: DataArray, sim: DataArray, l: float = 0.3) -> float: return flv * 100 -def mean_peak_timing(obs: DataArray, sim: DataArray, window: int = None, resolution: str = '1D') -> float: +def mean_peak_timing(obs: DataArray, + sim: DataArray, + window: int = None, + resolution: str = '1D', + datetime_coord: str = None) -> float: """Mean difference in peak flow timing. Uses scipy.find_peaks to find peaks in the observed time series. Starting with all observed peaks, those with a @@ -536,6 +542,8 @@ def mean_peak_timing(obs: DataArray, sim: DataArray, window: int = None, resolut for a resolution of '1H' the the window size is 12. resolution : str, optional Temporal resolution of the time series in pandas format, e.g. '1D' for daily and '1H' for hourly. + datetime_coord : str, optional + Name of datetime coordinate. Tried to infer automatically if not specified. Returns @@ -558,6 +566,10 @@ def mean_peak_timing(obs: DataArray, sim: DataArray, window: int = None, resolut # heuristic to get indices of peaks and their corresponding height. peaks, properties = signal.find_peaks(obs.values, distance=100, prominence=np.std(obs.values)) + # infer name of datetime index + if datetime_coord is None: + datetime_coord = utils.infer_datetime_coord(obs) + if window is None: # infer a reasonable window size window = max((0.5 * pd.to_timedelta('1D')) // pd.to_timedelta(resolution), 3) @@ -567,8 +579,8 @@ def mean_peak_timing(obs: DataArray, sim: DataArray, window: int = None, resolut for idx in peaks: # skip peaks at the start and end of the sequence and peaks around missing observations # (NaNs that were removed in obs & sim would result in windows that span too much time). 
- if (idx - window < 0) or (idx + window >= len(obs)) or (pd.date_range(obs[idx - window]['datetime'].values, - obs[idx + window]['datetime'].values, + if (idx - window < 0) or (idx + window >= len(obs)) or (pd.date_range(obs[idx - window][datetime_coord].values, + obs[idx + window][datetime_coord].values, freq=resolution).size != 2 * window + 1): continue @@ -584,7 +596,7 @@ def mean_peak_timing(obs: DataArray, sim: DataArray, window: int = None, resolut peak_obs = obs[idx] # calculate the time difference between the peaks - delta = peak_obs.coords['datetime'] - peak_sim.coords['datetime'] + delta = peak_obs.coords[datetime_coord] - peak_sim.coords[datetime_coord] timing_error = np.abs(delta.values / pd.to_timedelta(resolution)) @@ -593,7 +605,8 @@ def mean_peak_timing(obs: DataArray, sim: DataArray, window: int = None, resolut return np.mean(timing_errors) if len(timing_errors) > 0 else np.nan -def calculate_all_metrics(obs: DataArray, sim: DataArray, resolution: str = "1D") -> Dict[str, float]: +def calculate_all_metrics(obs: DataArray, sim: DataArray, resolution: str = "1D", + datetime_coord: str = None) -> Dict[str, float]: """Calculate all metrics with default values. Parameters @@ -604,7 +617,9 @@ def calculate_all_metrics(obs: DataArray, sim: DataArray, resolution: str = "1D" Simulated time series. resolution : str, optional Temporal resolution of the time series in pandas format, e.g. '1D' for daily and '1H' for hourly. - + datetime_coord : str, optional + Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified. 
+ Returns ------- Dict[str, float] @@ -621,13 +636,17 @@ def calculate_all_metrics(obs: DataArray, sim: DataArray, resolution: str = "1D" "FHV": fdc_fhv(obs, sim), "FMS": fdc_fms(obs, sim), "FLV": fdc_flv(obs, sim), - "Peak-Timing": mean_peak_timing(obs, sim, resolution=resolution) + "Peak-Timing": mean_peak_timing(obs, sim, resolution=resolution, datetime_coord=datetime_coord) } return results -def calculate_metrics(obs: DataArray, sim: DataArray, metrics: List[str], resolution: str = "1D") -> Dict[str, float]: +def calculate_metrics(obs: DataArray, + sim: DataArray, + metrics: List[str], + resolution: str = "1D", + datetime_coord: str = None) -> Dict[str, float]: """Calculate specific metrics with default values. Parameters @@ -640,6 +659,8 @@ def calculate_metrics(obs: DataArray, sim: DataArray, metrics: List[str], resolu List of metric names. resolution : str, optional Temporal resolution of the time series in pandas format, e.g. '1D' for daily and '1H' for hourly. + datetime_coord : str, optional + Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified. 
Returns ------- @@ -673,7 +694,7 @@ def calculate_metrics(obs: DataArray, sim: DataArray, metrics: List[str], resolu elif metric.lower() == "flv": values["FLV"] = fdc_flv(obs, sim) elif metric.lower() == "peak-timing": - values["Peak-Timing"] = mean_peak_timing(obs, sim, resolution=resolution) + values["Peak-Timing"] = mean_peak_timing(obs, sim, resolution=resolution, datetime_coord=datetime_coord) else: raise RuntimeError(f"Unknown metric {metric}") diff --git a/neuralhydrology/evaluation/signatures.py b/neuralhydrology/evaluation/signatures.py index 603b0964..dcc1e726 100644 --- a/neuralhydrology/evaluation/signatures.py +++ b/neuralhydrology/evaluation/signatures.py @@ -8,7 +8,7 @@ from numba import njit from xarray.core.dataarray import DataArray -from neuralhydrology.data.utils import infer_frequency +from neuralhydrology.data import utils def get_available_signatures() -> List[str]: @@ -26,7 +26,7 @@ def get_available_signatures() -> List[str]: return signatures -def calculate_all_signatures(da: DataArray, prcp: DataArray, datetime_coord: str = 'date') -> Dict[str, float]: +def calculate_all_signatures(da: DataArray, prcp: DataArray, datetime_coord: str = None) -> Dict[str, float]: """Calculate all signatures with default values. Parameters @@ -36,34 +36,35 @@ def calculate_all_signatures(da: DataArray, prcp: DataArray, datetime_coord: str prcp : DataArray Array of precipitation values. datetime_coord : str, optional - Datetime coordinate in the passed DataArray. + Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified. Returns ------- Dict[str, float] Dictionary with signature names as keys and signature values as values. 
""" + if datetime_coord is None: + datetime_coord = utils.infer_datetime_coord(da) + results = { - "high_q_freq": high_q_freq(da, coord=datetime_coord), + "high_q_freq": high_q_freq(da, datetime_coord=datetime_coord), "high_q_dur": high_q_dur(da), - "low_q_freq": low_q_freq(da, coord=datetime_coord), + "low_q_freq": low_q_freq(da, datetime_coord=datetime_coord), "low_q_dur": low_q_dur(da), "zero_q_freq": zero_q_freq(da), "q95": q95(da), "q5": q5(da), "q_mean": q_mean(da), - "hfd_mean": hfd_mean(da, coord=datetime_coord), + "hfd_mean": hfd_mean(da, datetime_coord=datetime_coord), "baseflow_index": baseflow_index(da)[0], "slope_fdc": slope_fdc(da), - "stream_elas": stream_elas(da, prcp, coord=datetime_coord), - "runoff_ratio": runoff_ratio(da, prcp, coord=datetime_coord) + "stream_elas": stream_elas(da, prcp, datetime_coord=datetime_coord), + "runoff_ratio": runoff_ratio(da, prcp, datetime_coord=datetime_coord) } return results -def calculate_signatures(da: DataArray, - signatures: List[str], - datetime_coord: str = 'date', +def calculate_signatures(da: DataArray, signatures: List[str], datetime_coord: str = None, prcp: DataArray = None) -> Dict[str, float]: """Calculate the specified signatures with default values. @@ -74,7 +75,7 @@ def calculate_signatures(da: DataArray, signatures : List[str] List of names of the signatures to calculate. datetime_coord : str, optional - Datetime coordinate in the passed DataArray. + Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified. prcp : DataArray, optional Array of precipitation values. Required for signatures 'runoff_ratio' and 'streamflow_elas'. @@ -88,14 +89,17 @@ def calculate_signatures(da: DataArray, ValueError If a passed signature name does not exist. 
""" + if datetime_coord is None: + datetime_coord = utils.infer_datetime_coord(da) + values = {} for signature in signatures: if signature == "high_q_freq": - values["high_q_freq"] = high_q_freq(da, coord=datetime_coord) + values["high_q_freq"] = high_q_freq(da, datetime_coord=datetime_coord) elif signature == "high_q_dur": values["high_q_dur"] = high_q_dur(da) elif signature == "low_q_freq": - values["low_q_freq"] = low_q_freq(da, coord=datetime_coord) + values["low_q_freq"] = low_q_freq(da, datetime_coord=datetime_coord) elif signature == "low_q_dur": values["low_q_dur"] = low_q_dur(da) elif signature == "zero_q_freq": @@ -107,15 +111,15 @@ def calculate_signatures(da: DataArray, elif signature == "q_mean": values["q_mean"] = q_mean(da) elif signature == "hfd_mean": - values["hfd_mean"] = hfd_mean(da, coord=datetime_coord) + values["hfd_mean"] = hfd_mean(da, datetime_coord=datetime_coord) elif signature == "baseflow_index": - values["baseflow_index"] = baseflow_index(da, coord=datetime_coord)[0] + values["baseflow_index"] = baseflow_index(da, datetime_coord=datetime_coord)[0] elif signature == "slope_fdc": values["slope_fdc"] = slope_fdc(da) elif signature == "runoff_ratio": - values["runoff_ratio"] = runoff_ratio(da, prcp, coord=datetime_coord) + values["runoff_ratio"] = runoff_ratio(da, prcp, datetime_coord=datetime_coord) elif signature == "stream_elas": - values["stream_elas"] = stream_elas(da, prcp, coord=datetime_coord) + values["stream_elas"] = stream_elas(da, prcp, datetime_coord=datetime_coord) else: ValueError(f"Unknown signatures {signature}") return values @@ -199,7 +203,6 @@ def low_q_dur(da: DataArray, threshold: float = 0.2) -> float: .. [#] Westerberg, I. K. and McMillan, H. K.: Uncertainty in hydrological signatures. 
Hydrology and Earth System Sciences, 2015, 19, 3951--3968, doi:10.5194/hess-19-3951-2015 """ - mean_flow = float(da.mean()) idx = np.where(da.values < threshold * mean_flow)[0] if len(idx) > 0: @@ -225,14 +228,13 @@ def zero_q_freq(da: DataArray) -> float: float Zero-flow frequency. """ - # number of steps with zero flow n_steps = (da == 0).sum() return float(n_steps / len(da)) -def high_q_freq(da: DataArray, coord: str = 'date', threshold: float = 9.) -> float: +def high_q_freq(da: DataArray, datetime_coord: str = None, threshold: float = 9.) -> float: """Calculate high-flow frequency. Frequency of high-flow events (>`threshold` times the median flow) [#]_, [#]_ (Table 2). @@ -241,8 +243,8 @@ def high_q_freq(da: DataArray, coord: str = 'date', threshold: float = 9.) -> fl ---------- da : DataArray Array of flow values. - coord : str, optional - Datetime coordinate in `da`. + datetime_coord : str, optional + Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified. threshold : float, optional High-flow threshold. Values larger than ``threshold * median`` are considered high flows. @@ -258,10 +260,12 @@ def high_q_freq(da: DataArray, coord: str = 'date', threshold: float = 9.) -> fl .. [#] Westerberg, I. K. and McMillan, H. K.: Uncertainty in hydrological signatures. 
Hydrology and Earth System Sciences, 2015, 19, 3951--3968, doi:10.5194/hess-19-3951-2015 """ + if datetime_coord is None: + datetime_coord = utils.infer_datetime_coord(da) # determine the date of the first January 1st in the data period - first_date = da.coords[coord][0].values.astype('datetime64[s]').astype(datetime) - last_date = da.coords[coord][-1].values.astype('datetime64[s]').astype(datetime) + first_date = da.coords[datetime_coord][0].values.astype('datetime64[s]').astype(datetime) + last_date = da.coords[datetime_coord][-1].values.astype('datetime64[s]').astype(datetime) if first_date == datetime.strptime(f'{first_date.year}-01-01', '%Y-%m-%d'): start_date = first_date @@ -277,7 +281,7 @@ def high_q_freq(da: DataArray, coord: str = 'date', threshold: float = 9.) -> fl hqfs = [] while end_date < last_date: - data = da.sel({coord: slice(start_date, end_date)}) + data = da.sel({datetime_coord: slice(start_date, end_date)}) # number of steps with discharge higher than threshold * median in a one year period n_steps = (data > (threshold * median_flow)).sum() @@ -290,7 +294,7 @@ def high_q_freq(da: DataArray, coord: str = 'date', threshold: float = 9.) -> fl return np.mean(hqfs) -def low_q_freq(da: DataArray, coord: str = 'date', threshold: float = 0.2) -> float: +def low_q_freq(da: DataArray, datetime_coord: str = None, threshold: float = 0.2) -> float: """Calculate Low-flow frequency. Frequency of low-flow events (<`threshold` times the median flow) [#]_, [#]_ (Table 2). @@ -299,8 +303,8 @@ def low_q_freq(da: DataArray, coord: str = 'date', threshold: float = 0.2) -> fl ---------- da : DataArray Array of flow values. - coord : str, optional - Datetime coordinate in `da`. + datetime_coord : str, optional + Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified. threshold : float, optional Low-flow threshold. Values below ``threshold * median`` are considered low flows. 
@@ -316,10 +320,12 @@ def low_q_freq(da: DataArray, coord: str = 'date', threshold: float = 0.2) -> fl .. [#] Westerberg, I. K. and McMillan, H. K.: Uncertainty in hydrological signatures. Hydrology and Earth System Sciences, 2015, 19, 3951--3968, doi:10.5194/hess-19-3951-2015 """ + if datetime_coord is None: + datetime_coord = utils.infer_datetime_coord(da) # determine the date of the first January 1st in the data period - first_date = da.coords[coord][0].values.astype('datetime64[s]').astype(datetime) - last_date = da.coords[coord][-1].values.astype('datetime64[s]').astype(datetime) + first_date = da.coords[datetime_coord][0].values.astype('datetime64[s]').astype(datetime) + last_date = da.coords[datetime_coord][-1].values.astype('datetime64[s]').astype(datetime) if first_date == datetime.strptime(f'{first_date.year}-01-01', '%Y-%m-%d'): start_date = first_date @@ -335,7 +341,7 @@ def low_q_freq(da: DataArray, coord: str = 'date', threshold: float = 0.2) -> fl lqfs = [] while end_date < last_date: - data = da.sel({coord: slice(start_date, end_date)}) + data = da.sel({datetime_coord: slice(start_date, end_date)}) # number of steps with discharge lower than threshold * median in a one year period n_steps = (data < (threshold * mean_flow)).sum() @@ -348,7 +354,7 @@ def low_q_freq(da: DataArray, coord: str = 'date', threshold: float = 0.2) -> fl return np.mean(lqfs) -def hfd_mean(da: DataArray, coord: str = 'date') -> float: +def hfd_mean(da: DataArray, datetime_coord: str = None) -> float: """Calculate mean half-flow duration. Mean half-flow date (step on which the cumulative discharge since October 1st @@ -358,8 +364,8 @@ def hfd_mean(da: DataArray, coord: str = 'date') -> float: ---------- da : DataArray Array of flow values. - coord : str, optional - Datetime coordinate name in `da`. + datetime_coord : str, optional + Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified. 
Returns ------- @@ -371,10 +377,12 @@ def hfd_mean(da: DataArray, coord: str = 'date') -> float: .. [#] Court, A.: Measures of streamflow timing. Journal of Geophysical Research (1896-1977), 1962, 67, 4335--4339, doi:10.1029/JZ067i011p04335 """ + if datetime_coord is None: + datetime_coord = utils.infer_datetime_coord(da) # determine the date of the first October 1st in the data period - first_date = da.coords[coord][0].values.astype('datetime64[s]').astype(datetime) - last_date = da.coords[coord][-1].values.astype('datetime64[s]').astype(datetime) + first_date = da.coords[datetime_coord][0].values.astype('datetime64[s]').astype(datetime) + last_date = da.coords[datetime_coord][-1].values.astype('datetime64[s]').astype(datetime) if first_date > datetime.strptime(f'{first_date.year}-10-01', '%Y-%m-%d'): start_date = datetime.strptime(f'{first_date.year + 1}-10-01', '%Y-%m-%d') @@ -387,7 +395,7 @@ def hfd_mean(da: DataArray, coord: str = 'date') -> float: while end_date < last_date: # compute cumulative sum for the selected period - data = da.sel({coord: slice(start_date, end_date)}) + data = da.sel({datetime_coord: slice(start_date, end_date)}) cs = data.cumsum(skipna=True) # find steps with more cumulative discharge than the half annual sum @@ -417,7 +425,6 @@ def q5(da: DataArray) -> float: float 5th flow quantile. """ - return float(da.quantile(0.05)) @@ -498,7 +505,7 @@ def baseflow_index(da: DataArray, alpha: float = 0.98, warmup: int = 30, n_passes: int = None, - coord: str = 'date') -> Tuple[float, DataArray]: + datetime_coord: str = None) -> Tuple[float, DataArray]: """Calculate baseflow index. Ratio of mean baseflow to mean discharge [#]_. If `da` contains NaN values, the baseflow is calculated for each @@ -515,8 +522,9 @@ def baseflow_index(da: DataArray, n_passes : int, optional Number of passes (alternating forward and backward) to perform. Should be an odd number. 
If None, will use 3 for daily and 9 for hourly data and fail for all other input frequencies. - coord : str, optional - Datetime coordinate in `da`, used to infer the frequency if `n_passes` is None. + datetime_coord : str, optional + Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified. Used to infer the + frequency if `n_passes` is None. Returns ------- @@ -535,9 +543,11 @@ def baseflow_index(da: DataArray, Lyne and Hollick Filter. Australasian Journal of Water Resources, Taylor & Francis, 2013, 17, 25--34, doi:10.7158/13241583.2013.11465417 """ + if datetime_coord is None: + datetime_coord = utils.infer_datetime_coord(da) if n_passes is None: - freq = infer_frequency(da[coord].values) + freq = utils.infer_frequency(da[datetime_coord].values) if freq == '1D': n_passes = 3 elif freq == '1H': @@ -595,7 +605,7 @@ def slope_fdc(da: DataArray, lower_quantile: float = 0.33, upper_quantile: float return value -def runoff_ratio(da: DataArray, prcp: DataArray, coord: str = 'date') -> float: +def runoff_ratio(da: DataArray, prcp: DataArray, datetime_coord: str = None) -> float: """Calculate runoff ratio. Runoff ratio (ratio of mean discharge to mean precipitation) [#]_ (Eq. 2). @@ -606,8 +616,8 @@ def runoff_ratio(da: DataArray, prcp: DataArray, coord: str = 'date') -> float: Array of flow values. prcp : DataArray Array of precipitation values. - coord : str, optional - Datetime dimension name in `da`. + datetime_coord : str, optional + Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified. Returns ------- @@ -620,11 +630,14 @@ def runoff_ratio(da: DataArray, prcp: DataArray, coord: str = 'date') -> float: analysis of hydrologic similarity based on catchment function in the eastern USA. 
Hydrology and Earth System Sciences, 2011, 15, 2895--2911, doi:10.5194/hess-15-2895-2011 """ + if datetime_coord is None: + datetime_coord = utils.infer_datetime_coord(da) + # rename precip coordinate name (to avoid problems with 'index' or 'date') - prcp = prcp.rename({list(prcp.coords.keys())[0]: coord}) + prcp = prcp.rename({list(prcp.coords.keys())[0]: datetime_coord}) # slice prcp to the same time window as the discharge - prcp = prcp.sel({coord: slice(da.coords[coord][0], da.coords[coord][-1])}) + prcp = prcp.sel({datetime_coord: slice(da.coords[datetime_coord][0], da.coords[datetime_coord][-1])}) # calculate runoff ratio value = da.mean() / prcp.mean() @@ -632,7 +645,7 @@ def runoff_ratio(da: DataArray, prcp: DataArray, coord: str = 'date') -> float: return float(value) -def stream_elas(da: DataArray, prcp: DataArray, coord: str = 'date') -> float: +def stream_elas(da: DataArray, prcp: DataArray, datetime_coord: str = None) -> float: """Calculate stream elasticity. Streamflow precipitation elasticity (sensitivity of streamflow to changes in precipitation at @@ -644,8 +657,8 @@ def stream_elas(da: DataArray, prcp: DataArray, coord: str = 'date') -> float: Array of flow values. prcp : DataArray Array of precipitation values. - coord : str, optional - Datetime dimension name in `da`. + datetime_coord : str, optional + Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified. Returns ------- @@ -657,15 +670,18 @@ def stream_elas(da: DataArray, prcp: DataArray, coord: str = 'date') -> float: .. [#] Sankarasubramanian, A., Vogel, R. M., and Limbrunner, J. F.: Climate elasticity of streamflow in the United States. 
Water Resources Research, 2001, 37, 1771--1781, doi:10.1029/2000WR900330 """ + if datetime_coord is None: + datetime_coord = utils.infer_datetime_coord(da) + # rename precip coordinate name (to avoid problems with 'index' or 'date') - prcp = prcp.rename({list(prcp.coords.keys())[0]: coord}) + prcp = prcp.rename({list(prcp.coords.keys())[0]: datetime_coord}) # slice prcp to the same time window as the discharge - prcp = prcp.sel({coord: slice(da.coords[coord][0], da.coords[coord][-1])}) + prcp = prcp.sel({datetime_coord: slice(da.coords[datetime_coord][0], da.coords[datetime_coord][-1])}) # determine the date of the first October 1st in the data period - first_date = da.coords[coord][0].values.astype('datetime64[s]').astype(datetime) - last_date = da.coords[coord][-1].values.astype('datetime64[s]').astype(datetime) + first_date = da.coords[datetime_coord][0].values.astype('datetime64[s]').astype(datetime) + last_date = da.coords[datetime_coord][-1].values.astype('datetime64[s]').astype(datetime) if first_date > datetime.strptime(f'{first_date.year}-10-01', '%Y-%m-%d'): start_date = datetime.strptime(f'{first_date.year + 1}-10-01', '%Y-%m-%d') @@ -685,8 +701,8 @@ def stream_elas(da: DataArray, prcp: DataArray, coord: str = 'date') -> float: values = [] while end_date < last_date: - q = da.sel({coord: slice(start_date, end_date)}) - p = prcp.sel({coord: slice(start_date, end_date)}) + q = da.sel({datetime_coord: slice(start_date, end_date)}) + p = prcp.sel({datetime_coord: slice(start_date, end_date)}) val = (q.mean() - q_mean_total) / (p.mean() - p_mean_total) * (p_mean_total / q_mean_total) values.append(val) diff --git a/neuralhydrology/nh_run.py b/neuralhydrology/nh_run.py index c9c4efc8..7748fb99 100644 --- a/neuralhydrology/nh_run.py +++ b/neuralhydrology/nh_run.py @@ -99,7 +99,7 @@ def continue_run(run_dir: Path, config_file: Path = None, gpu: int = None): start_training(base_config) -def eval_run(run_dir: Path, period: str, epoch: int, gpu: int = None): +def 
eval_run(run_dir: Path, period: str, epoch: int = None, gpu: int = None): """Start evaluating a trained model. Parameters diff --git a/readthedocs.yml b/readthedocs.yml new file mode 100644 index 00000000..e5375216 --- /dev/null +++ b/readthedocs.yml @@ -0,0 +1,2 @@ +conda: + file: environments/environment_cpu.yml