From 96e23a0d9f667aab3aefcddfcf1d065db7a421e8 Mon Sep 17 00:00:00 2001 From: hboisgon <45457510+hboisgon@users.noreply.github.com> Date: Fri, 1 Dec 2023 09:17:21 +0800 Subject: [PATCH] Add new skills in stats (#666) --- docs/api.rst | 2 + docs/changelog.rst | 2 + hydromt/stats/skills.py | 78 ++++++++++++++++++++++++++++++++++++++- tests/test_stats_skill.py | 8 ++-- 4 files changed, 85 insertions(+), 5 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index ff1ee247b..af9979484 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -829,6 +829,7 @@ Statistics and performance metrics stats.skills.bias stats.skills.percentual_bias + stats.skills.volumetric_error stats.skills.nashsutcliffe stats.skills.lognashsutcliffe stats.skills.pearson_correlation @@ -840,6 +841,7 @@ Statistics and performance metrics stats.skills.rsquared stats.skills.mse stats.skills.rmse + stats.skills.rsr Extreme Value Analysis ======================= diff --git a/docs/changelog.rst b/docs/changelog.rst index 3ecaecb5a..0a569365a 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -14,6 +14,7 @@ Unreleased Added ----- - Export CLI now also accepts time tuples (#660) +- New stats.skills VE and RSR (#666) Changed ------- @@ -23,6 +24,7 @@ Fixed ----- - Bug in `raster.set_crs` if input_crs is of type CRS. (#659) - Export CLI now actually parses provided geoms. (#660) +- Bug in stats.skills for computation of pbias and MSE / RMSE. (#666) Deprecated ---------- diff --git a/hydromt/stats/skills.py b/hydromt/stats/skills.py index 024c4fdf0..7c4eb0859 100644 --- a/hydromt/stats/skills.py +++ b/hydromt/stats/skills.py @@ -68,6 +68,35 @@ def percentual_bias(sim, obs, dim="time"): return pbias +def volumetric_error(sim, obs, dim="time"): + r"""Return the volumetric error between two time series. + + .. math:: + VE= 1 - \\frac{\\sum_{i=1}^{N}(|sim_{i}-obs_{i}|)}{\\sum_{i=1}^{N}(obs_{i})} + + Parameters + ---------- + sim : xarray DataArray + simulations time series + obs : xarray DataArray + observations time series + dim : str, optional + name of time dimension in sim and obs (the default is 'time') + + Returns + ------- + xarray DataArray + volumetric error + """ + # wrap numpy function + kwargs = dict( + input_core_dims=[[dim], [dim]], dask="parallelized", output_dtypes=[float] + ) + ve = xr.apply_ufunc(_ve, sim, obs, **kwargs) + ve.name = "ve" + return ve + + def nashsutcliffe(sim, obs, dim="time"): r"""Return the Nash-Sutcliffe model efficiency. @@ -351,6 +380,33 @@ def rmse(sim, obs, dim="time"): return rmse +def rsr(sim, obs, dim="time"): + r"""Return the RMSE-observations standard deviation (RSR) between two time series. + + .. math:: + RSR=\\frac{RMSE}{STDEVobs} + + Parameters + ---------- + sim : xarray DataArray + simulations time series + obs : xarray DataArray + observations time series + + Returns + ------- + xarray DataArray + the root squared error + """ + # wrap numpy function + kwargs = dict( + input_core_dims=[[dim], [dim]], dask="parallelized", output_dtypes=[float] + ) + rsr = xr.apply_ufunc(_rsr, sim, obs, **kwargs) + rsr.name = "rsr" + return rsr + + def kge(sim, obs, dim="time"): r"""Return the Kling-Gupta Efficiency (KGE) of two time series. @@ -458,15 +514,33 @@ def _bias(sim, obs, axis=-1): def _pbias(sim, obs, axis=-1): """Percentual bias.""" - return np.nansum(sim - obs, axis=axis) / np.nansum(obs, axis=axis) + return 100 * np.nansum(sim - obs, axis=axis) / np.nansum(obs, axis=axis) + + +def _ve(sim, obs, axis=-1): + """Volumetric Error.""" + ve = 1 - np.nansum(np.absolute(sim - obs), axis=axis) / np.nansum(obs, axis=axis) + return ve def _mse(sim, obs, axis=-1): """Mean squared error.""" - mse = np.nansum((obs - sim) ** 2, axis=axis) + mse = np.nansum((obs - sim) ** 2, axis=axis) / np.nansum( + np.isfinite(obs), axis=axis + ) return mse +def _rsr(sim, obs, axis=-1): + """RMSE-observations standard deviation.""" + rmse = np.sqrt(_mse(sim, obs, axis=axis)) + stdev = np.sqrt( + np.nansum((obs - np.nanmean(obs, axis=axis)) ** 2, axis=axis) + / np.nansum(np.isfinite(obs), axis=axis) + ) + return rmse / stdev + + def _nse(sim, obs, axis=-1): """nash-sutcliffe efficiency.""" obs_mean = np.nanmean(obs, axis=axis) diff --git a/tests/test_stats_skill.py b/tests/test_stats_skill.py index 09cbe6b25..e64a24f97 100644 --- a/tests/test_stats_skill.py +++ b/tests/test_stats_skill.py @@ -9,7 +9,8 @@ def test_skills(obsda): simda = obsda + 5.0 assert np.isclose(skills.bias(simda, obsda).values, 5.0) - assert np.isclose(skills.percentual_bias(simda, obsda).values, 0.1033) + assert np.isclose(skills.percentual_bias(simda, obsda).values, 10.3299) + assert np.isclose(skills.volumetric_error(simda, obsda).values, 0.8967) assert np.isclose(skills.nashsutcliffe(simda, obsda).values, 0.97015) assert np.isclose(skills.lognashsutcliffe(simda, obsda).values, 0.8517) assert np.isclose(skills.pearson_correlation(simda, obsda).values, 1.0) @@ -21,5 +22,6 @@ def test_skills(obsda): skills.kge_non_parametric_flood(simda, obsda)["kge_np_flood"].values, 0.8939 ) assert np.isclose(skills.rsquared(simda, obsda).values, 1.0) - assert np.isclose(skills.mse(simda, obsda).values, 9125.0) - assert np.isclose(skills.rmse(simda, obsda).values, 95.5249) + assert np.isclose(skills.mse(simda, obsda).values, 25.0) + assert np.isclose(skills.rmse(simda, obsda).values, 5.0) + assert np.isclose(skills.rsr(simda, obsda).values, 0.1727659)