From 51832c3d43972a1c7e2add9e3d8330a23b1453e1 Mon Sep 17 00:00:00 2001 From: Hans Dembinski Date: Mon, 12 Feb 2024 17:13:37 +0100 Subject: [PATCH] Remove resample.bootstrap.bias and resample.bootstrap.bias_corrected (#181) The implementation of resample.bootstrap.bias was removed because of several flaws. It was found to give very wrong results when used with `method="extended"` for parameters sensitive to the sample size. It was only an approximation to the correct implementation of the bias formula, valid only in the limit of very large bootstrap samples. This was not properly stated in the documentation. It is not obvious how this general flaw can be fixed in a reasonable manner, so the decision is to remove the bias estimation via bootstrap altogether. --- src/resample/_deprecated.py | 49 -------------- src/resample/bootstrap.py | 124 ++++-------------------------------- tests/test_bootstrap.py | 10 +++ 3 files changed, 22 insertions(+), 161 deletions(-) delete mode 100644 src/resample/_deprecated.py diff --git a/src/resample/_deprecated.py b/src/resample/_deprecated.py deleted file mode 100644 index c364e4f..0000000 --- a/src/resample/_deprecated.py +++ /dev/null @@ -1,49 +0,0 @@ -import warnings -from numpy import VisibleDeprecationWarning -from typing import TypeVar, Callable, Any - -T = TypeVar("T") - - -class deprecated: - """Deprecate function of method.""" - - def __init__(self, reason: str): - """Initialize the decorator with a reason.""" - self._reason = reason - - def __call__(self, func: Callable[..., T]) -> Callable[..., T]: - """Wrap the target function or method.""" - - def decorated_func(*args: Any, **kwargs: Any) -> T: - warnings.warn( - f"{func.__name__} is deprecated: {self._reason}", - category=VisibleDeprecationWarning, - stacklevel=2, - ) - return func(*args, **kwargs) - - decorated_func.__name__ = func.__name__ - decorated_func.__doc__ = "deprecated: " + self._reason - return decorated_func - - -class deprecated_parameter: - def __init__(self, **replacements: str): - self._replacements = replacements - - def __call__(self, func: Callable[..., T]) -> Callable[..., T]: - def decorated_func(*args: Any, **kwargs: Any) -> T: - for new, old in self._replacements.items(): - if old in kwargs: - warnings.warn( - f"keyword {old!r} is deprecated, please use {new!r}", - category=VisibleDeprecationWarning, - stacklevel=2, - ) - kwargs[new] = kwargs[old] - del kwargs[old] - return func(*args, **kwargs) - - decorated_func.__name__ = func.__name__ - return decorated_func diff --git a/src/resample/bootstrap.py b/src/resample/bootstrap.py index 6732fde..7552cfc 100644 --- a/src/resample/bootstrap.py +++ b/src/resample/bootstrap.py @@ -38,7 +38,6 @@ from . import _util from .empirical import quantile_function_gen from .jackknife import jackknife -from ._deprecated import deprecated def resample( @@ -287,117 +286,6 @@ def bootstrap( return np.array([fn(x) for x in gen]) -@deprecated( - "bootstrap.bias is deprecated and will be removed in a future revision, " - "use jackknife.bias instead" -) -def bias( - fn: Callable[..., np.ndarray], - sample: "ArrayLike", - *args: "ArrayLike", - **kwargs: Any, -) -> np.ndarray: - """ - Calculate bias of the function estimate with the bootstrap. - - Parameters - ---------- - fn : callable - Function to be bootstrapped. - sample : array-like - Original sample. - *args : array-like - Optional additional arrays of the same length to resample. - **kwargs - Keyword arguments forwarded to :func:`resample`. - - Returns - ------- - ndarray - Bootstrap estimate of bias (= expectation of estimator - true value). - - Examples - -------- - Compute bias of numpy.var with and without bias-correction. - - >>> from resample.bootstrap import bias - >>> import numpy as np - >>> x = np.arange(10) - >>> round(bias(np.var, x, size=10000, random_state=1), 1) - -0.8 - >>> round(bias(lambda x: np.var(x, ddof=1), x, size=10000, random_state=1), 1) - 0.0 - - Notes - ----- - This function has special space requirements, it needs to hold `size` replicates of - the original sample in memory at once. The balanced bootstrap is recommended over - the ordinary bootstrap for bias estimation, it tends to converge faster. - - """ - thetas = [] - if args: - replicates: List[List[np.ndarray]] = [[] for _ in range(len(args) + 1)] - for b in resample(sample, *args, **kwargs): - for ri, bi in zip(replicates, b): - ri.append(bi) - thetas.append(fn(*b)) - population_theta = fn(*(np.concatenate(r) for r in replicates)) - else: - replicates = [] - for b in resample(sample, *args, **kwargs): - replicates.append(b) - thetas.append(fn(b)) - population_theta = fn(np.concatenate(replicates)) - return np.mean(thetas, axis=0) - population_theta - - -@deprecated( - "bootstrap.bias is deprecated and will be removed in a future revision, " - "use jackknife.bias instead" -) -def bias_corrected( - fn: Callable[..., np.ndarray], - sample: "ArrayLike", - *args: "ArrayLike", - **kwargs: Any, -) -> np.ndarray: - """ - Calculate bias-corrected estimate of the function with the bootstrap. - - Parameters - ---------- - fn : callable - Estimator. Can be any mapping ℝⁿ → ℝᵏ, where n is the sample size - and k is the length of the output array. - sample : array-like - Original sample. - *args : array-like - Optional additional arrays of the same length to resample. - **kwargs - Keyword arguments forwarded to :func:`resample`. - - Returns - ------- - ndarray - Estimate with some bias removed. - - Examples - -------- - Compute bias-corrected estimate of numpy.var. - - >>> from resample.bootstrap import bias_corrected - >>> import numpy as np - >>> x = np.arange(10) - >>> round(np.var(x), 1) - 8.2 - >>> round(bias_corrected(np.var, x, size=10000, random_state=1), 1) - 9.1 - - """ - return fn(sample, *args) - bias(fn, sample, *args, **kwargs) - - def variance( fn: Callable[..., np.ndarray], sample: "ArrayLike", @@ -682,3 +570,15 @@ def _confidence_interval_bca( quant = quantile_function_gen(thetas) return quant(p_low), quant(p_high) + + +def __getattr__(key: str) -> Any: + for match in ("bias", "bias_corrected"): + if key == match: + msg = ( + f"resample.bootstrap.{match} has been removed. The implementation was " + "discovered to be faulty, and a generic fix is not in sight. " + "Please use resample.jackknife.bias instead." + ) + raise NotImplementedError(msg) + raise AttributeError diff --git a/tests/test_bootstrap.py b/tests/test_bootstrap.py index 75df0d3..4e5f882 100644 --- a/tests/test_bootstrap.py +++ b/tests/test_bootstrap.py @@ -486,3 +486,13 @@ def test_resample_extended_5(): mu2 = 3**2 * np.sum(x, axis=0) assert_allclose(t1, (mu1, 3**2 * mu1), rtol=0.05) assert_allclose(t2, (mu2, 3**2 * mu2), rtol=0.05) + + +def test_bias_error(): + with pytest.raises(NotImplementedError): + from resample.bootstrap import bias # noqa + + with pytest.raises(NotImplementedError): + import resample.bootstrap as b + + b.bias_corrected # noqa