diff --git a/Makefile b/Makefile
index c39eaa318..0abce0b4c 100644
--- a/Makefile
+++ b/Makefile
@@ -53,7 +53,7 @@ doc: doc/_build/html/index.html
 analysis: flake8
 
 flake8: build
-	@$(PYTHON) -m flake8 --extend-ignore=E203 --max-line-length=95 src/$(PROJECT)
+	@$(PYTHON) -m flake8 src/$(PROJECT)
 
 ## pylint is garbage, also see https://lukeplant.me.uk/blog/posts/pylint-false-positives/#running-total
 # pylint: build
diff --git a/setup.cfg b/setup.cfg
index 6c3beee4d..bc8ee8657 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -4,6 +4,10 @@ testpaths = src/iminuit
 [aliases]
 test=pytest
 
+[flake8]
+max-line-length = 95
+ignore = E203,W503
+
 [check-manifest]
 ignore =
     .ci/**
diff --git a/src/iminuit/cost.py b/src/iminuit/cost.py
index ed1986852..84523382c 100644
--- a/src/iminuit/cost.py
+++ b/src/iminuit/cost.py
@@ -65,6 +65,7 @@ class UnbinnedNLL:
     unbinned data is available.
     """
 
+    mask = None
     verbose = False
     errordef = 0.5
 
@@ -91,7 +92,8 @@ def __init__(self, data, pdf, verbose=0):
         self.func_code = make_func_code(describe(self.pdf)[1:])
 
     def __call__(self, *args):
-        r = -_sum_log_x(self.pdf(self.data, *args))
+        data = self.data if self.mask is None else self.data[self.mask]
+        r = -_sum_log_x(self.pdf(data, *args))
         if self.verbose >= 1:
             print(args, "->", r)
         return r
@@ -104,6 +106,7 @@ class ExtendedUnbinnedNLL:
     original unbinned data is available.
     """
 
+    mask = None
     verbose = False
     errordef = 0.5
 
@@ -126,13 +129,14 @@ def __init__(self, data, scaled_pdf, verbose=0):
         - 0: is no output (default)
         - 1: print current args and negative log-likelihood value
         """
-        self.data = data
+        self.data = np.atleast_1d(data)
         self.scaled_pdf = scaled_pdf
         self.verbose = verbose
         self.func_code = make_func_code(describe(self.scaled_pdf)[1:])
 
     def __call__(self, *args):
-        ns, s = self.scaled_pdf(self.data, *args)
+        data = self.data if self.mask is None else self.data[self.mask]
+        ns, s = self.scaled_pdf(data, *args)
         r = ns - _sum_log_x(s)
         if self.verbose >= 1:
             print(args, "->", r)
@@ -145,6 +149,7 @@ class BinnedNLL:
     Use this if only the shape of the fitted PDF is of interest and the data is binned.
     """
 
+    mask = None
     verbose = False
     errordef = 0.5
 
@@ -168,18 +173,29 @@ def __init__(self, n, xe, cdf, verbose=0):
         - 0: is no output (default)
         - 1: print current args and negative log-likelihood value
         """
+        n = np.atleast_1d(n)
+        xe = np.atleast_1d(xe)
+
+        if np.any((np.array(n.shape) + 1) != xe.shape):
+            raise ValueError("n and xe have incompatible shapes")
+
         self.n = n
         self.xe = xe
-        self.tot = np.sum(n)
         self.cdf = cdf
         self.verbose = verbose
         self.func_code = make_func_code(describe(self.cdf)[1:])
 
     def __call__(self, *args):
-        c = self.cdf(self.xe, *args)
-        mu = self.tot * np.diff(c)
+        prob = np.diff(self.cdf(self.xe, *args))
+        ma = self.mask
+        if ma is None:
+            n = self.n
+        else:
+            n = self.n[ma]
+            prob = prob[ma]
+        mu = np.sum(n) * prob
         # + np.sum(mu) can be skipped, it is effectively constant
-        r = -_sum_n_log_mu(self.n, mu)
+        r = -_sum_n_log_mu(n, mu)
         if self.verbose >= 1:
             print(args, "->", r)
         return r
@@ -192,6 +208,7 @@ class ExtendedBinnedNLL:
     is binned.
     """
 
+    mask = None
     verbose = False
     errordef = 0.5
 
@@ -215,6 +232,12 @@ def __init__(self, n, xe, scaled_cdf, verbose=0):
         - 0: is no output (default)
         - 1: print current args and negative log-likelihood value
         """
+        n = np.atleast_1d(n)
+        xe = np.atleast_1d(xe)
+
+        if np.any((np.array(n.shape) + 1) != xe.shape):
+            raise ValueError("n and xe have incompatible shapes")
+
         self.n = n
         self.xe = xe
         self.scaled_cdf = scaled_cdf
@@ -223,7 +246,13 @@ def __init__(self, n, xe, scaled_cdf, verbose=0):
 
     def __call__(self, *args):
         mu = np.diff(self.scaled_cdf(self.xe, *args))
-        r = _sum_log_poisson(self.n, mu)
+        ma = self.mask
+        if ma is None:
+            n = self.n
+        else:
+            n = self.n[ma]
+            mu = mu[ma]
+        r = _sum_log_poisson(n, mu)
         if self.verbose >= 1:
             print(args, "->", r)
         return r
@@ -235,6 +264,7 @@ class LeastSquares:
     Use this if you have data of the form (x, y +/- yerror).
     """
 
+    mask = None
     verbose = False
     errordef = 1.0
 
@@ -246,10 +276,11 @@ def __init__(self, x, y, yerror, model, loss="linear", verbose=0):
         Locations where the model is evaluated.
 
     y: array-like
-        Observed values.
+        Observed values. Must have the same length as `x`.
 
-    yerror: array-like
-        Estimated uncertainty of observed values.
+    yerror: array-like or float
+        Estimated uncertainty of observed values. Must have the same shape as `y`
+        or be a scalar, which is then broadcast to the same shape as `y`.
 
     model: callable
         Function of the form f(x, par0, par1, ..., parN) whose output is compared
@@ -269,9 +300,22 @@ def __init__(self, x, y, yerror, model, loss="linear", verbose=0):
         - 0: is no output (default)
         - 1: print current args and negative log-likelihood value
         """
+        x = np.atleast_1d(x)
+        y = np.atleast_1d(y)
+
+        if len(x) != len(y):
+            raise ValueError("x and y must have same length")
+
+        if np.ndim(yerror) == 0:
+            yerror = yerror * np.ones_like(y)
+        else:
+            if np.shape(yerror) != y.shape:
+                raise ValueError("y and yerror must have same shape")
+
         self.x = x
         self.y = y
         self.yerror = yerror
+        self.model = model
 
         if hasattr(loss, "__call__"):
             self.cost = lambda y, ye, ym: np.sum(loss(_z_squared(y, ye, ym)))
@@ -285,7 +329,17 @@ def __init__(self, x, y, yerror, model, loss="linear", verbose=0):
         self.func_code = make_func_code(describe(self.model)[1:])
 
     def __call__(self, *args):
-        r = self.cost(self.y, self.yerror, self.model(self.x, *args))
+        ma = self.mask
+        if ma is None:
+            x = self.x
+            y = self.y
+            yerror = self.yerror
+        else:
+            x = self.x[ma]
+            y = self.y[ma]
+            yerror = self.yerror[ma]
+        ym = self.model(x, *args)
+        r = self.cost(y, yerror, ym)
         if self.verbose >= 1:
             print(args, "->", r)
         return r
diff --git a/src/iminuit/tests/test_cost.py b/src/iminuit/tests/test_cost.py
index e6892fbdc..a292c294b 100644
--- a/src/iminuit/tests/test_cost.py
+++ b/src/iminuit/tests/test_cost.py
@@ -14,66 +14,102 @@
 norm = stats.norm
 
 
-np.random.seed(1)
-x = np.random.randn(1000)
-nx, xe = np.histogram(x, bins=50, range=(-3, 3))
-truth = (len(x), np.mean(x), np.std(x, ddof=1))
+@pytest.fixture
+def unbinned():
+    np.random.seed(1)
+    x = np.random.randn(1000)
+    mle = (len(x), np.mean(x), np.std(x, ddof=1))
+    return mle, x
+
+
+@pytest.fixture
+def binned(unbinned):
+    mle, x = unbinned
+    nx, xe = np.histogram(x, bins=50, range=(-3, 3))
+    return mle, nx, xe
 
 
-def test_UnbinnedNLL():
+@pytest.mark.parametrize("verbose", (0, 1))
+def test_UnbinnedNLL(unbinned, verbose):
+    mle, x = unbinned
+
     def pdf(x, mu, sigma):
         return norm(mu, sigma).pdf(x)
 
-    m = Minuit(UnbinnedNLL(x, pdf, verbose=2), mu=0, sigma=1, limit_sigma=(0, None))
+    cost = UnbinnedNLL(x, pdf, verbose=verbose)
+    m = Minuit(cost, mu=0, sigma=1, limit_sigma=(0, None))
     m.migrad()
-    assert_allclose(m.args, truth[1:], atol=1e-3)
+    assert_allclose(m.args, mle[1:], atol=1e-3)
+
+    # add bad value and mask it out
+    cost.data[1] = np.nan
+    cost.mask = np.arange(len(cost.data)) != 1
+    m.migrad()
+    assert_allclose(m.args, mle[1:], rtol=0.02)
 
 
-def test_ExtendedUnbinnedNLL():
+@pytest.mark.parametrize("verbose", (0, 1))
+def test_ExtendedUnbinnedNLL(unbinned, verbose):
+    mle, x = unbinned
+
     def scaled_pdf(x, n, mu, sigma):
         return n, n * norm(mu, sigma).pdf(x)
 
-    m = Minuit(
-        ExtendedUnbinnedNLL(x, scaled_pdf),
-        n=len(x),
-        mu=0,
-        sigma=1,
-        limit_n=(0, None),
-        limit_sigma=(0, None),
-    )
+    cost = ExtendedUnbinnedNLL(x, scaled_pdf, verbose=verbose)
+    m = Minuit(cost, n=len(x), mu=0, sigma=1, limit_n=(0, None), limit_sigma=(0, None))
+    m.migrad()
+    assert_allclose(m.args, mle, atol=1e-3)
+
+    # add bad value and mask it out
+    cost.data[1] = np.nan
+    cost.mask = np.arange(len(cost.data)) != 1
     m.migrad()
-    assert_allclose(m.args, truth, atol=1e-3)
+    assert_allclose(m.args, mle, rtol=0.02)
 
 
-def test_BinnedNLL():
+@pytest.mark.parametrize("verbose", (0, 1))
+def test_BinnedNLL(binned, verbose):
+    mle, nx, xe = binned
+
    def cdf(x, mu, sigma):
         return norm(mu, sigma).cdf(x)
 
-    m = Minuit(BinnedNLL(nx, xe, cdf), mu=0, sigma=1, limit_sigma=(0, None))
+    cost = BinnedNLL(nx, xe, cdf, verbose=verbose)
+    m = Minuit(cost, mu=0, sigma=1, limit_sigma=(0, None))
     m.migrad()
     # binning loses information compared to unbinned case
-    assert_allclose(m.args, truth[1:], rtol=0.15)
+    assert_allclose(m.args, mle[1:], rtol=0.15)
+
+    # add bad value and mask it out
+    cost.n[1] = -1000
+    cost.mask = np.arange(len(cost.n)) != 1
+    m.migrad()
+    assert_allclose(m.args, mle[1:], atol=0.04)
 
 
-def test_ExtendedBinnedNLL():
+@pytest.mark.parametrize("verbose", (0, 1))
+def test_ExtendedBinnedNLL(binned, verbose):
+    mle, nx, xe = binned
+
     def scaled_cdf(x, n, mu, sigma):
         return n * norm(mu, sigma).cdf(x)
 
-    m = Minuit(
-        ExtendedBinnedNLL(nx, xe, scaled_cdf),
-        n=len(x),
-        mu=0,
-        sigma=1,
-        limit_n=(0, None),
-        limit_sigma=(0, None),
-    )
+    cost = ExtendedBinnedNLL(nx, xe, scaled_cdf, verbose=verbose)
+    m = Minuit(cost, n=mle[0], mu=0, sigma=1, limit_n=(0, None), limit_sigma=(0, None))
     m.migrad()
     # binning loses information compared to unbinned case
-    assert_allclose(m.args, truth, rtol=0.15)
+    assert_allclose(m.args, mle, rtol=0.15)
+
+    # add bad value and mask it out
+    cost.n[1] = -1000
+    cost.mask = np.arange(len(cost.n)) != 1
+    m.migrad()
+    assert_allclose(m.args, mle, rtol=0.06)
 
 
 @pytest.mark.parametrize("loss", ["linear", "soft_l1", np.arctan])
-def test_LeastSquares(loss):
+@pytest.mark.parametrize("verbose", (0, 1))
+def test_LeastSquares(loss, verbose):
     np.random.seed(1)
     x = np.random.rand(20)
     y = 2 * x + 1
@@ -83,6 +119,13 @@ def test_LeastSquares(loss):
     def model(x, a, b):
         return a + b * x
 
-    m = Minuit(LeastSquares(x, y, ye, model, loss=loss), a=0, b=0)
+    cost = LeastSquares(x, y, ye, model, loss=loss, verbose=verbose)
+    m = Minuit(cost, a=0, b=0)
+    m.migrad()
+    assert_allclose(m.args, (1, 2), rtol=0.03)
+
+    # add bad value and mask it out
+    cost.y[1] = np.nan
+    cost.mask = np.arange(len(y)) != 1
     m.migrad()
     assert_allclose(m.args, (1, 2), rtol=0.03)
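
Usage note for reviewers (not part of the patch): the new `mask` attribute lets bad entries be excluded from a fit without constructing a new cost function object, mirroring the pattern used in the tests above. A minimal sketch, assuming scipy provides the pdf:

    import numpy as np
    from scipy import stats
    from iminuit import Minuit
    from iminuit.cost import UnbinnedNLL

    np.random.seed(1)
    x = np.random.randn(1000)
    x[1] = np.nan  # corrupt one entry

    def pdf(x, mu, sigma):
        return stats.norm(mu, sigma).pdf(x)

    cost = UnbinnedNLL(x, pdf)
    # boolean mask selects which data points enter the likelihood;
    # here everything except the corrupted entry
    cost.mask = np.arange(len(cost.data)) != 1

    m = Minuit(cost, mu=0, sigma=1, limit_sigma=(0, None))
    m.migrad()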
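Similarly, a sketch of the relaxed `yerror` argument in `LeastSquares` (illustrative values, not from the patch): a plain float is now accepted and broadcast to the shape of `y`, while an array with a mismatched shape raises a `ValueError`.

    import numpy as np
    from iminuit import Minuit
    from iminuit.cost import LeastSquares

    x = np.linspace(0, 1, 20)
    y = 1 + 2 * x

    def model(x, a, b):
        return a + b * x

    # scalar uncertainty: broadcast internally to y.shape
    cost = LeastSquares(x, y, 0.1, model)
    m = Minuit(cost, a=0, b=0)
    m.migrad()  # expect a ~ 1, b ~ 2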