Skip to content

Commit

Permalink
make cost functions maskable (scikit-hep#444)
Browse files Browse the repository at this point in the history
  • Loading branch information
HDembinski authored Jun 30, 2020
1 parent 7b696b8 commit e7301a0
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 45 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ doc: doc/_build/html/index.html
analysis: flake8

flake8: build
@$(PYTHON) -m flake8 --extend-ignore=E203 --max-line-length=95 src/$(PROJECT)
@$(PYTHON) -m flake8 src/$(PROJECT)

## pylint is garbage, also see https://lukeplant.me.uk/blog/posts/pylint-false-positives/#running-total
# pylint: build
Expand Down
4 changes: 4 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ testpaths = src/iminuit
[aliases]
test=pytest

[flake8]
max-line-length = 95
ignore = E203,W503

[check-manifest]
ignore =
.ci/**
Expand Down
78 changes: 66 additions & 12 deletions src/iminuit/cost.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class UnbinnedNLL:
unbinned data is available.
"""

mask = None
verbose = False
errordef = 0.5

Expand All @@ -91,7 +92,8 @@ def __init__(self, data, pdf, verbose=0):
self.func_code = make_func_code(describe(self.pdf)[1:])

def __call__(self, *args):
    """Return the negative log-likelihood for the given parameter values.

    If ``self.mask`` is set (boolean or index array), only the selected
    entries of ``self.data`` enter the likelihood; this allows bad entries
    to be excluded from the fit without copying the data.
    """
    # apply the optional mask so flagged entries are excluded from the fit
    data = self.data if self.mask is None else self.data[self.mask]
    r = -_sum_log_x(self.pdf(data, *args))
    if self.verbose >= 1:
        print(args, "->", r)
    return r
Expand All @@ -104,6 +106,7 @@ class ExtendedUnbinnedNLL:
original unbinned data is available.
"""

mask = None
verbose = False
errordef = 0.5

Expand All @@ -126,13 +129,14 @@ def __init__(self, data, scaled_pdf, verbose=0):
- 0: is no output (default)
- 1: print current args and negative log-likelihood value
"""
self.data = data
self.data = np.atleast_1d(data)
self.scaled_pdf = scaled_pdf
self.verbose = verbose
self.func_code = make_func_code(describe(self.scaled_pdf)[1:])

def __call__(self, *args):
ns, s = self.scaled_pdf(self.data, *args)
data = self.data if self.mask is None else self.data[self.mask]
ns, s = self.scaled_pdf(data, *args)
r = ns - _sum_log_x(s)
if self.verbose >= 1:
print(args, "->", r)
Expand All @@ -145,6 +149,7 @@ class BinnedNLL:
Use this if only the shape of the fitted PDF is of interest and the data is binned.
"""

mask = None
verbose = False
errordef = 0.5

Expand All @@ -168,18 +173,29 @@ def __init__(self, n, xe, cdf, verbose=0):
- 0: is no output (default)
- 1: print current args and negative log-likelihood value
"""
n = np.atleast_1d(n)
xe = np.atleast_1d(xe)

if np.any((np.array(n.shape) + 1) != xe.shape):
raise ValueError("n and xe have incompatible shapes")

self.n = n
self.xe = xe
self.tot = np.sum(n)
self.cdf = cdf
self.verbose = verbose
self.func_code = make_func_code(describe(self.cdf)[1:])

def __call__(self, *args):
    """Return the negative log-likelihood of the binned counts.

    The model probability per bin is the difference of the cdf at the bin
    edges. If ``self.mask`` is set, masked-out bins are excluded and the
    expected counts are normalized to the total of the remaining bins.
    """
    # per-bin probability from the cdf evaluated at the bin edges
    prob = np.diff(self.cdf(self.xe, *args))
    ma = self.mask
    if ma is None:
        n = self.n
    else:
        n = self.n[ma]
        prob = prob[ma]
    # expected counts; total is taken from the (possibly masked) data
    mu = np.sum(n) * prob
    # + np.sum(mu) can be skipped, it is effectively constant
    r = -_sum_n_log_mu(n, mu)
    if self.verbose >= 1:
        print(args, "->", r)
    return r
Expand All @@ -192,6 +208,7 @@ class ExtendedBinnedNLL:
is binned.
"""

mask = None
verbose = False
errordef = 0.5

Expand All @@ -215,6 +232,12 @@ def __init__(self, n, xe, scaled_cdf, verbose=0):
- 0: is no output (default)
- 1: print current args and negative log-likelihood value
"""
n = np.atleast_1d(n)
xe = np.atleast_1d(xe)

if np.any((np.array(n.shape) + 1) != xe.shape):
raise ValueError("n and xe have incompatible shapes")

self.n = n
self.xe = xe
self.scaled_cdf = scaled_cdf
Expand All @@ -223,7 +246,13 @@ def __init__(self, n, xe, scaled_cdf, verbose=0):

def __call__(self, *args):
    """Return the negative log-likelihood for the extended binned fit.

    Expected counts per bin are the differences of the scaled cdf at the
    bin edges. If ``self.mask`` is set, masked-out bins are excluded from
    the Poisson likelihood sum.
    """
    # expected counts per bin from the scaled cdf at the bin edges
    mu = np.diff(self.scaled_cdf(self.xe, *args))
    ma = self.mask
    if ma is None:
        n = self.n
    else:
        n = self.n[ma]
        mu = mu[ma]
    r = _sum_log_poisson(n, mu)
    if self.verbose >= 1:
        print(args, "->", r)
    return r
Expand All @@ -235,6 +264,7 @@ class LeastSquares:
Use this if you have data of the form (x, y +/- yerror).
"""

mask = None
verbose = False
errordef = 1.0

Expand All @@ -246,10 +276,11 @@ def __init__(self, x, y, yerror, model, loss="linear", verbose=0):
Locations where the model is evaluated.
y: array-like
Observed values.
Observed values. Must have the same length as `x`.
yerror: array-like
Estimated uncertainty of observed values.
yerror: array-like or float
Estimated uncertainty of observed values. Must have same shape as `y` or
be a scalar, which is then broadcasted to same shape as `y`.
model: callable
Function of the form f(x, par0, par1, ..., parN) whose output is compared
Expand All @@ -269,9 +300,22 @@ def __init__(self, x, y, yerror, model, loss="linear", verbose=0):
- 0: is no output (default)
- 1: print current args and negative log-likelihood value
"""
x = np.atleast_1d(x)
y = np.atleast_1d(y)

if len(x) != len(y):
raise ValueError("x and y must have same length")

if np.ndim(yerror) == 0:
yerror = yerror * np.ones_like(y)
else:
if np.shape(yerror) != y.shape:
raise ValueError("y and yerror must have same shape")

self.x = x
self.y = y
self.yerror = yerror

self.model = model
if hasattr(loss, "__call__"):
self.cost = lambda y, ye, ym: np.sum(loss(_z_squared(y, ye, ym)))
Expand All @@ -285,7 +329,17 @@ def __init__(self, x, y, yerror, model, loss="linear", verbose=0):
self.func_code = make_func_code(describe(self.model)[1:])

def __call__(self, *args):
    """Return the (robust) sum of squared studentized residuals.

    If ``self.mask`` is set, masked-out points are excluded from x, y,
    and yerror alike before the model is evaluated.
    """
    ma = self.mask
    if ma is None:
        x = self.x
        y = self.y
        yerror = self.yerror
    else:
        x = self.x[ma]
        y = self.y[ma]
        yerror = self.yerror[ma]
    # evaluate the model only at the points that enter the cost
    ym = self.model(x, *args)
    r = self.cost(y, yerror, ym)
    if self.verbose >= 1:
        print(args, "->", r)
    return r
107 changes: 75 additions & 32 deletions src/iminuit/tests/test_cost.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,66 +14,102 @@
norm = stats.norm


@pytest.fixture
def unbinned():
    """Reproducible standard-normal sample and its sample statistics.

    Returns (mle, x), where mle = (size, mean, sample std) serves as the
    reference for the fitted parameters.
    """
    np.random.seed(1)  # fixed seed keeps the reference values reproducible
    x = np.random.randn(1000)
    mle = (len(x), np.mean(x), np.std(x, ddof=1))
    return mle, x


@pytest.fixture
def binned(unbinned):
    """Histogram of the unbinned sample plus the unbinned reference values.

    Returns (mle, nx, xe): the reference statistics from the ``unbinned``
    fixture, the bin contents, and the bin edges.
    """
    mle, x = unbinned
    nx, xe = np.histogram(x, bins=50, range=(-3, 3))
    return mle, nx, xe


@pytest.mark.parametrize("verbose", (0, 1))
def test_UnbinnedNLL(unbinned, verbose):
    """Fit a normal pdf to unbinned data; then mask out a corrupted entry."""
    mle, x = unbinned

    def pdf(x, mu, sigma):
        return norm(mu, sigma).pdf(x)

    cost = UnbinnedNLL(x, pdf, verbose=verbose)
    m = Minuit(cost, mu=0, sigma=1, limit_sigma=(0, None))
    m.migrad()
    assert_allclose(m.args, mle[1:], atol=1e-3)

    # add bad value and mask it out
    cost.data[1] = np.nan
    cost.mask = np.arange(len(cost.data)) != 1
    m.migrad()
    assert_allclose(m.args, mle[1:], rtol=0.02)


@pytest.mark.parametrize("verbose", (0, 1))
def test_ExtendedUnbinnedNLL(unbinned, verbose):
    """Fit a scaled normal pdf to unbinned data; then mask a corrupted entry."""
    mle, x = unbinned

    def scaled_pdf(x, n, mu, sigma):
        return n, n * norm(mu, sigma).pdf(x)

    cost = ExtendedUnbinnedNLL(x, scaled_pdf, verbose=verbose)
    m = Minuit(cost, n=len(x), mu=0, sigma=1, limit_n=(0, None), limit_sigma=(0, None))
    m.migrad()
    assert_allclose(m.args, mle, atol=1e-3)

    # add bad value and mask it out
    cost.data[1] = np.nan
    cost.mask = np.arange(len(cost.data)) != 1
    m.migrad()
    assert_allclose(m.args, mle, rtol=0.02)


@pytest.mark.parametrize("verbose", (0, 1))
def test_BinnedNLL(binned, verbose):
    """Fit a normal cdf to binned data; then mask out a corrupted bin."""
    mle, nx, xe = binned

    def cdf(x, mu, sigma):
        return norm(mu, sigma).cdf(x)

    cost = BinnedNLL(nx, xe, cdf, verbose=verbose)
    m = Minuit(cost, mu=0, sigma=1, limit_sigma=(0, None))
    m.migrad()
    # binning loses information compared to unbinned case
    assert_allclose(m.args, mle[1:], rtol=0.15)

    # add bad value and mask it out
    cost.n[1] = -1000
    cost.mask = np.arange(len(cost.n)) != 1
    m.migrad()
    assert_allclose(m.args, mle[1:], atol=0.04)


@pytest.mark.parametrize("verbose", (0, 1))
def test_ExtendedBinnedNLL(binned, verbose):
    """Fit a scaled normal cdf to binned data; then mask out a corrupted bin."""
    mle, nx, xe = binned

    def scaled_cdf(x, n, mu, sigma):
        return n * norm(mu, sigma).cdf(x)

    cost = ExtendedBinnedNLL(nx, xe, scaled_cdf, verbose=verbose)
    m = Minuit(cost, n=mle[0], mu=0, sigma=1, limit_n=(0, None), limit_sigma=(0, None))
    m.migrad()
    # binning loses information compared to unbinned case
    assert_allclose(m.args, mle, rtol=0.15)

    # add bad value and mask it out
    cost.n[1] = -1000
    cost.mask = np.arange(len(cost.n)) != 1
    m.migrad()
    assert_allclose(m.args, mle, rtol=0.06)


@pytest.mark.parametrize("loss", ["linear", "soft_l1", np.arctan])
def test_LeastSquares(loss):
@pytest.mark.parametrize("verbose", (0, 1))
def test_LeastSquares(loss, verbose):
np.random.seed(1)
x = np.random.rand(20)
y = 2 * x + 1
Expand All @@ -83,6 +119,13 @@ def test_LeastSquares(loss):
def model(x, a, b):
return a + b * x

m = Minuit(LeastSquares(x, y, ye, model, loss=loss), a=0, b=0)
cost = LeastSquares(x, y, ye, model, loss=loss, verbose=verbose)
m = Minuit(cost, a=0, b=0)
m.migrad()
assert_allclose(m.args, (1, 2), rtol=0.03)

# add bad value and mask it out
cost.y[1] = np.nan
cost.mask = np.arange(len(y)) != 1
m.migrad()
assert_allclose(m.args, (1, 2), rtol=0.03)

0 comments on commit e7301a0

Please sign in to comment.