diff --git a/Makefile b/Makefile
index c39eaa318..0abce0b4c 100644
--- a/Makefile
+++ b/Makefile
@@ -53,7 +53,7 @@ doc: doc/_build/html/index.html
 analysis: flake8
 
 flake8: build
-	@$(PYTHON) -m flake8 --extend-ignore=E203 --max-line-length=95 src/$(PROJECT)
+	@$(PYTHON) -m flake8 src/$(PROJECT)
 
 ## pylint is garbage, also see https://lukeplant.me.uk/blog/posts/pylint-false-positives/#running-total
 # pylint: build
diff --git a/setup.cfg b/setup.cfg
index 6c3beee4d..bc8ee8657 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -4,6 +4,10 @@ testpaths = src/iminuit
 [aliases]
 test=pytest
 
+[flake8]
+max-line-length = 95
+ignore = E203,W503
+
 [check-manifest]
 ignore =
     .ci/**
diff --git a/src/iminuit/cost.py b/src/iminuit/cost.py
index ed1986852..84523382c 100644
--- a/src/iminuit/cost.py
+++ b/src/iminuit/cost.py
@@ -65,6 +65,7 @@ class UnbinnedNLL:
     unbinned data is available.
     """
 
+    mask = None
     verbose = False
     errordef = 0.5
 
@@ -91,7 +92,8 @@ def __init__(self, data, pdf, verbose=0):
         self.func_code = make_func_code(describe(self.pdf)[1:])
 
     def __call__(self, *args):
-        r = -_sum_log_x(self.pdf(self.data, *args))
+        data = self.data if self.mask is None else self.data[self.mask]
+        r = -_sum_log_x(self.pdf(data, *args))
         if self.verbose >= 1:
             print(args, "->", r)
         return r
@@ -104,6 +106,7 @@ class ExtendedUnbinnedNLL:
     original unbinned data is available.
     """
 
+    mask = None
     verbose = False
     errordef = 0.5
 
@@ -126,13 +129,14 @@ def __init__(self, data, scaled_pdf, verbose=0):
         - 0: is no output (default)
         - 1: print current args and negative log-likelihood value
         """
-        self.data = data
+        self.data = np.atleast_1d(data)
         self.scaled_pdf = scaled_pdf
         self.verbose = verbose
         self.func_code = make_func_code(describe(self.scaled_pdf)[1:])
 
     def __call__(self, *args):
-        ns, s = self.scaled_pdf(self.data, *args)
+        data = self.data if self.mask is None else self.data[self.mask]
+        ns, s = self.scaled_pdf(data, *args)
         r = ns - _sum_log_x(s)
         if self.verbose >= 1:
             print(args, "->", r)
@@ -145,6 +149,7 @@ class BinnedNLL:
     Use this if only the shape of the fitted PDF is of interest and the data is binned.
     """
 
+    mask = None
     verbose = False
     errordef = 0.5
 
@@ -168,18 +173,29 @@ def __init__(self, n, xe, cdf, verbose=0):
         - 0: is no output (default)
         - 1: print current args and negative log-likelihood value
         """
+        n = np.atleast_1d(n)
+        xe = np.atleast_1d(xe)
+
+        if np.any((np.array(n.shape) + 1) != xe.shape):
+            raise ValueError("n and xe have incompatible shapes")
+
         self.n = n
         self.xe = xe
-        self.tot = np.sum(n)
         self.cdf = cdf
         self.verbose = verbose
         self.func_code = make_func_code(describe(self.cdf)[1:])
 
     def __call__(self, *args):
-        c = self.cdf(self.xe, *args)
-        mu = self.tot * np.diff(c)
+        prob = np.diff(self.cdf(self.xe, *args))
+        ma = self.mask
+        if ma is None:
+            n = self.n
+        else:
+            n = self.n[ma]
+            prob = prob[ma]
+        mu = np.sum(n) * prob
         # + np.sum(mu) can be skipped, it is effectively constant
-        r = -_sum_n_log_mu(self.n, mu)
+        r = -_sum_n_log_mu(n, mu)
         if self.verbose >= 1:
             print(args, "->", r)
         return r
@@ -192,6 +208,7 @@ class ExtendedBinnedNLL:
     is binned.
     """
 
+    mask = None
     verbose = False
     errordef = 0.5
 
@@ -215,6 +232,12 @@ def __init__(self, n, xe, scaled_cdf, verbose=0):
         - 0: is no output (default)
         - 1: print current args and negative log-likelihood value
         """
+        n = np.atleast_1d(n)
+        xe = np.atleast_1d(xe)
+
+        if np.any((np.array(n.shape) + 1) != xe.shape):
+            raise ValueError("n and xe have incompatible shapes")
+
         self.n = n
         self.xe = xe
         self.scaled_cdf = scaled_cdf
@@ -223,7 +246,13 @@ def __init__(self, n, xe, scaled_cdf, verbose=0):
 
     def __call__(self, *args):
         mu = np.diff(self.scaled_cdf(self.xe, *args))
-        r = _sum_log_poisson(self.n, mu)
+        ma = self.mask
+        if ma is None:
+            n = self.n
+        else:
+            n = self.n[ma]
+            mu = mu[ma]
+        r = _sum_log_poisson(n, mu)
         if self.verbose >= 1:
             print(args, "->", r)
         return r
@@ -235,6 +264,7 @@ class LeastSquares:
     Use this if you have data of the form (x, y +/- yerror).
     """
 
+    mask = None
     verbose = False
     errordef = 1.0
 
@@ -246,10 +276,11 @@ def __init__(self, x, y, yerror, model, loss="linear", verbose=0):
         Locations where the model is evaluated.
 
     y: array-like
-        Observed values.
+        Observed values. Must have the same length as `x`.
 
-    yerror: array-like
-        Estimated uncertainty of observed values.
+    yerror: array-like or float
+        Estimated uncertainty of observed values. Must have the same shape as `y`
+        or be a scalar, which is then broadcast to the same shape as `y`.
 
     model: callable
         Function of the form f(x, par0, par1, ..., parN) whose output is compared
@@ -269,9 +300,22 @@ def __init__(self, x, y, yerror, model, loss="linear", verbose=0):
         - 0: is no output (default)
         - 1: print current args and negative log-likelihood value
         """
+        x = np.atleast_1d(x)
+        y = np.atleast_1d(y)
+
+        if len(x) != len(y):
+            raise ValueError("x and y must have same length")
+
+        if np.ndim(yerror) == 0:
+            yerror = yerror * np.ones_like(y)
+        else:
+            if np.shape(yerror) != y.shape:
+                raise ValueError("y and yerror must have same shape")
+
         self.x = x
         self.y = y
         self.yerror = yerror
+        self.model = model
 
         if hasattr(loss, "__call__"):
             self.cost = lambda y, ye, ym: np.sum(loss(_z_squared(y, ye, ym)))
@@ -285,7 +329,17 @@ def __init__(self, x, y, yerror, model, loss="linear", verbose=0):
         self.func_code = make_func_code(describe(self.model)[1:])
 
     def __call__(self, *args):
-        r = self.cost(self.y, self.yerror, self.model(self.x, *args))
+        ma = self.mask
+        if ma is None:
+            x = self.x
+            y = self.y
+            yerror = self.yerror
+        else:
+            x = self.x[ma]
+            y = self.y[ma]
+            yerror = self.yerror[ma]
+        ym = self.model(x, *args)
+        r = self.cost(y, yerror, ym)
         if self.verbose >= 1:
             print(args, "->", r)
         return r
diff --git a/src/iminuit/tests/test_cost.py b/src/iminuit/tests/test_cost.py
index e6892fbdc..a292c294b 100644
--- a/src/iminuit/tests/test_cost.py
+++ b/src/iminuit/tests/test_cost.py
@@ -14,66 +14,102 @@
 norm = stats.norm
 
 
-np.random.seed(1)
-x = np.random.randn(1000)
-nx, xe = np.histogram(x, bins=50, range=(-3, 3))
-truth = (len(x), np.mean(x), np.std(x, ddof=1))
+@pytest.fixture
+def unbinned():
+    np.random.seed(1)
+    x = np.random.randn(1000)
+    mle = (len(x), np.mean(x), np.std(x, ddof=1))
+    return mle, x
+
+
+@pytest.fixture
+def binned(unbinned):
+    mle, x = unbinned
+    nx, xe = np.histogram(x, bins=50, range=(-3, 3))
+    return mle, nx, xe
 
 
-def test_UnbinnedNLL():
+@pytest.mark.parametrize("verbose", (0, 1))
+def test_UnbinnedNLL(unbinned, verbose):
+    mle, x = unbinned
+
     def pdf(x, mu, sigma):
         return norm(mu, sigma).pdf(x)
 
-    m = Minuit(UnbinnedNLL(x, pdf, verbose=2), mu=0, sigma=1, limit_sigma=(0, None))
+    cost = UnbinnedNLL(x, pdf, verbose=verbose)
+    m = Minuit(cost, mu=0, sigma=1, limit_sigma=(0, None))
     m.migrad()
-    assert_allclose(m.args, truth[1:], atol=1e-3)
+    assert_allclose(m.args, mle[1:], atol=1e-3)
+
+    # add bad value and mask it out
+    cost.data[1] = np.nan
+    cost.mask = np.arange(len(cost.data)) != 1
+    m.migrad()
+    assert_allclose(m.args, mle[1:], rtol=0.02)
 
 
-def test_ExtendedUnbinnedNLL():
+@pytest.mark.parametrize("verbose", (0, 1))
+def test_ExtendedUnbinnedNLL(unbinned, verbose):
+    mle, x = unbinned
+
     def scaled_pdf(x, n, mu, sigma):
         return n, n * norm(mu, sigma).pdf(x)
 
-    m = Minuit(
-        ExtendedUnbinnedNLL(x, scaled_pdf),
-        n=len(x),
-        mu=0,
-        sigma=1,
-        limit_n=(0, None),
-        limit_sigma=(0, None),
-    )
+    cost = ExtendedUnbinnedNLL(x, scaled_pdf, verbose=verbose)
+    m = Minuit(cost, n=len(x), mu=0, sigma=1, limit_n=(0, None), limit_sigma=(0, None))
+    m.migrad()
+    assert_allclose(m.args, mle, atol=1e-3)
+
+    # add bad value and mask it out
+    cost.data[1] = np.nan
+    cost.mask = np.arange(len(cost.data)) != 1
     m.migrad()
-    assert_allclose(m.args, truth, atol=1e-3)
+    assert_allclose(m.args, mle, rtol=0.02)
 
 
-def test_BinnedNLL():
+@pytest.mark.parametrize("verbose", (0, 1))
+def test_BinnedNLL(binned, verbose):
+    mle, nx, xe = binned
+
    def cdf(x, mu, sigma):
         return norm(mu, sigma).cdf(x)
 
-    m = Minuit(BinnedNLL(nx, xe, cdf), mu=0, sigma=1, limit_sigma=(0, None))
+    cost = BinnedNLL(nx, xe, cdf, verbose=verbose)
+    m = Minuit(cost, mu=0, sigma=1, limit_sigma=(0, None))
     m.migrad()
     # binning loses information compared to unbinned case
-    assert_allclose(m.args, truth[1:], rtol=0.15)
+    assert_allclose(m.args, mle[1:], rtol=0.15)
+
+    # add bad value and mask it out
+    cost.n[1] = -1000
+    cost.mask = np.arange(len(cost.n)) != 1
+    m.migrad()
+    assert_allclose(m.args, mle[1:], atol=0.04)
 
 
-def test_ExtendedBinnedNLL():
+@pytest.mark.parametrize("verbose", (0, 1))
+def test_ExtendedBinnedNLL(binned, verbose):
+    mle, nx, xe = binned
+
     def scaled_cdf(x, n, mu, sigma):
         return n * norm(mu, sigma).cdf(x)
 
-    m = Minuit(
-        ExtendedBinnedNLL(nx, xe, scaled_cdf),
-        n=len(x),
-        mu=0,
-        sigma=1,
-        limit_n=(0, None),
-        limit_sigma=(0, None),
-    )
+    cost = ExtendedBinnedNLL(nx, xe, scaled_cdf, verbose=verbose)
+    m = Minuit(cost, n=mle[0], mu=0, sigma=1, limit_n=(0, None), limit_sigma=(0, None))
     m.migrad()
     # binning loses information compared to unbinned case
-    assert_allclose(m.args, truth, rtol=0.15)
+    assert_allclose(m.args, mle, rtol=0.15)
+
+    # add bad value and mask it out
+    cost.n[1] = -1000
+    cost.mask = np.arange(len(cost.n)) != 1
+    m.migrad()
+    assert_allclose(m.args, mle, rtol=0.06)
 
 
 @pytest.mark.parametrize("loss", ["linear", "soft_l1", np.arctan])
-def test_LeastSquares(loss):
+@pytest.mark.parametrize("verbose", (0, 1))
+def test_LeastSquares(loss, verbose):
     np.random.seed(1)
     x = np.random.rand(20)
     y = 2 * x + 1
@@ -83,6 +119,13 @@ def test_LeastSquares(loss):
     def model(x, a, b):
         return a + b * x
 
-    m = Minuit(LeastSquares(x, y, ye, model, loss=loss), a=0, b=0)
+    cost = LeastSquares(x, y, ye, model, loss=loss, verbose=verbose)
+    m = Minuit(cost, a=0, b=0)
+    m.migrad()
+    assert_allclose(m.args, (1, 2), rtol=0.03)
+
+    # add bad value and mask it out
+    cost.y[1] = np.nan
+    cost.mask = np.arange(len(y)) != 1
     m.migrad()
     assert_allclose(m.args, (1, 2), rtol=0.03)
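
Usage note for reviewers (not part of the patch): the new `mask` attribute lets bad entries be excluded from a fit without constructing a new cost function object, mirroring the pattern used in the tests above. A minimal sketch, assuming scipy provides the pdf:

    import numpy as np
    from scipy import stats
    from iminuit import Minuit
    from iminuit.cost import UnbinnedNLL

    np.random.seed(1)
    x = np.random.randn(1000)
    x[1] = np.nan  # corrupt one entry

    def pdf(x, mu, sigma):
        return stats.norm(mu, sigma).pdf(x)

    cost = UnbinnedNLL(x, pdf)
    # boolean mask selects which data points enter the likelihood;
    # here everything except the corrupted entry
    cost.mask = np.arange(len(cost.data)) != 1

    m = Minuit(cost, mu=0, sigma=1, limit_sigma=(0, None))
    m.migrad()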
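Similarly, a sketch of the relaxed `yerror` argument in `LeastSquares` (illustrative values, not from the patch): a plain float is now accepted and broadcast to the shape of `y`, while an array with a mismatched shape raises a `ValueError`.

    import numpy as np
    from iminuit import Minuit
    from iminuit.cost import LeastSquares

    x = np.linspace(0, 1, 20)
    y = 1 + 2 * x

    def model(x, a, b):
        return a + b * x

    # scalar uncertainty: broadcast internally to y.shape
    cost = LeastSquares(x, y, 0.1, model)
    m = Minuit(cost, a=0, b=0)
    m.migrad()  # expect a ~ 1, b ~ 2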