
Update flaky tests
We use theoretical results on the HMC algorithm to update the flaky tests.
rlouf committed May 19, 2022
1 parent 5448fda commit 18f0e7b
Showing 1 changed file with 64 additions and 83 deletions: tests/test_hmc.py
@@ -46,7 +46,7 @@ def kernel_factory(inverse_mass_matrix: TensorVariable):

assert final_state[0] != 3.0 # the chain has moved
assert np.ndim(step_size) == 0 # scalar step size
- assert step_size > 0.1 and step_size < 2 # stable step size
+ assert step_size > 0.1 and step_size < 2 # stable range for the step size
assert np.ndim(inverse_mass_matrix) == 0 # scalar mass matrix
assert inverse_mass_matrix == pytest.approx(4, rel=1.0)
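
Both adaptation checks, above and in the next hunk, rest on the same fact: for a Gaussian target, a well-adapted (diagonal) inverse mass matrix approaches the posterior variance. A minimal sketch of the expected value (not part of the commit), assuming the N(1, 2) target these tests use:

    import numpy as np

    # Well-tuned HMC sets the inverse mass matrix close to the posterior
    # covariance; for a univariate N(1, 2) target that is simply sigma**2.
    sigma = 2.0
    expected_inverse_mass = sigma**2  # = 4.0, hence pytest.approx(4, rel=1.0)
    assert np.isclose(expected_inverse_mass, 4.0)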

@@ -91,17 +91,25 @@ def kernel_factory(inverse_mass_matrix: TensorVariable):

assert np.all(final_state[0] != np.array([1.0, 1.0])) # the chain has moved
assert np.ndim(step_size) == 0 # scalar step size
- assert step_size > 0.1 and step_size < 2
+ assert step_size > 0.1 and step_size < 2 # stable range for the step size
assert np.ndim(inverse_mass_matrix) == 1 # diagonal mass matrix
np.testing.assert_allclose(inverse_mass_matrix, scale**2, rtol=1.0)


- @pytest.mark.skip(reason="this test is flaky")
- def test_hmc():
- """Test the HMC kernel on a gaussian target."""
- step_size = 1.0
+ @pytest.mark.parametrize("step_size, diverges", [(3.9, False), (4.1, True)])
+ def test_univariate_hmc(step_size, diverges):
+ """Test the HMC kernel on a univariate Gaussian target.
+
+ Theory [1]_ says that the integration of the trajectory should be stable as
+ long as the step size is smaller than twice the standard deviation.
+
+ References
+ ----------
+ .. [1]: Neal, R. M. (2011). MCMC using Hamiltonian dynamics. Handbook of Markov Chain Monte Carlo, 2(11), 2.
+ """
inverse_mass_matrix = at.as_tensor(1.0)
- num_integration_steps = 10
+ num_integration_steps = 30

srng = at.random.RandomStream(seed=0)
Y_rv = srng.normal(1, 2)
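
The two parametrized step sizes bracket the stability threshold stated in the docstring above. A minimal sketch of the bound (not part of the commit), assuming the N(1, 2) target defined just above:

    # Neal (2011), section 4.2: for a Gaussian target with standard
    # deviation sigma, the leapfrog integrator is stable iff the step
    # size is below 2 * sigma.
    sigma = 2.0
    threshold = 2.0 * sigma  # = 4.0 for the N(1, 2) target
    # 3.9 sits just below the threshold (no divergence expected),
    # 4.1 sits just above it (divergence expected).
    assert 3.9 < threshold < 4.1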
@@ -135,52 +143,11 @@ def logprob_fn(y):
trajectory_generator = aesara.function((y_vv,), trajectory[0], updates=updates)

samples = trajectory_generator(3.0)
- assert np.mean(samples[1000:]) == pytest.approx(1.0, rel=1e-1)
- assert np.var(samples[1000:]) == pytest.approx(4.0, rel=1e-1)


- @pytest.mark.skip(reason="this test is flaky")
- def test_nuts():
- """Test the NUTS kernel on a gaussian target."""
- step_size = 0.1
- inverse_mass_matrix = at.as_tensor(1.0)
-
- srng = at.random.RandomStream(seed=0)
- Y_rv = srng.normal(1, 2)
-
- def logprob_fn(y):
- logprob = joint_logprob({Y_rv: y})
- return logprob
-
- kernel = nuts.kernel(
- srng,
- logprob_fn,
- inverse_mass_matrix,
- )
-
- y_vv = Y_rv.clone()
- initial_state = nuts.new_state(y_vv, logprob_fn)
-
- trajectory, updates = aesara.scan(
- kernel,
- outputs_info=[
- {"initial": initial_state[0]},
- {"initial": initial_state[1]},
- {"initial": initial_state[2]},
- None,
- None,
- None,
- None,
- ],
- non_sequences=step_size,
- n_steps=2000,
- )
-
- trajectory_generator = aesara.function((y_vv,), trajectory[0], updates=updates)
-
- samples = trajectory_generator(3.0)
- assert np.mean(samples[1000:]) == pytest.approx(1.0, rel=1e-1)
- assert np.var(samples[1000:]) == pytest.approx(4.0, rel=1e-1)
+ if diverges:
+ assert np.all(samples == 3.0) # every proposal is rejected, so the chain never leaves its initial position
+ else:
+ assert np.mean(samples[1000:]) == pytest.approx(1.0, rel=1e-1)
+ assert np.var(samples[1000:]) == pytest.approx(4.0, rel=1e-1)


def compute_ess(samples):
@@ -212,34 +179,36 @@ def multivariate_normal_model(srng):
def logprob_fn(y):
return joint_logprob({Y_rv: y})

- return (loc, cov, scale, rho), Y_rv, logprob_fn
+ return (loc, scale, rho), Y_rv, logprob_fn


def test_hmc_mcse():
"""This examples is recommanded in the Stan documentation [1]_ to find bugs
"""This examples is recommanded in the Stan documentation [0]_ to find bugs
that introduce bias in the average as well as the variance.
The example is simple enough to be analytically tractable, but complex enough
to find subtle bugs.
to find subtle bugs. It uses the MCMC CLT [1]_ to check that the estimates
of different quantities are within the expected range.
If we use the covariance of the normal distribution as the inverse mass matrix
it can be shown that the dynamics is equivalent to that of two independent
position variables with variances close to one [2]_ (section 4.1)
We set the inverse mass matrix to be the diagonal of the covariance matrix.
We can also show that, in these circumstances, choosing any step size that
is smaller than 2. will be stable [2]_ (section 4.2); We adjusted the number
of integration steps manually in order to get a reasonable number of effective samples.
We can also show that trajectory integration will not diverge as long as we
choose any step size that is smaller than 2 [2]_ (section 4.2); We adjusted
the number of integration steps manually in order to get a reasonable number
of effective samples.
.. [1]: https://github.com/stan-dev/stan/wiki/Testing:-Samplers
References
----------
.. [0]: https://github.com/stan-dev/stan/wiki/Testing:-Samplers
.. [1]: Geyer, C. J. (2011). Introduction to markov chain monte carlo. Handbook of markov chain monte carlo, 20116022, 45.
.. [2]: Neal, R. M. (2011). MCMC using Hamiltonian dynamics. Handbook of markov chain monte carlo, 2(11), 2.
See this issue where testing samplers is discussed:
https://github.com/stan-dev/stan/issues/318
"""
srng = at.random.RandomStream(seed=1)
- (loc, cov, scale, rho), Y_rv, logprob_fn = multivariate_normal_model(srng)
+ (loc, scale, rho), Y_rv, logprob_fn = multivariate_normal_model(srng)

- L = 100
- inverse_mass_matrix = at.as_tensor(cov)
+ L = 30
+ inverse_mass_matrix = at.as_tensor(scale)
kernel = hmc.kernel(srng, logprob_fn, inverse_mass_matrix, L)

y_vv = Y_rv.clone()
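
The scans below draw 3000 samples and feed them to compute_mcse, whose implementation is collapsed in this diff. A minimal sketch of the CLT-based check described in the docstring (not part of the commit; mcse_of_mean and clt_check are hypothetical helpers, and the sd / sqrt(ESS) form of the Monte Carlo standard error is an assumption):

    import numpy as np
    from scipy import stats

    def mcse_of_mean(x, ess):
        # Assumed form of the Monte Carlo standard error of the mean:
        # sample sd divided by sqrt(ESS), where the effective sample size
        # corrects for autocorrelation in the chain.
        return np.std(x, axis=0, ddof=1) / np.sqrt(ess)

    def clt_check(estimate, truth, mcse, alpha=0.01):
        # Under the MCMC CLT the standardized error is approximately
        # N(0, 1); the check fails only when its one-sided tail probability
        # drops below alpha, i.e. when the estimate is off by several
        # standard errors.
        z = np.abs(estimate - truth) / mcse
        return stats.norm.sf(z) > alpha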
@@ -253,8 +222,8 @@ def test_hmc_mcse():
{"initial": initial_state[2]},
None,
],
- non_sequences=1.9,
- n_steps=10000,
+ non_sequences=1.0,
+ n_steps=3000,
)

trajectory_generator = aesara.function((y_vv,), trajectory, updates=updates)
@@ -282,15 +251,32 @@ def test_hmc_mcse():
np.testing.assert_array_less(0.01, p_greater_error)


+ @pytest.mark.xfail(
+ reason="The current implementation returns biased samples that are not appropriate to estimate the variance."
+ )
def test_nuts_mcse():
"""This examples is recommanded in the Stan documentation [0]_ to find bugs
that introduce bias in the average as well as the variance.
The example is simple enough to be analytically tractable, but complex enough
to find subtle bugs. It uses the MCMC CLT [1]_ to check that the estimates
of different quantities are within the expected range.
We set the inverse mass matrix to be the diagonal of the covariance matrix.
We can also show that trajectory integration will not diverge as long as we
choose any step size that is smaller than 2 [2]_ (section 4.2); We adjusted
the number of integration steps manually in order to get a reasonable number
of effective samples.
References
----------
.. [0]: https://github.com/stan-dev/stan/wiki/Testing:-Samplers
.. [1]: Geyer, C. J. (2011). Introduction to markov chain monte carlo. Handbook of markov chain monte carlo, 20116022, 45.
.. [2]: Neal, R. M. (2011). MCMC using Hamiltonian dynamics. Handbook of markov chain monte carlo, 2(11), 2.
"""
srng = at.random.RandomStream(seed=1)
- (loc, cov, scale, rho), Y_rv, logprob_fn = multivariate_normal_model(srng)
+ (loc, scale, rho), Y_rv, logprob_fn = multivariate_normal_model(srng)

- inverse_mass_matrix = at.as_tensor(cov)
+ inverse_mass_matrix = at.as_tensor(scale)
kernel = nuts.kernel(srng, logprob_fn, inverse_mass_matrix)

y_vv = Y_rv.clone()
@@ -307,8 +293,8 @@ def test_nuts_mcse():
None,
None,
],
- non_sequences=1.9,
- n_steps=10000,
+ non_sequences=1.0,
+ n_steps=3000,
)

trajectory_generator = aesara.function((y_vv,), trajectory, updates=updates)
@@ -320,22 +306,17 @@ def test_nuts_mcse():
# MCSE on the location
delta_loc = samples - loc
mean, mcse = compute_mcse(delta_loc)
- ess = compute_ess(delta_loc)
p_greater_error = stats.norm.sf(np.abs(mean) / mcse)
np.testing.assert_array_less(0.01, p_greater_error)

# MCSE on the variance
- delta_var = np.square(samples - loc) - scale**2
+ delta_var = (samples - loc) ** 2 - scale**2
mean, mcse = compute_mcse(delta_var)
- ess = compute_ess(delta_var)
p_greater_error = stats.norm.sf(np.abs(mean) / mcse)
- print(mean, mcse, ess)
np.testing.assert_array_less(0.01, p_greater_error)

# MCSE on the correlation
delta_cor = np.prod(samples - loc, axis=1) / np.prod(scale) - rho
mean, mcse = compute_mcse(delta_cor)
- ess = compute_ess(delta_cor)
p_greater_error = stats.norm.sf(np.abs(mean) / mcse)
np.testing.assert_array_less(0.01, p_greater_error)
- print(mean, mcse, ess)
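
The correlation check works because, for a bivariate Gaussian, E[(X1 - mu1)(X2 - mu2)] = rho * sigma1 * sigma2, so the product of the centred coordinates divided by the product of standard deviations estimates rho. A minimal, self-contained sketch (not part of the commit; the values are hypothetical):

    import numpy as np

    rng = np.random.default_rng(0)
    loc = np.array([1.0, 2.0])
    scale = np.array([1.0, 1.5])
    rho = 0.75
    cov = np.diag(scale) @ np.array([[1.0, rho], [rho, 1.0]]) @ np.diag(scale)
    samples = rng.multivariate_normal(loc, cov, size=100_000)

    # E[(X1 - mu1)(X2 - mu2)] / (sigma1 * sigma2) = rho, so delta_cor has
    # mean zero when the sampler is unbiased.
    delta_cor = np.prod(samples - loc, axis=1) / np.prod(scale) - rho
    assert abs(delta_cor.mean()) < 0.02  # a few Monte Carlo standard errors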
