
Update flaky tests
We use theoretical results on the HMC algorithm to update the flaky tests.
rlouf committed May 19, 2022
1 parent 5448fda commit 18f0e7b
Showing 1 changed file with 64 additions and 83 deletions: tests/test_hmc.py
@@ -46,7 +46,7 @@ def kernel_factory(inverse_mass_matrix: TensorVariable):

assert final_state[0] != 3.0 # the chain has moved
assert np.ndim(step_size) == 0 # scalar step size
- assert step_size > 0.1 and step_size < 2 # stable step size
+ assert step_size > 0.1 and step_size < 2 # stable range for the step size
assert np.ndim(inverse_mass_matrix) == 0 # scalar mass matrix
assert inverse_mass_matrix == pytest.approx(4, rel=1.0)
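
Both adaptation checks, above and in the next hunk, rest on the same fact: for a Gaussian target, a well-adapted (diagonal) inverse mass matrix approaches the posterior variance. A minimal sketch of the expected value (not part of the commit), assuming the N(1, 2) target these tests use:

    import numpy as np

    # Well-tuned HMC sets the inverse mass matrix close to the posterior
    # covariance; for a univariate N(1, 2) target that is simply sigma**2.
    sigma = 2.0
    expected_inverse_mass = sigma**2  # = 4.0, hence pytest.approx(4, rel=1.0)
    assert np.isclose(expected_inverse_mass, 4.0)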

@@ -91,17 +91,25 @@ def kernel_factory(inverse_mass_matrix: TensorVariable):

assert np.all(final_state[0] != np.array([1.0, 1.0])) # the chain has moved
assert np.ndim(step_size) == 0 # scalar step size
- assert step_size > 0.1 and step_size < 2
+ assert step_size > 0.1 and step_size < 2 # stable range for the step size
assert np.ndim(inverse_mass_matrix) == 1 # diagonal mass matrix
np.testing.assert_allclose(inverse_mass_matrix, scale**2, rtol=1.0)


- @pytest.mark.skip(reason="this test is flaky")
- def test_hmc():
- """Test the HMC kernel on a gaussian target."""
- step_size = 1.0
+ @pytest.mark.parametrize("step_size, diverges", [(3.9, False), (4.1, True)])
+ def test_univariate_hmc(step_size, diverges):
+ """Test the HMC kernel on a univariate Gaussian target.
+
+ Theory [1]_ says that the integration of the trajectory should be stable as
+ long as the step size is smaller than twice the standard deviation.
+
+ References
+ ----------
+ .. [1]: Neal, R. M. (2011). MCMC using Hamiltonian dynamics. Handbook of Markov Chain Monte Carlo, 2(11), 2.
+ """
inverse_mass_matrix = at.as_tensor(1.0)
- num_integration_steps = 10
+ num_integration_steps = 30

srng = at.random.RandomStream(seed=0)
Y_rv = srng.normal(1, 2)
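
The two parametrized step sizes bracket the stability threshold stated in the docstring above. A minimal sketch of the bound (not part of the commit), assuming the N(1, 2) target defined just above:

    # Neal (2011), section 4.2: for a Gaussian target with standard
    # deviation sigma, the leapfrog integrator is stable iff the step
    # size is below 2 * sigma.
    sigma = 2.0
    threshold = 2.0 * sigma  # = 4.0 for the N(1, 2) target
    # 3.9 sits just below the threshold (no divergence expected),
    # 4.1 sits just above it (divergence expected).
    assert 3.9 < threshold < 4.1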
@@ -135,52 +143,11 @@ def logprob_fn(y):
trajectory_generator = aesara.function((y_vv,), trajectory[0], updates=updates)

samples = trajectory_generator(3.0)
- assert np.mean(samples[1000:]) == pytest.approx(1.0, rel=1e-1)
- assert np.var(samples[1000:]) == pytest.approx(4.0, rel=1e-1)


- @pytest.mark.skip(reason="this test is flaky")
- def test_nuts():
- """Test the NUTS kernel on a gaussian target."""
- step_size = 0.1
- inverse_mass_matrix = at.as_tensor(1.0)
-
- srng = at.random.RandomStream(seed=0)
- Y_rv = srng.normal(1, 2)
-
- def logprob_fn(y):
- logprob = joint_logprob({Y_rv: y})
- return logprob
-
- kernel = nuts.kernel(
- srng,
- logprob_fn,
- inverse_mass_matrix,
- )
-
- y_vv = Y_rv.clone()
- initial_state = nuts.new_state(y_vv, logprob_fn)
-
- trajectory, updates = aesara.scan(
- kernel,
- outputs_info=[
- {"initial": initial_state[0]},
- {"initial": initial_state[1]},
- {"initial": initial_state[2]},
- None,
- None,
- None,
- None,
- ],
- non_sequences=step_size,
- n_steps=2000,
- )
-
- trajectory_generator = aesara.function((y_vv,), trajectory[0], updates=updates)
-
- samples = trajectory_generator(3.0)
- assert np.mean(samples[1000:]) == pytest.approx(1.0, rel=1e-1)
- assert np.var(samples[1000:]) == pytest.approx(4.0, rel=1e-1)
+ if diverges:
+ assert np.all(samples == 3.0) # every proposal is rejected, so the chain never leaves its initial position
+ else:
+ assert np.mean(samples[1000:]) == pytest.approx(1.0, rel=1e-1)
+ assert np.var(samples[1000:]) == pytest.approx(4.0, rel=1e-1)


def compute_ess(samples):
@@ -212,34 +179,36 @@ def multivariate_normal_model(srng):
def logprob_fn(y):
return joint_logprob({Y_rv: y})

- return (loc, cov, scale, rho), Y_rv, logprob_fn
+ return (loc, scale, rho), Y_rv, logprob_fn


def test_hmc_mcse():
"""This examples is recommanded in the Stan documentation [1]_ to find bugs
"""This examples is recommanded in the Stan documentation [0]_ to find bugs
that introduce bias in the average as well as the variance.
The example is simple enough to be analytically tractable, but complex enough
to find subtle bugs.
to find subtle bugs. It uses the MCMC CLT [1]_ to check that the estimates
of different quantities are within the expected range.
If we use the covariance of the normal distribution as the inverse mass matrix
it can be shown that the dynamics is equivalent to that of two independent
position variables with variances close to one [2]_ (section 4.1)
We set the inverse mass matrix to be the diagonal of the covariance matrix.
We can also show that, in these circumstances, choosing any step size that
is smaller than 2. will be stable [2]_ (section 4.2); We adjusted the number
of integration steps manually in order to get a reasonable number of effective samples.
We can also show that trajectory integration will not diverge as long as we
choose any step size that is smaller than 2 [2]_ (section 4.2); We adjusted
the number of integration steps manually in order to get a reasonable number
of effective samples.
.. [1]: https://github.com/stan-dev/stan/wiki/Testing:-Samplers
References
----------
.. [0]: https://github.com/stan-dev/stan/wiki/Testing:-Samplers
.. [1]: Geyer, C. J. (2011). Introduction to markov chain monte carlo. Handbook of markov chain monte carlo, 20116022, 45.
.. [2]: Neal, R. M. (2011). MCMC using Hamiltonian dynamics. Handbook of markov chain monte carlo, 2(11), 2.
See this issue where testing samplers is discussed:
https://github.com/stan-dev/stan/issues/318
"""
srng = at.random.RandomStream(seed=1)
- (loc, cov, scale, rho), Y_rv, logprob_fn = multivariate_normal_model(srng)
+ (loc, scale, rho), Y_rv, logprob_fn = multivariate_normal_model(srng)

- L = 100
- inverse_mass_matrix = at.as_tensor(cov)
+ L = 30
+ inverse_mass_matrix = at.as_tensor(scale)
kernel = hmc.kernel(srng, logprob_fn, inverse_mass_matrix, L)

y_vv = Y_rv.clone()
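
The scans below draw 3000 samples and feed them to compute_mcse, whose implementation is collapsed in this diff. A minimal sketch of the CLT-based check described in the docstring (not part of the commit; mcse_of_mean and clt_check are hypothetical helpers, and the sd / sqrt(ESS) form of the Monte Carlo standard error is an assumption):

    import numpy as np
    from scipy import stats

    def mcse_of_mean(x, ess):
        # Assumed form of the Monte Carlo standard error of the mean:
        # sample sd divided by sqrt(ESS), where the effective sample size
        # corrects for autocorrelation in the chain.
        return np.std(x, axis=0, ddof=1) / np.sqrt(ess)

    def clt_check(estimate, truth, mcse, alpha=0.01):
        # Under the MCMC CLT the standardized error is approximately
        # N(0, 1); the check fails only when its one-sided tail probability
        # drops below alpha, i.e. when the estimate is off by several
        # standard errors.
        z = np.abs(estimate - truth) / mcse
        return stats.norm.sf(z) > alpha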
@@ -253,8 +222,8 @@ def test_hmc_mcse():
{"initial": initial_state[2]},
None,
],
- non_sequences=1.9,
- n_steps=10000,
+ non_sequences=1.0,
+ n_steps=3000,
)

trajectory_generator = aesara.function((y_vv,), trajectory, updates=updates)
@@ -282,15 +251,32 @@ def test_hmc_mcse():
np.testing.assert_array_less(0.01, p_greater_error)


+ @pytest.mark.xfail(
+ reason="The current implementation returns biased samples that are not appropriate to estimate the variance."
+ )
def test_nuts_mcse():
"""This examples is recommanded in the Stan documentation [0]_ to find bugs
that introduce bias in the average as well as the variance.
The example is simple enough to be analytically tractable, but complex enough
to find subtle bugs. It uses the MCMC CLT [1]_ to check that the estimates
of different quantities are within the expected range.
We set the inverse mass matrix to be the diagonal of the covariance matrix.
We can also show that trajectory integration will not diverge as long as we
choose any step size that is smaller than 2 [2]_ (section 4.2); We adjusted
the number of integration steps manually in order to get a reasonable number
of effective samples.
References
----------
.. [0]: https://github.com/stan-dev/stan/wiki/Testing:-Samplers
.. [1]: Geyer, C. J. (2011). Introduction to markov chain monte carlo. Handbook of markov chain monte carlo, 20116022, 45.
.. [2]: Neal, R. M. (2011). MCMC using Hamiltonian dynamics. Handbook of markov chain monte carlo, 2(11), 2.
"""
srng = at.random.RandomStream(seed=1)
- (loc, cov, scale, rho), Y_rv, logprob_fn = multivariate_normal_model(srng)
+ (loc, scale, rho), Y_rv, logprob_fn = multivariate_normal_model(srng)

- inverse_mass_matrix = at.as_tensor(cov)
+ inverse_mass_matrix = at.as_tensor(scale)
kernel = nuts.kernel(srng, logprob_fn, inverse_mass_matrix)

y_vv = Y_rv.clone()
@@ -307,8 +293,8 @@ def test_nuts_mcse():
None,
None,
],
- non_sequences=1.9,
- n_steps=10000,
+ non_sequences=1.0,
+ n_steps=3000,
)

trajectory_generator = aesara.function((y_vv,), trajectory, updates=updates)
@@ -320,22 +306,17 @@ def test_nuts_mcse():
# MCSE on the location
delta_loc = samples - loc
mean, mcse = compute_mcse(delta_loc)
- ess = compute_ess(delta_loc)
p_greater_error = stats.norm.sf(np.abs(mean) / mcse)
np.testing.assert_array_less(0.01, p_greater_error)

# MCSE on the variance
- delta_var = np.square(samples - loc) - scale**2
+ delta_var = (samples - loc) ** 2 - scale**2
mean, mcse = compute_mcse(delta_var)
- ess = compute_ess(delta_var)
p_greater_error = stats.norm.sf(np.abs(mean) / mcse)
- print(mean, mcse, ess)
np.testing.assert_array_less(0.01, p_greater_error)

# MCSE on the correlation
delta_cor = np.prod(samples - loc, axis=1) / np.prod(scale) - rho
mean, mcse = compute_mcse(delta_cor)
- ess = compute_ess(delta_cor)
p_greater_error = stats.norm.sf(np.abs(mean) / mcse)
np.testing.assert_array_less(0.01, p_greater_error)
- print(mean, mcse, ess)
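
The correlation check works because, for a bivariate Gaussian, E[(X1 - mu1)(X2 - mu2)] = rho * sigma1 * sigma2, so the product of the centred coordinates divided by the product of standard deviations estimates rho. A minimal, self-contained sketch (not part of the commit; the values are hypothetical):

    import numpy as np

    rng = np.random.default_rng(0)
    loc = np.array([1.0, 2.0])
    scale = np.array([1.0, 1.5])
    rho = 0.75
    cov = np.diag(scale) @ np.array([[1.0, rho], [rho, 1.0]]) @ np.diag(scale)
    samples = rng.multivariate_normal(loc, cov, size=100_000)

    # E[(X1 - mu1)(X2 - mu2)] / (sigma1 * sigma2) = rho, so delta_cor has
    # mean zero when the sampler is unbiased.
    delta_cor = np.prod(samples - loc, axis=1) / np.prod(scale) - rho
    assert abs(delta_cor.mean()) < 0.02  # a few Monte Carlo standard errors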
