diff --git a/posthog/hogql_queries/experiments/test/test_trends_statistics_continuous.py b/posthog/hogql_queries/experiments/test/test_trends_statistics_continuous.py index ae5bf249671ce..0ed9d43a5684a 100644 --- a/posthog/hogql_queries/experiments/test/test_trends_statistics_continuous.py +++ b/posthog/hogql_queries/experiments/test/test_trends_statistics_continuous.py @@ -64,12 +64,12 @@ def run_test(stats_version, calculate_probabilities, are_results_significant, ca self.assertEqual(p_value, 1) # Control: ~$100 mean with wide interval due to small sample - self.assertAlmostEqual(intervals["control"][0], 72, delta=5) # Lower bound - self.assertAlmostEqual(intervals["control"][1], 128, delta=5) # Upper bound + self.assertAlmostEqual(intervals["control"][0], 80, delta=5) # Lower bound + self.assertAlmostEqual(intervals["control"][1], 114, delta=5) # Upper bound # Test: ~$105 mean with wide interval due to small sample - self.assertAlmostEqual(intervals["test"][0], 75, delta=5) # Lower bound - self.assertAlmostEqual(intervals["test"][1], 130, delta=5) # Upper bound + self.assertAlmostEqual(intervals["test"][0], 80, delta=5) # Lower bound + self.assertAlmostEqual(intervals["test"][1], 120, delta=5) # Upper bound else: # Original implementation behavior for small sample self.assertAlmostEqual(probabilities[0], 0.5, delta=0.2) @@ -116,7 +116,7 @@ def run_test(stats_version, calculate_probabilities, are_results_significant, ca # Test: $120 mean with narrow interval due to large sample self.assertAlmostEqual(intervals["test"][0], 116, delta=2) # Lower bound - self.assertAlmostEqual(intervals["test"][1], 124, delta=2) # Upper bound + self.assertAlmostEqual(intervals["test"][1], 122, delta=2) # Upper bound else: # Original implementation behavior for large sample self.assertAlmostEqual(probabilities[1], 0.75, delta=0.25) @@ -224,15 +224,15 @@ def run_test(stats_version, calculate_probabilities, are_results_significant, ca # Test A variant self.assertAlmostEqual(intervals["test_a"][0], 90, delta=5) # Lower bound - self.assertAlmostEqual(intervals["test_a"][1], 110, delta=5) # Upper bound + self.assertAlmostEqual(intervals["test_a"][1], 102, delta=5) # Upper bound # Test B variant - self.assertAlmostEqual(intervals["test_b"][0], 90, delta=5) # Lower bound - self.assertAlmostEqual(intervals["test_b"][1], 110, delta=5) # Upper bound + self.assertAlmostEqual(intervals["test_b"][0], 96, delta=5) # Lower bound + self.assertAlmostEqual(intervals["test_b"][1], 108, delta=5) # Upper bound # Test C variant - self.assertAlmostEqual(intervals["test_c"][0], 90, delta=5) # Lower bound - self.assertAlmostEqual(intervals["test_c"][1], 110, delta=5) # Upper bound + self.assertAlmostEqual(intervals["test_c"][0], 95, delta=5) # Lower bound + self.assertAlmostEqual(intervals["test_c"][1], 105, delta=5) # Upper bound else: # Original implementation behavior for multiple variants with no clear winner self.assertTrue(all(0.1 < p < 0.9 for p in probabilities)) @@ -299,20 +299,20 @@ def run_test(stats_version, calculate_probabilities, are_results_significant, ca self.assertEqual(p_value, 0) # Control at $100 - self.assertAlmostEqual(intervals["control"][0], 97, delta=1) - self.assertAlmostEqual(intervals["control"][1], 103, delta=1) + self.assertAlmostEqual(intervals["control"][0], 98, delta=1) + self.assertAlmostEqual(intervals["control"][1], 102, delta=1) # Test A slightly higher at $105 - self.assertAlmostEqual(intervals["test_a"][0], 102, delta=1) - self.assertAlmostEqual(intervals["test_a"][1], 108, delta=1) + self.assertAlmostEqual(intervals["test_a"][0], 103, delta=1) + self.assertAlmostEqual(intervals["test_a"][1], 107, delta=1) # Test B clearly winning at $150 - self.assertAlmostEqual(intervals["test_b"][0], 146, delta=1) - self.assertAlmostEqual(intervals["test_b"][1], 154, delta=1) + self.assertAlmostEqual(intervals["test_b"][0], 147, delta=1) + self.assertAlmostEqual(intervals["test_b"][1], 153, delta=1) # Test C slightly higher at $110 - self.assertAlmostEqual(intervals["test_c"][0], 106, delta=1) - self.assertAlmostEqual(intervals["test_c"][1], 114, delta=1) + self.assertAlmostEqual(intervals["test_c"][0], 108, delta=1) + self.assertAlmostEqual(intervals["test_c"][1], 112, delta=1) else: # Original implementation behavior for multiple variants with clear winner self.assertTrue(probabilities[2] > 0.5) # test_b should be winning @@ -354,10 +354,10 @@ def run_test(stats_version, calculate_probabilities, are_results_significant, ca # Both variants should have wide intervals due to small sample size self.assertAlmostEqual(intervals["control"][0], 62, delta=10) - self.assertAlmostEqual(intervals["control"][1], 138, delta=10) + self.assertAlmostEqual(intervals["control"][1], 117, delta=10) - self.assertAlmostEqual(intervals["test"][0], 75, delta=10) - self.assertAlmostEqual(intervals["test"][1], 160, delta=10) + self.assertAlmostEqual(intervals["test"][0], 85, delta=10) + self.assertAlmostEqual(intervals["test"][1], 140, delta=10) else: # Original implementation behavior for insufficient sample size self.assertAlmostEqual(probabilities[0], 0.075, delta=0.025) diff --git a/posthog/hogql_queries/experiments/trends_statistics_v2_continuous.py b/posthog/hogql_queries/experiments/trends_statistics_v2_continuous.py index 2763e8ca2a05a..78d10ec9ace81 100644 --- a/posthog/hogql_queries/experiments/trends_statistics_v2_continuous.py +++ b/posthog/hogql_queries/experiments/trends_statistics_v2_continuous.py @@ -11,7 +11,7 @@ ALPHA_0 = 1.0 # Prior shape for variance BETA_0 = 1.0 # Prior scale for variance -LOG_VARIANCE = 2 +LOG_VARIANCE = 0.75 SAMPLE_SIZE = 10000 EPSILON = 1e-10 # Small epsilon value to handle zeros