Skip to content

Commit

Permalink
feat(experiments): Restore internal user filter for dw (#26911)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielbachhuber authored Dec 16, 2024
1 parent ff77be5 commit 1861698
Show file tree
Hide file tree
Showing 2 changed files with 150 additions and 57 deletions.
80 changes: 33 additions & 47 deletions frontend/src/scenes/experiments/Metrics/PrimaryGoalTrends.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,6 @@ export function PrimaryGoalTrends(): JSX.Element {

const metricIdx = 0
const currentMetric = experiment.metrics[metricIdx] as ExperimentTrendsQuery
// :FLAG: CLEAN UP AFTER MIGRATION
const isDataWarehouseMetric =
featureFlags[FEATURE_FLAGS.EXPERIMENTS_HOGQL] &&
currentMetric.count_query.series[0].kind === NodeKind.DataWarehouseNode

return (
<>
Expand Down Expand Up @@ -63,18 +59,10 @@ export function PrimaryGoalTrends(): JSX.Element {
MathAvailability.All
)

if (series[0].kind === NodeKind.DataWarehouseNode) {
setTrendsMetric({
metricIdx,
series,
filterTestAccounts: false,
})
} else {
setTrendsMetric({
metricIdx,
series,
})
}
setTrendsMetric({
metricIdx,
series,
})
} else {
if (actions?.length) {
setExperiment({
Expand Down Expand Up @@ -113,37 +101,35 @@ export function PrimaryGoalTrends(): JSX.Element {
showNumericalPropsOnly={true}
{...commonActionFilterProps}
/>
{!isDataWarehouseMetric && (
<div className="mt-4 space-y-4">
<TestAccountFilterSwitch
checked={(() => {
// :FLAG: CLEAN UP AFTER MIGRATION
if (featureFlags[FEATURE_FLAGS.EXPERIMENTS_HOGQL]) {
const val = currentMetric.count_query?.filterTestAccounts
return hasFilters ? !!val : false
}
return hasFilters ? !!experiment.filters.filter_test_accounts : false
})()}
onChange={(checked: boolean) => {
// :FLAG: CLEAN UP AFTER MIGRATION
if (featureFlags[FEATURE_FLAGS.EXPERIMENTS_HOGQL]) {
setTrendsMetric({
metricIdx,
filterTestAccounts: checked,
})
} else {
setExperiment({
filters: {
...experiment.filters,
filter_test_accounts: checked,
},
})
}
}}
fullWidth
/>
</div>
)}
<div className="mt-4 space-y-4">
<TestAccountFilterSwitch
checked={(() => {
// :FLAG: CLEAN UP AFTER MIGRATION
if (featureFlags[FEATURE_FLAGS.EXPERIMENTS_HOGQL]) {
const val = currentMetric.count_query?.filterTestAccounts
return hasFilters ? !!val : false
}
return hasFilters ? !!experiment.filters.filter_test_accounts : false
})()}
onChange={(checked: boolean) => {
// :FLAG: CLEAN UP AFTER MIGRATION
if (featureFlags[FEATURE_FLAGS.EXPERIMENTS_HOGQL]) {
setTrendsMetric({
metricIdx,
filterTestAccounts: checked,
})
} else {
setExperiment({
filters: {
...experiment.filters,
filter_test_accounts: checked,
},
})
}
}}
fullWidth
/>
</div>
{isExperimentRunning && (
<LemonBanner type="info" className="mt-3 mb-3">
Preview insights are generated based on {EXPERIMENT_DEFAULT_DURATION} days of data. This can cause a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
OBJECT_STORAGE_SECRET_ACCESS_KEY,
XDIST_SUFFIX,
)
from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, flush_persons_and_events
from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, _create_person, flush_persons_and_events
from freezegun import freeze_time
from typing import cast
from django.utils import timezone
Expand Down Expand Up @@ -198,10 +198,12 @@ def create_data_warehouse_table_with_usage(self):

path_to_s3_object = "s3://" + OBJECT_STORAGE_BUCKET + f"/{TEST_BUCKET}"

id = pa.array(["1", "2", "3", "4", "5"])
date = pa.array(["2023-01-01", "2023-01-02", "2023-01-03", "2023-01-06", "2023-01-07"])
user_id = pa.array(["user_control_0", "user_test_1", "user_test_2", "user_test_3", "user_extra"])
usage = pa.array([1000, 500, 750, 800, 900])
id = pa.array(["1", "2", "3", "4", "5", "6"])
date = pa.array(["2023-01-01", "2023-01-02", "2023-01-03", "2023-01-04", "2023-01-06", "2023-01-07"])
user_id = pa.array(
["user_control_0", "user_test_1", "user_test_2", "internal_test_1", "user_test_3", "user_extra"]
)
usage = pa.array([1000, 500, 750, 100000, 800, 900])
names = ["id", "ds", "userid", "usage"]

pq.write_to_dataset(
Expand Down Expand Up @@ -244,6 +246,15 @@ def create_data_warehouse_table_with_usage(self):
field_name="events",
configuration={"experiments_optimized": True, "experiments_timestamp_key": "ds"},
)

DataWarehouseJoin.objects.create(
team=self.team,
source_table_name=table_name,
source_table_key="userid",
joining_table_name="persons",
joining_table_key="properties.$user_id",
field_name="person",
)
return table_name

@freeze_time("2020-01-01T12:00:00Z")
Expand Down Expand Up @@ -804,6 +815,15 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self)

feature_flag_property = f"$feature/{feature_flag.key}"

self.team.test_account_filters = [
{
"key": "email",
"value": "@posthog.com",
"operator": "not_icontains",
"type": "person",
}
]
self.team.save()
count_query = TrendsQuery(
series=[
DataWarehouseNode(
Expand All @@ -816,9 +836,10 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self)
math_property="usage",
math_property_type="data_warehouse_properties",
)
]
],
filterTestAccounts=True,
)
exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")])
exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")], filterTestAccounts=True)

experiment_query = ExperimentTrendsQuery(
experiment_id=experiment.id,
Expand Down Expand Up @@ -846,6 +867,25 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self)
timestamp=datetime(2023, 1, i + 1),
)

_create_person(
team=self.team,
distinct_ids=["internal_test_1"],
properties={"email": "[email protected]", "$user_id": "internal_test_1"},
)

_create_event(
team=self.team,
event="$feature_flag_called",
distinct_id="internal_test_1",
properties={
feature_flag_property: "test",
"$feature_flag_response": "test",
"$feature_flag": feature_flag.key,
"$user_id": "internal_test_1",
},
timestamp=datetime(2023, 1, 3),
)

# "user_test_3" first exposure (feature_flag_property="control") is on 2023-01-03
# "user_test_3" relevant exposure (feature_flag_property="test") is on 2023-01-04
# "user_test_3" other event (feature_flag_property="control" is on 2023-01-05
Expand Down Expand Up @@ -906,8 +946,12 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self)
)

# Assert the expected join condition in the clickhouse SQL
expected_join_condition = f"and(equals(events.team_id, {query_runner.count_query_runner.team.id}), equals(event, %(hogql_val_8)s), greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_9)s, 6, %(hogql_val_10)s))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_11)s, 6, %(hogql_val_12)s))))) AS e__events ON"
self.assertIn(expected_join_condition, str(response.clickhouse))
expected_join_condition = f"and(equals(events.team_id, {query_runner.count_query_runner.team.id}), equals(event, %(hogql_val_12)s), greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_13)s, 6, %(hogql_val_14)s))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_15)s, 6, %(hogql_val_16)s))))) AS e__events ON"
self.assertIn(
expected_join_condition,
str(response.clickhouse),
"Please check to make sure the timestamp statements are included in the ASOF LEFT JOIN select statement. This may also fail if the placeholder numbers have changed.",
)

result = query_runner.calculate()

Expand Down Expand Up @@ -935,6 +979,65 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self)
[0.0, 500.0, 1250.0, 1250.0, 1250.0, 2050.0, 2050.0, 2050.0, 2050.0, 2050.0],
)

# Run the query again with filter_test_accounts=False
# as a point of comparison to above
count_query = TrendsQuery(
series=[
DataWarehouseNode(
id=table_name,
distinct_id_field="userid",
id_field="id",
table_name=table_name,
timestamp_field="ds",
math="avg",
math_property="usage",
math_property_type="data_warehouse_properties",
)
],
filterTestAccounts=False,
)
exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")], filterTestAccounts=False)

experiment_query = ExperimentTrendsQuery(
experiment_id=experiment.id,
kind="ExperimentTrendsQuery",
count_query=count_query,
exposure_query=exposure_query,
)

experiment.metrics = [{"type": "primary", "query": experiment_query.model_dump()}]
experiment.save()

query_runner = ExperimentTrendsQueryRunner(
query=ExperimentTrendsQuery(**experiment.metrics[0]["query"]), team=self.team
)
with freeze_time("2023-01-07"):
result = query_runner.calculate()

trend_result = cast(ExperimentTrendsQueryResponse, result)

self.assertEqual(len(result.variants), 2)

control_result = next(variant for variant in trend_result.variants if variant.key == "control")
test_result = next(variant for variant in trend_result.variants if variant.key == "test")

control_insight = next(variant for variant in trend_result.insight if variant["breakdown_value"] == "control")
test_insight = next(variant for variant in trend_result.insight if variant["breakdown_value"] == "test")

self.assertEqual(control_result.count, 1000)
self.assertEqual(test_result.count, 102050)
self.assertEqual(control_result.absolute_exposure, 1)
self.assertEqual(test_result.absolute_exposure, 4)

self.assertEqual(
control_insight["data"][:10],
[1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0],
)
self.assertEqual(
test_insight["data"][:10],
[0.0, 500.0, 1250.0, 101250.0, 101250.0, 102050.0, 102050.0, 102050.0, 102050.0, 102050.0],
)

def test_query_runner_with_data_warehouse_series_expected_query(self):
table_name = self.create_data_warehouse_table_with_payments()

Expand Down Expand Up @@ -1004,7 +1107,11 @@ def test_query_runner_with_data_warehouse_series_expected_query(self):

# Assert the expected join condition in the clickhouse SQL
expected_join_condition = f"and(equals(events.team_id, {query_runner.count_query_runner.team.id}), equals(event, %(hogql_val_7)s), greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_8)s, 6, %(hogql_val_9)s))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_10)s, 6, %(hogql_val_11)s))))) AS e__events ON"
self.assertIn(expected_join_condition, str(response.clickhouse))
self.assertIn(
expected_join_condition,
str(response.clickhouse),
"Please check to make sure the timestamp statements are included in the ASOF LEFT JOIN select statement. This may also fail if the placeholder numbers have changed.",
)

result = query_runner.calculate()

Expand Down

0 comments on commit 1861698

Please sign in to comment.