feat(experiments): Restore internal user filter for dw (#26911)
1 parent ff77be5 · commit 1861698
Showing 2 changed files with 150 additions and 57 deletions.
@@ -17,7 +17,7 @@
     OBJECT_STORAGE_SECRET_ACCESS_KEY,
     XDIST_SUFFIX,
 )
-from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, flush_persons_and_events
+from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, _create_person, flush_persons_and_events
 from freezegun import freeze_time
 from typing import cast
 from django.utils import timezone
@@ -198,10 +198,12 @@ def create_data_warehouse_table_with_usage(self):
         path_to_s3_object = "s3://" + OBJECT_STORAGE_BUCKET + f"/{TEST_BUCKET}"

-        id = pa.array(["1", "2", "3", "4", "5"])
-        date = pa.array(["2023-01-01", "2023-01-02", "2023-01-03", "2023-01-06", "2023-01-07"])
-        user_id = pa.array(["user_control_0", "user_test_1", "user_test_2", "user_test_3", "user_extra"])
-        usage = pa.array([1000, 500, 750, 800, 900])
+        id = pa.array(["1", "2", "3", "4", "5", "6"])
+        date = pa.array(["2023-01-01", "2023-01-02", "2023-01-03", "2023-01-04", "2023-01-06", "2023-01-07"])
+        user_id = pa.array(
+            ["user_control_0", "user_test_1", "user_test_2", "internal_test_1", "user_test_3", "user_extra"]
+        )
+        usage = pa.array([1000, 500, 750, 100000, 800, 900])
         names = ["id", "ds", "userid", "usage"]

         pq.write_to_dataset(
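For readability, here is the fixture data after this change restated as rows: the new entry is id "4" (internal_test_1 on 2023-01-04, usage 100000), deliberately outsized so an unfiltered internal user visibly skews the test variant. A minimal sketch, assuming the same pyarrow import (pa) the test already uses:

import pyarrow as pa

# Same data as the pa.array calls above, laid out row-wise for scanning.
rows = [
    ("1", "2023-01-01", "user_control_0", 1000),
    ("2", "2023-01-02", "user_test_1", 500),
    ("3", "2023-01-03", "user_test_2", 750),
    ("4", "2023-01-04", "internal_test_1", 100000),  # new: the internal user's row
    ("5", "2023-01-06", "user_test_3", 800),
    ("6", "2023-01-07", "user_extra", 900),
]
names = ["id", "ds", "userid", "usage"]
table = pa.table({name: pa.array(list(col)) for name, col in zip(names, zip(*rows))})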
@@ -244,6 +246,15 @@ def create_data_warehouse_table_with_usage(self):
             field_name="events",
             configuration={"experiments_optimized": True, "experiments_timestamp_key": "ds"},
         )
+
+        DataWarehouseJoin.objects.create(
+            team=self.team,
+            source_table_name=table_name,
+            source_table_key="userid",
+            joining_table_name="persons",
+            joining_table_key="properties.$user_id",
+            field_name="person",
+        )
         return table_name

     @freeze_time("2020-01-01T12:00:00Z")
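This DataWarehouseJoin is what makes person properties reachable from warehouse rows: each row's userid is matched to a person via properties.$user_id, so the team-level email filter configured further down can exclude internal users from a data warehouse series. A rough plain-Python sketch of the join-plus-filter semantics (illustrative names and emails only, not PostHog APIs):

# Illustrative only: warehouse rows joined to persons on userid == properties["$user_id"],
# then filtered with an "email not_icontains @posthog.com" rule.
warehouse_rows = [
    {"userid": "user_test_3", "usage": 800},
    {"userid": "internal_test_1", "usage": 100000},
]
persons_by_user_id = {
    "user_test_3": {"email": "user_test_3@example.com"},    # hypothetical email
    "internal_test_1": {"email": "someone@posthog.com"},    # hypothetical email
}

def passes_test_account_filters(person: dict) -> bool:
    # "not_icontains": keep the row only if the email does NOT contain the value
    return "@posthog.com" not in person.get("email", "").lower()

kept = [r for r in warehouse_rows if passes_test_account_filters(persons_by_user_id[r["userid"]])]
assert [r["userid"] for r in kept] == ["user_test_3"]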
@@ -804,6 +815,15 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self):
         feature_flag_property = f"$feature/{feature_flag.key}"

+        self.team.test_account_filters = [
+            {
+                "key": "email",
+                "value": "@posthog.com",
+                "operator": "not_icontains",
+                "type": "person",
+            }
+        ]
+        self.team.save()
         count_query = TrendsQuery(
             series=[
                 DataWarehouseNode(
@@ -816,9 +836,10 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self):
                     math_property="usage",
                     math_property_type="data_warehouse_properties",
                 )
-            ]
+            ],
+            filterTestAccounts=True,
         )
-        exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")])
+        exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")], filterTestAccounts=True)

         experiment_query = ExperimentTrendsQuery(
             experiment_id=experiment.id,
@@ -846,6 +867,25 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self):
                 timestamp=datetime(2023, 1, i + 1),
             )

+        _create_person(
+            team=self.team,
+            distinct_ids=["internal_test_1"],
+            properties={"email": "[email protected]", "$user_id": "internal_test_1"},
+        )
+
+        _create_event(
+            team=self.team,
+            event="$feature_flag_called",
+            distinct_id="internal_test_1",
+            properties={
+                feature_flag_property: "test",
+                "$feature_flag_response": "test",
+                "$feature_flag": feature_flag.key,
+                "$user_id": "internal_test_1",
+            },
+            timestamp=datetime(2023, 1, 3),
+        )
+
         # "user_test_3" first exposure (feature_flag_property="control") is on 2023-01-03
         # "user_test_3" relevant exposure (feature_flag_property="test") is on 2023-01-04
         # "user_test_3" other event (feature_flag_property="control") is on 2023-01-05
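With the filter active, internal_test_1's exposure and their 100000-usage row should be excluded, so the cumulative "test" series the test asserts (visible as context in a later hunk) comes from user_test_1, user_test_2 and user_test_3 alone. A quick self-contained check of that arithmetic, assuming the day-by-day layout of the fixture:

# Per-day test-variant usage once internal_test_1 is filtered out:
# Jan 2: user_test_1 (500), Jan 3: user_test_2 (750), Jan 6: user_test_3 (800).
daily = [0, 500, 750, 0, 0, 800, 0, 0, 0, 0]
cumulative, total = [], 0
for value in daily:
    total += value
    cumulative.append(float(total))
assert cumulative == [0.0, 500.0, 1250.0, 1250.0, 1250.0, 2050.0, 2050.0, 2050.0, 2050.0, 2050.0]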
@@ -906,8 +946,12 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self):
         )

         # Assert the expected join condition in the clickhouse SQL
-        expected_join_condition = f"and(equals(events.team_id, {query_runner.count_query_runner.team.id}), equals(event, %(hogql_val_8)s), greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_9)s, 6, %(hogql_val_10)s))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_11)s, 6, %(hogql_val_12)s))))) AS e__events ON"
-        self.assertIn(expected_join_condition, str(response.clickhouse))
+        expected_join_condition = f"and(equals(events.team_id, {query_runner.count_query_runner.team.id}), equals(event, %(hogql_val_12)s), greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_13)s, 6, %(hogql_val_14)s))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_15)s, 6, %(hogql_val_16)s))))) AS e__events ON"
+        self.assertIn(
+            expected_join_condition,
+            str(response.clickhouse),
+            "Please check to make sure the timestamp statements are included in the ASOF LEFT JOIN select statement. This may also fail if the placeholder numbers have changed.",
+        )

         result = query_runner.calculate()
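The placeholder indices shift from %(hogql_val_8)s to %(hogql_val_12)s here because the new test-account filter binds extra parameters earlier in the generated HogQL, which is exactly what the new assertion message warns about. If that renumbering churn ever becomes a nuisance, one possible alternative (a sketch, not what the test does) is to normalize the indices before asserting:

import re

def normalize_placeholders(sql: str) -> str:
    # Collapse numbered HogQL parameters so assertions survive renumbering.
    return re.sub(r"%\(hogql_val_\d+\)s", "%(hogql_val_N)s", sql)

sql = "equals(event, %(hogql_val_12)s)"
assert "equals(event, %(hogql_val_N)s)" in normalize_placeholders(sql)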
@@ -935,6 +979,65 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self):
             [0.0, 500.0, 1250.0, 1250.0, 1250.0, 2050.0, 2050.0, 2050.0, 2050.0, 2050.0],
         )

+        # Run the query again with filter_test_accounts=False
+        # as a point of comparison to above
+        count_query = TrendsQuery(
+            series=[
+                DataWarehouseNode(
+                    id=table_name,
+                    distinct_id_field="userid",
+                    id_field="id",
+                    table_name=table_name,
+                    timestamp_field="ds",
+                    math="avg",
+                    math_property="usage",
+                    math_property_type="data_warehouse_properties",
+                )
+            ],
+            filterTestAccounts=False,
+        )
+        exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")], filterTestAccounts=False)
+
+        experiment_query = ExperimentTrendsQuery(
+            experiment_id=experiment.id,
+            kind="ExperimentTrendsQuery",
+            count_query=count_query,
+            exposure_query=exposure_query,
+        )
+
+        experiment.metrics = [{"type": "primary", "query": experiment_query.model_dump()}]
+        experiment.save()
+
+        query_runner = ExperimentTrendsQueryRunner(
+            query=ExperimentTrendsQuery(**experiment.metrics[0]["query"]), team=self.team
+        )
+        with freeze_time("2023-01-07"):
+            result = query_runner.calculate()
+
+        trend_result = cast(ExperimentTrendsQueryResponse, result)
+
+        self.assertEqual(len(result.variants), 2)
+
+        control_result = next(variant for variant in trend_result.variants if variant.key == "control")
+        test_result = next(variant for variant in trend_result.variants if variant.key == "test")
+
+        control_insight = next(variant for variant in trend_result.insight if variant["breakdown_value"] == "control")
+        test_insight = next(variant for variant in trend_result.insight if variant["breakdown_value"] == "test")
+
+        self.assertEqual(control_result.count, 1000)
+        self.assertEqual(test_result.count, 102050)
+        self.assertEqual(control_result.absolute_exposure, 1)
+        self.assertEqual(test_result.absolute_exposure, 4)
+
+        self.assertEqual(
+            control_insight["data"][:10],
+            [1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0],
+        )
+        self.assertEqual(
+            test_insight["data"][:10],
+            [0.0, 500.0, 1250.0, 101250.0, 101250.0, 102050.0, 102050.0, 102050.0, 102050.0, 102050.0],
+        )
+
     def test_query_runner_with_data_warehouse_series_expected_query(self):
         table_name = self.create_data_warehouse_table_with_payments()
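The unfiltered expectations above follow directly from the fixture: with filterTestAccounts=False, the internal user's 100000-usage row counts toward the test variant and their $feature_flag_called event adds a fourth exposure. A small sanity check of those numbers:

# Per-user test-variant usage with filterTestAccounts=False (internal user included).
test_usage = {"user_test_1": 500, "user_test_2": 750, "internal_test_1": 100000, "user_test_3": 800}
assert sum(test_usage.values()) == 102050  # test_result.count
assert len(test_usage) == 4                # test_result.absolute_exposure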
@@ -1004,7 +1107,11 @@ def test_query_runner_with_data_warehouse_series_expected_query(self):

         # Assert the expected join condition in the clickhouse SQL
         expected_join_condition = f"and(equals(events.team_id, {query_runner.count_query_runner.team.id}), equals(event, %(hogql_val_7)s), greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_8)s, 6, %(hogql_val_9)s))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_10)s, 6, %(hogql_val_11)s))))) AS e__events ON"
-        self.assertIn(expected_join_condition, str(response.clickhouse))
+        self.assertIn(
+            expected_join_condition,
+            str(response.clickhouse),
+            "Please check to make sure the timestamp statements are included in the ASOF LEFT JOIN select statement. This may also fail if the placeholder numbers have changed.",
+        )

         result = query_runner.calculate()