From b37a29ae0855c00ff35b86d4d1dbb27605108d08 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 21:12:36 +0000 Subject: [PATCH] Update query snapshots --- .../test_clickhouse_stickiness.ambr | 562 ++++++++------ .../__snapshots__/test_clickhouse_trends.ambr | 719 ++++++++++-------- 2 files changed, 737 insertions(+), 544 deletions(-) diff --git a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_stickiness.ambr b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_stickiness.ambr index b2eb1c11b7b61..0d92ce9f57ab6 100644 --- a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_stickiness.ambr +++ b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_stickiness.ambr @@ -2,35 +2,40 @@ # name: TestClickhouseStickiness.test_aggregate_by_groups ''' /* user_id:0 request:_snapshot_ */ - SELECT countDistinct(aggregation_target), - num_intervals + SELECT groupArray(num_actors) AS counts, + groupArray(num_intervals) AS intervals FROM - (SELECT e."$group_0" AS aggregation_target, - countDistinct(toStartOfWeek(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'), 0)) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'watched movie' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') - AND event = 'watched movie' - AND (NOT has([''], "$group_0")) - GROUP BY aggregation_target) - WHERE num_intervals <= 8 - GROUP BY num_intervals - ORDER BY num_intervals + (SELECT sum(num_actors) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT 0 AS num_actors, + plus(numbers.number, 1) AS num_intervals + FROM numbers(dateDiff('week', toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), 0), plus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-02-15 23:59:59', 6, 'UTC')), 0), toIntervalWeek(1)))) AS numbers + UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT aggregation_target AS aggregation_target, + count() AS num_intervals + FROM + (SELECT e.`$group_0` AS aggregation_target, + toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 0) AS start_of_interval + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-02-15 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(notEquals(nullIf(nullIf(e.`$group_0`, ''), 'null'), ''), 1), notEquals(e.`$group_0`, '')) + GROUP BY aggregation_target, + start_of_interval + HAVING ifNull(greater(count(), 0), 0)) + GROUP BY aggregation_target) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: TestClickhouseStickiness.test_aggregate_by_groups.1 @@ -138,108 +143,147 @@ # name: TestClickhouseStickiness.test_compare ''' /* user_id:0 request:_snapshot_ */ - SELECT countDistinct(aggregation_target), - num_intervals + SELECT groupArray(num_actors) AS counts, + groupArray(num_intervals) AS intervals FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'watched movie' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND event = 'watched movie' - GROUP BY aggregation_target) - WHERE num_intervals <= 9 - GROUP BY num_intervals - ORDER BY num_intervals + (SELECT sum(num_actors) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT 0 AS num_actors, + plus(numbers.number, 1) AS num_intervals + FROM numbers(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), toIntervalDay(1)))) AS numbers + UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT aggregation_target AS aggregation_target, + count() AS num_intervals + FROM + (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS start_of_interval + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie')) + GROUP BY aggregation_target, + start_of_interval + HAVING ifNull(greater(count(), 0), 0)) + GROUP BY aggregation_target) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: TestClickhouseStickiness.test_compare.1 ''' /* user_id:0 request:_snapshot_ */ - SELECT countDistinct(aggregation_target), - num_intervals + SELECT groupArray(num_actors) AS counts, + groupArray(num_intervals) AS intervals FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'watched movie' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-24 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2019-12-31 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-24 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2019-12-31 23:59:59', 'UTC') - AND event = 'watched movie' - GROUP BY aggregation_target) - WHERE num_intervals <= 9 - GROUP BY num_intervals - ORDER BY num_intervals + (SELECT sum(num_actors) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT 0 AS num_actors, + plus(numbers.number, 1) AS num_intervals + FROM numbers(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), toIntervalDay(1)))) AS numbers + UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT aggregation_target AS aggregation_target, + count() AS num_intervals + FROM + (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS start_of_interval + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie')) + GROUP BY aggregation_target, + start_of_interval + HAVING ifNull(greater(count(), 0), 0)) + GROUP BY aggregation_target) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: TestClickhouseStickiness.test_filter_by_group_properties ''' /* user_id:0 request:_snapshot_ */ - SELECT countDistinct(aggregation_target), - num_intervals + SELECT groupArray(num_actors) AS counts, + groupArray(num_intervals) AS intervals FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfWeek(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'), 0)) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'watched movie' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') - AND event = 'watched movie' - AND (has(['technology'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - GROUP BY aggregation_target) - WHERE num_intervals <= 8 - GROUP BY num_intervals - ORDER BY num_intervals + (SELECT sum(num_actors) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT 0 AS num_actors, + plus(numbers.number, 1) AS num_intervals + FROM numbers(dateDiff('week', toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), 0), plus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-02-15 23:59:59', 6, 'UTC')), 0), toIntervalWeek(1)))) AS numbers + UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT aggregation_target AS aggregation_target, + count() AS num_intervals + FROM + (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 0) AS start_of_interval + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 99999), equals(index, 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-02-15 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__group_0.properties___industry, 'technology'), 0)) + GROUP BY aggregation_target, + start_of_interval + HAVING ifNull(greater(count(), 0), 0)) + GROUP BY aggregation_target) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: TestClickhouseStickiness.test_filter_by_group_properties.1 @@ -376,34 +420,58 @@ # name: TestClickhouseStickiness.test_stickiness_all_time.1 ''' /* user_id:0 request:_snapshot_ */ - SELECT countDistinct(aggregation_target), - num_intervals + SELECT groupArray(num_actors) AS counts, + groupArray(num_intervals) AS intervals FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'watched movie' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND event = 'watched movie' - GROUP BY aggregation_target) - WHERE num_intervals <= 9 - GROUP BY num_intervals - ORDER BY num_intervals + (SELECT sum(num_actors) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT 0 AS num_actors, + plus(numbers.number, 1) AS num_intervals + FROM numbers(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 12:00:00', 6, 'UTC'))), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), toIntervalDay(1)))) AS numbers + UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT aggregation_target AS aggregation_target, + count() AS num_intervals + FROM + (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS start_of_interval + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 12:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie')) + GROUP BY aggregation_target, + start_of_interval + HAVING ifNull(greater(count(), 0), 0)) + GROUP BY aggregation_target) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestClickhouseStickiness.test_stickiness_all_time.2 + ''' + /* user_id:0 request:_snapshot_ */ + SELECT timestamp + from events + WHERE team_id = 99999 + AND timestamp > '2015-01-01' + order by timestamp + limit 1 ''' # --- # name: TestClickhouseStickiness.test_stickiness_all_time_with_sampling @@ -420,67 +488,104 @@ # name: TestClickhouseStickiness.test_stickiness_all_time_with_sampling.1 ''' /* user_id:0 request:_snapshot_ */ - SELECT countDistinct(aggregation_target), - num_intervals + SELECT groupArray(num_actors) AS counts, + groupArray(num_intervals) AS intervals FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals - FROM events e SAMPLE 1.0 - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'watched movie' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND event = 'watched movie' - GROUP BY aggregation_target) - WHERE num_intervals <= 9 - GROUP BY num_intervals - ORDER BY num_intervals + (SELECT sum(num_actors) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT 0 AS num_actors, + plus(numbers.number, 1) AS num_intervals + FROM numbers(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 12:00:00', 6, 'UTC'))), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), toIntervalDay(1)))) AS numbers + UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT aggregation_target AS aggregation_target, + count() AS num_intervals + FROM + (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS start_of_interval + FROM events AS e SAMPLE 1.0 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 12:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie')) + GROUP BY aggregation_target, + start_of_interval + HAVING ifNull(greater(count(), 0), 0)) + GROUP BY aggregation_target) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestClickhouseStickiness.test_stickiness_all_time_with_sampling.2 + ''' + /* user_id:0 request:_snapshot_ */ + SELECT timestamp + from events + WHERE team_id = 99999 + AND timestamp > '2015-01-01' + order by timestamp + limit 1 ''' # --- # name: TestClickhouseStickiness.test_stickiness_hours ''' /* user_id:0 request:_snapshot_ */ - SELECT countDistinct(aggregation_target), - num_intervals + SELECT groupArray(num_actors) AS counts, + groupArray(num_intervals) AS intervals FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfHour(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'watched movie' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfHour(toDateTime('2020-01-01 12:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-01 20:00:00', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfHour(toDateTime('2020-01-01 12:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-01 20:00:00', 'UTC') - AND event = 'watched movie' - GROUP BY aggregation_target) - WHERE num_intervals <= 10 - GROUP BY num_intervals - ORDER BY num_intervals + (SELECT sum(num_actors) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT 0 AS num_actors, + plus(numbers.number, 1) AS num_intervals + FROM numbers(dateDiff('hour', toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 12:00:00', 6, 'UTC'))), plus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 20:00:00', 6, 'UTC'))), toIntervalHour(1)))) AS numbers + UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT aggregation_target AS aggregation_target, + count() AS num_intervals + FROM + (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + toStartOfHour(toTimeZone(e.timestamp, 'UTC')) AS start_of_interval + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 12:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 20:00:00', 6, 'UTC'))), equals(e.event, 'watched movie')) + GROUP BY aggregation_target, + start_of_interval + HAVING ifNull(greater(count(), 0), 0)) + GROUP BY aggregation_target) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: TestClickhouseStickiness.test_stickiness_people_endpoint @@ -591,28 +696,47 @@ # name: TestClickhouseStickiness.test_stickiness_with_person_on_events_v2.1 ''' /* user_id:0 request:_snapshot_ */ - SELECT countDistinct(aggregation_target), - num_intervals + SELECT groupArray(num_actors) AS counts, + groupArray(num_intervals) AS intervals FROM - (SELECT if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND event = 'watched movie' - AND notEmpty(e.person_id) - GROUP BY aggregation_target) - WHERE num_intervals <= 9 - GROUP BY num_intervals - ORDER BY num_intervals + (SELECT sum(num_actors) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT 0 AS num_actors, + plus(numbers.number, 1) AS num_intervals + FROM numbers(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), toIntervalDay(1)))) AS numbers + UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, + num_intervals AS num_intervals + FROM + (SELECT aggregation_target AS aggregation_target, + count() AS num_intervals + FROM + (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS start_of_interval + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie')) + GROUP BY aggregation_target, + start_of_interval + HAVING ifNull(greater(count(), 0), 0)) + GROUP BY aggregation_target) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + GROUP BY num_intervals + ORDER BY num_intervals ASC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: TestClickhouseStickiness.test_timezones diff --git a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_trends.ambr b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_trends.ambr index 96fa7e98d64e0..4d1cee60be802 100644 --- a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_trends.ambr +++ b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_trends.ambr @@ -2,12 +2,17 @@ # name: ClickhouseTestTrends.test_insight_trends_aggregate ''' /* user_id:0 request:_snapshot_ */ - SELECT count(*) AS total - FROM events e - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') + SELECT count() AS total + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) + ORDER BY 1 DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: ClickhouseTestTrends.test_insight_trends_aggregate.1 @@ -49,41 +54,35 @@ # name: ClickhouseTestTrends.test_insight_trends_basic ''' /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total FROM - (SELECT SUM(total) AS count, - day_start + (SELECT sum(total) AS count, + day_start AS day_start FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-15 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), toDateTime('2012-01-15 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')) - UNION ALL SELECT count(DISTINCT pdi.person_id) AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e + (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') - GROUP BY date) + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) + GROUP BY day_start) GROUP BY day_start - ORDER BY day_start) + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: ClickhouseTestTrends.test_insight_trends_basic.1 @@ -190,63 +189,72 @@ # name: ClickhouseTestTrends.test_insight_trends_cumulative ''' /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total FROM - (SELECT SUM(total) AS count, - day_start + (SELECT day_start AS day_start, + sum(count) OVER ( + ORDER BY day_start ASC) AS count FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-15 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), toDateTime('2012-01-15 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')) - UNION ALL SELECT count(*) AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') - GROUP BY date) - GROUP BY day_start - ORDER BY day_start) + (SELECT sum(total) AS count, + day_start AS day_start + FROM + (SELECT count() AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) + GROUP BY day_start) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: ClickhouseTestTrends.test_insight_trends_cumulative.1 ''' /* user_id:0 request:_snapshot_ */ - SELECT person_id AS actor_id, - count() AS actor_value + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - e.distinct_id as distinct_id, - e.team_id as team_id - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC') ) - GROUP BY actor_id - ORDER BY actor_value DESC, - actor_id DESC - LIMIT 100 - OFFSET 0 + (SELECT day_start AS day_start, + sum(count) OVER ( + ORDER BY day_start ASC) AS count + FROM + (SELECT sum(total) AS count, + day_start AS day_start + FROM + (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, + min(toStartOfDay(toTimeZone(e.timestamp, 'UTC'))) AS day_start + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) + GROUP BY if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) + GROUP BY day_start + ORDER BY day_start ASC) + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: ClickhouseTestTrends.test_insight_trends_cumulative.10 @@ -366,97 +374,182 @@ # name: ClickhouseTestTrends.test_insight_trends_cumulative.2 ''' /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total + SELECT groupArray(1)(date)[1] AS date, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM - (SELECT SUM(total) AS count, - day_start + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total, + breakdown_value AS breakdown_value, + rowNumberInAllBlocks() AS row_number FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-15 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), toDateTime('2012-01-15 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')) - UNION ALL SELECT COUNT(DISTINCT actor_id) AS total, - toStartOfDay(toTimeZone(toDateTime(first_seen_timestamp, 'UTC'), 'UTC')) AS date + (SELECT day_start AS day_start, + sum(count) OVER (PARTITION BY breakdown_value + ORDER BY day_start ASC) AS count, + breakdown_value AS breakdown_value FROM - (SELECT pdi.person_id AS actor_id, - min(timestamp) AS first_seen_timestamp - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') - GROUP BY actor_id) - GROUP BY date) - GROUP BY day_start - ORDER BY day_start) + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count() AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) + GROUP BY day_start, + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + ORDER BY day_start ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: ClickhouseTestTrends.test_insight_trends_cumulative.3 ''' /* user_id:0 request:_snapshot_ */ - SELECT person_id AS actor_id, - count() AS actor_value + SELECT groupArray(1)(date)[1] AS date, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - e.distinct_id as distinct_id, - e.team_id as team_id - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC') ) - GROUP BY actor_id - ORDER BY actor_value DESC, - actor_id DESC - LIMIT 100 - OFFSET 0 + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total, + breakdown_value AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT day_start AS day_start, + sum(count) OVER (PARTITION BY breakdown_value + ORDER BY day_start ASC) AS count, + breakdown_value AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT counts AS total, + toStartOfDay(timestamp) AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT d.timestamp AS timestamp, + count(DISTINCT e.actor_id) AS counts, + e.breakdown_value AS breakdown_value + FROM + (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp + FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC')))) AS numbers) AS d + CROSS JOIN + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS actor_id, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + LEFT JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'key'), ''), 'null'), '^"|"$', '') AS properties___key + FROM person + WHERE and(equals(person.team_id, 99999), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id) + WHERE and(equals(e.team_id, 99999), and(equals(e.event, '$pageview'), ifNull(equals(e__person.properties___key, 'some_val'), 0), ifNull(equals(e__person.properties___key, 'some_val'), 0)), greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC')))) + GROUP BY timestamp, actor_id, + breakdown_value) AS e + WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) + GROUP BY d.timestamp, + breakdown_value + ORDER BY d.timestamp ASC) + WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), 0))) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + ORDER BY day_start ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: ClickhouseTestTrends.test_insight_trends_cumulative.4 ''' /* user_id:0 request:_snapshot_ */ - SELECT replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '') AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2012-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 + SELECT groupArray(1)(date)[1] AS date, + arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, + if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value + FROM + (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total, + breakdown_value AS breakdown_value, + rowNumberInAllBlocks() AS row_number + FROM + (SELECT day_start AS day_start, + sum(count) OVER (PARTITION BY breakdown_value + ORDER BY day_start ASC) AS count, + breakdown_value AS breakdown_value + FROM + (SELECT sum(total) AS count, + day_start AS day_start, + breakdown_value AS breakdown_value + FROM + (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, + min(toStartOfDay(toTimeZone(e.timestamp, 'UTC'))) AS day_start, + ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value + FROM events AS e SAMPLE 1 + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) + GROUP BY if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), + breakdown_value) + GROUP BY day_start, + breakdown_value + ORDER BY day_start ASC, breakdown_value ASC) + ORDER BY day_start ASC) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) + WHERE isNotNull(breakdown_value) + GROUP BY breakdown_value + ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: ClickhouseTestTrends.test_insight_trends_cumulative.5 @@ -723,189 +816,164 @@ # name: ClickhouseTestTrendsCaching.test_insight_trends_merging ''' /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total FROM - (SELECT SUM(total) AS count, - day_start + (SELECT sum(total) AS count, + day_start AS day_start FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-15 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), toDateTime('2012-01-15 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')) - UNION ALL SELECT count(DISTINCT pdi.person_id) AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e + (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') - GROUP BY date) + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) + GROUP BY day_start) GROUP BY day_start - ORDER BY day_start) + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: ClickhouseTestTrendsCaching.test_insight_trends_merging.1 ''' /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total FROM - (SELECT SUM(total) AS count, - day_start + (SELECT sum(total) AS count, + day_start AS day_start FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-15 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2012-01-14 00:00:00', 'UTC')), toDateTime('2012-01-15 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-14 00:00:00', 'UTC')) - UNION ALL SELECT count(DISTINCT pdi.person_id) AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e + (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2012-01-14 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2012-01-14 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') - GROUP BY date) + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) + GROUP BY day_start) GROUP BY day_start - ORDER BY day_start) + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: ClickhouseTestTrendsCaching.test_insight_trends_merging_skipped_interval ''' /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-14 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total FROM - (SELECT SUM(total) AS count, - day_start + (SELECT sum(total) AS count, + day_start AS day_start FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-14 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2011-12-31 00:00:00', 'UTC')), toDateTime('2012-01-14 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2011-12-31 00:00:00', 'UTC')) - UNION ALL SELECT count(DISTINCT pdi.person_id) AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e + (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2011-12-31 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2011-12-31 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC') - GROUP BY date) + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-14 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) + GROUP BY day_start) GROUP BY day_start - ORDER BY day_start) + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: ClickhouseTestTrendsCaching.test_insight_trends_merging_skipped_interval.1 ''' /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-16 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total FROM - (SELECT SUM(total) AS count, - day_start + (SELECT sum(total) AS count, + day_start AS day_start FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-16 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2012-01-02 00:00:00', 'UTC')), toDateTime('2012-01-16 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-02 00:00:00', 'UTC')) - UNION ALL SELECT count(DISTINCT pdi.person_id) AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e + (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-02 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-16 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-02 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-16 23:59:59', 'UTC') - GROUP BY date) + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-16 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) + GROUP BY day_start) GROUP BY day_start - ORDER BY day_start) + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: ClickhouseTestTrendsGroups.test_aggregating_by_group ''' /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total FROM - (SELECT SUM(total) AS count, - day_start + (SELECT sum(total) AS count, + day_start AS day_start FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2020-01-12 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), toDateTime('2020-01-12 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) - UNION ALL SELECT count(DISTINCT "$group_0") AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC') - AND (NOT has([''], "$group_0")) - AND "$group_0" != '' - GROUP BY date) + (SELECT count(DISTINCT e.`$group_0`) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview'), ifNull(notEquals(nullIf(nullIf(e.`$group_0`, ''), 'null'), ''), 1), notEquals(e.`$group_0`, '')) + GROUP BY day_start) GROUP BY day_start - ORDER BY day_start) + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: ClickhouseTestTrendsGroups.test_aggregating_by_group.1 @@ -934,27 +1002,28 @@ # name: ClickhouseTestTrendsGroups.test_aggregating_by_session ''' /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total + SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, + arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) + and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total FROM - (SELECT SUM(total) AS count, - day_start + (SELECT sum(total) AS count, + day_start AS day_start FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2020-01-12 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), toDateTime('2020-01-12 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) - UNION ALL SELECT count(DISTINCT e."$session_id") AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC') - GROUP BY date) + (SELECT count(DISTINCT e.`$session_id`) AS total, + toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start + FROM events AS e SAMPLE 1 + WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) + GROUP BY day_start) GROUP BY day_start - ORDER BY day_start) + ORDER BY day_start ASC) + ORDER BY arraySum(total) DESC + LIMIT 50000 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 ''' # --- # name: ClickhouseTestTrendsGroups.test_aggregating_by_session.1