From 6b46930ddd765a6c863171e8597e84eea97a062a Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 10 Dec 2024 09:56:33 +0100 Subject: [PATCH 1/4] skip null technologies --- definitions/output/reports/cwv_tech_categories.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/definitions/output/reports/cwv_tech_categories.js b/definitions/output/reports/cwv_tech_categories.js index 62664fe..6bccbf3 100644 --- a/definitions/output/reports/cwv_tech_categories.js +++ b/definitions/output/reports/cwv_tech_categories.js @@ -32,6 +32,7 @@ technologies AS ( FROM pages, UNNEST(technologies) AS t, UNNEST(t.categories) AS category + WHERE technology IS NOT NULL GROUP BY category, technology @@ -42,7 +43,7 @@ SELECT categories.origins, ARRAY_AGG(technology ORDER BY technologies.origins DESC) AS technologies FROM categories -JOIN technologies +LEFT JOIN technologies USING (category) GROUP BY category, From 18289118624d3601307dbb1c45ed7c501482f8a9 Mon Sep 17 00:00:00 2001 From: Max Ostapenko Date: Tue, 10 Dec 2024 09:02:01 +0000 Subject: [PATCH 2/4] ignore null technologies --- definitions/output/reports/cwv_tech_categories.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/definitions/output/reports/cwv_tech_categories.js b/definitions/output/reports/cwv_tech_categories.js index 6bccbf3..9b637a9 100644 --- a/definitions/output/reports/cwv_tech_categories.js +++ b/definitions/output/reports/cwv_tech_categories.js @@ -32,7 +32,6 @@ technologies AS ( FROM pages, UNNEST(technologies) AS t, UNNEST(t.categories) AS category - WHERE technology IS NOT NULL GROUP BY category, technology @@ -41,9 +40,9 @@ technologies AS ( SELECT category, categories.origins, - ARRAY_AGG(technology ORDER BY technologies.origins DESC) AS technologies + ARRAY_AGG(technology IGNORE NULLS ORDER BY technologies.origins DESC) AS technologies FROM categories -LEFT JOIN technologies +JOIN technologies USING (category) GROUP BY category, From 05164fa715028ed8d5558fdca72492cc1e9aae51 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 10 Dec 2024 19:00:33 +0100 Subject: [PATCH 3/4] sql review --- .../output/core_web_vitals/technologies.js | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/definitions/output/core_web_vitals/technologies.js b/definitions/output/core_web_vitals/technologies.js index 66f99c8..d307929 100644 --- a/definitions/output/core_web_vitals/technologies.js +++ b/definitions/output/core_web_vitals/technologies.js @@ -61,7 +61,7 @@ crux AS ( WHEN 10000 THEN 'Top 10k' WHEN 1000 THEN 'Top 1k' END AS rank, - CONCAT(origin, '/') AS root_page_url, + CONCAT(origin, '/') AS root_page, IF(device = 'desktop', 'desktop', 'mobile') AS client, # CWV @@ -94,9 +94,9 @@ crux AS ( technologies AS ( SELECT - technology.technology AS app, + technology.technology, client, - page AS url + page FROM ${ctx.ref('crawl', 'pages')}, UNNEST(technologies) AS technology WHERE @@ -106,9 +106,9 @@ technologies AS ( technology.technology != '' UNION ALL SELECT - 'ALL' AS app, + 'ALL' AS technology, client, - page AS url + page FROM ${ctx.ref('crawl', 'pages')} WHERE date = '${pastMonth}' @@ -117,7 +117,7 @@ UNION ALL categories AS ( SELECT - technology.technology AS app, + technology.technology, ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category FROM ${ctx.ref('crawl', 'pages')}, UNNEST(technologies) AS technology, @@ -125,10 +125,10 @@ categories AS ( WHERE date = '${pastMonth}' ${constants.devRankFilter} - GROUP BY app + GROUP BY technology UNION ALL SELECT - 'ALL' AS app, + 'ALL' AS technology, ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category FROM ${ctx.ref('crawl', 'pages')}, UNNEST(technologies) AS technology, @@ -142,8 +142,8 @@ UNION ALL summary_stats AS ( SELECT client, - page AS url, - root_page AS root_page_url, + page, + root_page AS root_page, SAFE.INT64(summary.bytesTotal) AS bytesTotal, SAFE.INT64(summary.bytesJS) AS bytesJS, SAFE.INT64(summary.bytesImg) AS bytesImg, @@ -161,8 +161,8 @@ summary_stats AS ( lab_data AS ( SELECT client, - root_page_url, - app, + root_page, + technology, ANY_VALUE(category) AS category, AVG(bytesTotal) AS bytesTotal, AVG(bytesJS) AS bytesJS, @@ -174,13 +174,13 @@ lab_data AS ( AVG(seo) AS seo FROM summary_stats JOIN technologies - USING (client, url) + USING (client, page) JOIN categories - USING (app) + USING (technology) GROUP BY client, - root_page_url, - app + root_page, + technology ) SELECT @@ -188,7 +188,7 @@ SELECT geo, rank, ANY_VALUE(category) AS category, - app, + technology AS app, client, COUNT(0) AS origins, @@ -226,9 +226,8 @@ SELECT SAFE_CAST(APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS INT64) AS median_bytes_image FROM lab_data -JOIN crux -USING - (client, root_page_url) +INNER JOIN crux +USING (client, root_page) GROUP BY app, geo, From a1c8e6fc887f880db5006d22de4bafaaf7f2c6eb Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 10 Dec 2024 19:01:33 +0100 Subject: [PATCH 4/4] updated data fixed --- infra/dataform-trigger/index.js | 2 +- infra/tf/function_dataform_trigger.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/infra/dataform-trigger/index.js b/infra/dataform-trigger/index.js index 252b268..345c623 100644 --- a/infra/dataform-trigger/index.js +++ b/infra/dataform-trigger/index.js @@ -12,7 +12,7 @@ DECLARE previousMonth_YYYYMM STRING DEFAULT SUBSTR(previousMonth, 1, 6); WITH crux AS ( SELECT LOGICAL_AND(total_rows > 0) AS rows_available, - LOGICAL_AND(TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), last_modified_time, HOUR) < 7) AS recent_last_modified + LOGICAL_OR(TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), last_modified_time, HOUR) < 8) AS recent_last_modified FROM chrome-ux-report.materialized.INFORMATION_SCHEMA.PARTITIONS WHERE table_name IN ('device_summary', 'country_summary') AND partition_id IN (previousMonth, previousMonth_YYYYMM) diff --git a/infra/tf/function_dataform_trigger.tf b/infra/tf/function_dataform_trigger.tf index 3d9fd14..4b4af59 100644 --- a/infra/tf/function_dataform_trigger.tf +++ b/infra/tf/function_dataform_trigger.tf @@ -105,7 +105,7 @@ resource "google_cloud_scheduler_job" "bq-poller-crux-ready" { paused = false project = local.project region = local.region - schedule = "0 */7 8-14 * *" + schedule = "0 */8 8-14 * *" time_zone = "Etc/UTC" http_target { body = base64encode(local.crux_ready_scheduler_body)