Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes to CrUX pipeline #36

Merged
merged 5 commits
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 19 additions & 20 deletions definitions/output/core_web_vitals/technologies.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ crux AS (
WHEN 10000 THEN 'Top 10k'
WHEN 1000 THEN 'Top 1k'
END AS rank,
CONCAT(origin, '/') AS root_page_url,
CONCAT(origin, '/') AS root_page,
IF(device = 'desktop', 'desktop', 'mobile') AS client,

# CWV
Expand Down Expand Up @@ -94,9 +94,9 @@ crux AS (

technologies AS (
SELECT
technology.technology AS app,
technology.technology,
client,
page AS url
page
FROM ${ctx.ref('crawl', 'pages')},
UNNEST(technologies) AS technology
WHERE
Expand All @@ -106,9 +106,9 @@ technologies AS (
technology.technology != ''
UNION ALL
SELECT
'ALL' AS app,
'ALL' AS technology,
client,
page AS url
page
FROM ${ctx.ref('crawl', 'pages')}
WHERE
date = '${pastMonth}'
Expand All @@ -117,18 +117,18 @@ UNION ALL

categories AS (
SELECT
technology.technology AS app,
technology.technology,
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
FROM ${ctx.ref('crawl', 'pages')},
UNNEST(technologies) AS technology,
UNNEST(technology.categories) AS category
WHERE
date = '${pastMonth}'
${constants.devRankFilter}
GROUP BY app
GROUP BY technology
UNION ALL
SELECT
'ALL' AS app,
'ALL' AS technology,
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
FROM ${ctx.ref('crawl', 'pages')},
UNNEST(technologies) AS technology,
Expand All @@ -142,8 +142,8 @@ UNION ALL
summary_stats AS (
SELECT
client,
page AS url,
root_page AS root_page_url,
page,
root_page AS root_page,
SAFE.INT64(summary.bytesTotal) AS bytesTotal,
SAFE.INT64(summary.bytesJS) AS bytesJS,
SAFE.INT64(summary.bytesImg) AS bytesImg,
Expand All @@ -161,8 +161,8 @@ summary_stats AS (
lab_data AS (
SELECT
client,
root_page_url,
app,
root_page,
technology,
ANY_VALUE(category) AS category,
AVG(bytesTotal) AS bytesTotal,
AVG(bytesJS) AS bytesJS,
Expand All @@ -174,21 +174,21 @@ lab_data AS (
AVG(seo) AS seo
FROM summary_stats
JOIN technologies
USING (client, url)
USING (client, page)
JOIN categories
USING (app)
USING (technology)
GROUP BY
client,
root_page_url,
app
root_page,
technology
)

SELECT
DATE('${pastMonth}') AS date,
geo,
rank,
ANY_VALUE(category) AS category,
app,
technology AS app,
client,
COUNT(0) AS origins,

Expand Down Expand Up @@ -226,9 +226,8 @@ SELECT
SAFE_CAST(APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS INT64) AS median_bytes_image

FROM lab_data
JOIN crux
USING
(client, root_page_url)
INNER JOIN crux
USING (client, root_page)
GROUP BY
app,
geo,
Expand Down
2 changes: 1 addition & 1 deletion definitions/output/reports/cwv_tech_categories.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ technologies AS (
SELECT
category,
categories.origins,
ARRAY_AGG(technology ORDER BY technologies.origins DESC) AS technologies
ARRAY_AGG(technology IGNORE NULLS ORDER BY technologies.origins DESC) AS technologies
FROM categories
JOIN technologies
USING (category)
Expand Down
2 changes: 1 addition & 1 deletion infra/dataform-trigger/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ DECLARE previousMonth_YYYYMM STRING DEFAULT SUBSTR(previousMonth, 1, 6);
WITH crux AS (
SELECT
LOGICAL_AND(total_rows > 0) AS rows_available,
LOGICAL_AND(TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), last_modified_time, HOUR) < 7) AS recent_last_modified
LOGICAL_OR(TIMESTAMP_DIFF(CURRENT_TIMESTAMP(), last_modified_time, HOUR) < 8) AS recent_last_modified
FROM chrome-ux-report.materialized.INFORMATION_SCHEMA.PARTITIONS
WHERE table_name IN ('device_summary', 'country_summary')
AND partition_id IN (previousMonth, previousMonth_YYYYMM)
Expand Down
2 changes: 1 addition & 1 deletion infra/tf/function_dataform_trigger.tf
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ resource "google_cloud_scheduler_job" "bq-poller-crux-ready" {
paused = false
project = local.project
region = local.region
schedule = "0 */7 8-14 * *"
schedule = "0 */8 8-14 * *"
time_zone = "Etc/UTC"
http_target {
body = base64encode(local.crux_ready_scheduler_body)
Expand Down
Loading