From c47261732880659f11b70ef28157a8394e215dfb Mon Sep 17 00:00:00 2001 From: Gil Forsyth Date: Wed, 5 Feb 2025 12:00:27 -0500 Subject: [PATCH] fix(check-nightly): multiple daily runs don't overwrite each other (#43) Co-authored-by: Bradley Dice --- .../check-nightly-success/check.py | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/check_nightly_success/check-nightly-success/check.py b/check_nightly_success/check-nightly-success/check.py index 9a05711..822c9b6 100644 --- a/check_nightly_success/check-nightly-success/check.py +++ b/check_nightly_success/check-nightly-success/check.py @@ -4,10 +4,10 @@ # ruff: noqa: INP001 import argparse -import itertools import os import re import sys +from collections import defaultdict from datetime import datetime import requests @@ -52,7 +52,24 @@ def main( now = datetime.now(tz=tz) latest_success = {} - for branch, branch_runs in itertools.groupby(runs, key=lambda r: r["head_branch"]): + # Rather frustratingly, the workflow runs returned from the GitHub API can + # have alternating ordering of `head_branch` + # e.g. + # run[0]['head_branch'] == "branch-25.02" + # run[1]['head_branch'] == "branch-25.04" + # run[2]['head_branch'] == "branch-25.02" + # + # In this situation, the behavior of `itertools.groupby` (previously used + # here) is to only group _consecutive_ runs, so the results of the + # subsequent branch match (i.e. the second group of `branch-25.02` runs) + # will overwrite the results of the first one, potentially overwriting a + # previous success. The snippet below unifies the groups so it's more like a + # SQL groupby and there is no chance of overwriting. + branch_dict = defaultdict(list) + for run in runs: + branch_dict[run["head_branch"]].append(run) + + for branch, branch_runs in branch_dict.items(): # only consider RAPIDS release branches, which have versions like # '25.02' (RAPIDS) or '0.42' (ucxx, ucx-py) if not re.match("branch-[0-9]{1,2}.[0-9]{2}", branch):