From c47261732880659f11b70ef28157a8394e215dfb Mon Sep 17 00:00:00 2001
From: Gil Forsyth <gforsyth@users.noreply.github.com>
Date: Wed, 5 Feb 2025 12:00:27 -0500
Subject: [PATCH] fix(check-nightly): multiple daily runs don't overwrite each
 other (#43)

Co-authored-by: Bradley Dice <bdice@bradleydice.com>
---
 .../check-nightly-success/check.py            | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/check_nightly_success/check-nightly-success/check.py b/check_nightly_success/check-nightly-success/check.py
index 9a05711..822c9b6 100644
--- a/check_nightly_success/check-nightly-success/check.py
+++ b/check_nightly_success/check-nightly-success/check.py
@@ -4,10 +4,10 @@
 # ruff: noqa: INP001
 
 import argparse
-import itertools
 import os
 import re
 import sys
+from collections import defaultdict
 from datetime import datetime
 
 import requests
@@ -52,7 +52,24 @@ def main(
     now = datetime.now(tz=tz)
 
     latest_success = {}
-    for branch, branch_runs in itertools.groupby(runs, key=lambda r: r["head_branch"]):
+    # Rather frustratingly, the workflow runs returned from the GitHub API are
+    # not necessarily grouped by `head_branch`; branches can be interleaved,
+    # e.g.
+    #   run[0]['head_branch'] == "branch-25.02"
+    #   run[1]['head_branch'] == "branch-25.04"
+    #   run[2]['head_branch'] == "branch-25.02"
+    #
+    # `itertools.groupby` (previously used here) only groups _consecutive_
+    # items with the same key, so with interleaved runs a later group for a
+    # branch (e.g. the second group of `branch-25.02` runs) would overwrite
+    # the result recorded for an earlier group, potentially discarding a
+    # previous success. Collecting the runs into a dict first behaves like a
+    # SQL GROUP BY: each branch is processed exactly once, with all its runs.
+    branch_dict = defaultdict(list)
+    for run in runs:
+        branch_dict[run["head_branch"]].append(run)
+
+    for branch, branch_runs in branch_dict.items():
         # only consider RAPIDS release branches, which have versions like
         # '25.02' (RAPIDS) or '0.42' (ucxx, ucx-py)
         if not re.match("branch-[0-9]{1,2}.[0-9]{2}", branch):