From 22d43fa66b8fe531b2da44b9446a25a6b90e4500 Mon Sep 17 00:00:00 2001
From: Devin Matte <devinmatte@gmail.com>
Date: Sat, 10 Feb 2024 22:27:00 -0500
Subject: [PATCH] Fixing aggregate query performance (#950)

---
 server/chalicelib/s3.py            | 6 +++---
 server/chalicelib/s3_historical.py | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/server/chalicelib/s3.py b/server/chalicelib/s3.py
index c529fa88e..d0ce5180c 100644
--- a/server/chalicelib/s3.py
+++ b/server/chalicelib/s3.py
@@ -81,9 +81,9 @@ def parallel_download_events(datestop):
 
 
 def download_events(sdate, edate, stops: list):
-    # This used to be month_range but updated to date_range to support live ranges
-    # If something breaks, this may be why
-    datestops = itertools.product(parallel.date_range(sdate, edate), stops)
+    # This needs to be month_range for performance and memory reasons.
+    # However, data from gobble will need specific dates, not just the first of the month.
+    datestops = itertools.product(parallel.month_range(sdate, edate), stops)
     result = parallel_download_events(datestops)
     result = filter(lambda row: sdate.strftime("%Y-%m-%d") <= row["service_date"] <= edate.strftime("%Y-%m-%d"), result)
     return sorted(result, key=lambda row: row["event_time"])
diff --git a/server/chalicelib/s3_historical.py b/server/chalicelib/s3_historical.py
index f0346f662..26f491e65 100644
--- a/server/chalicelib/s3_historical.py
+++ b/server/chalicelib/s3_historical.py
@@ -77,6 +77,7 @@ def headways(stop_ids: list, sdate, edate):
         headway_time_sec = delta.total_seconds()
 
         # Throw out any headways > 120 min
+        # TODO: We can't apply this 120 min headway cutoff anymore for CR data
         if headway_time_sec > 120 * 60:
             continue