From 22d43fa66b8fe531b2da44b9446a25a6b90e4500 Mon Sep 17 00:00:00 2001
From: Devin Matte
Date: Sat, 10 Feb 2024 22:27:00 -0500
Subject: [PATCH] Fixing aggregate query performance (#950)

---
 server/chalicelib/s3.py            | 6 +++---
 server/chalicelib/s3_historical.py | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/server/chalicelib/s3.py b/server/chalicelib/s3.py
index c529fa88e..d0ce5180c 100644
--- a/server/chalicelib/s3.py
+++ b/server/chalicelib/s3.py
@@ -81,9 +81,9 @@ def parallel_download_events(datestop):
 
 
 def download_events(sdate, edate, stops: list):
-    # This used to be month_range but updated to date_range to support live ranges
-    # If something breaks, this may be why
-    datestops = itertools.product(parallel.date_range(sdate, edate), stops)
+    # This needs to be month_range for performance and memory,
+    # however, for data from gobble we'll need specific dates, not just first of the month
+    datestops = itertools.product(parallel.month_range(sdate, edate), stops)
     result = parallel_download_events(datestops)
     result = filter(lambda row: sdate.strftime("%Y-%m-%d") <= row["service_date"] <= edate.strftime("%Y-%m-%d"), result)
     return sorted(result, key=lambda row: row["event_time"])
diff --git a/server/chalicelib/s3_historical.py b/server/chalicelib/s3_historical.py
index f0346f662..26f491e65 100644
--- a/server/chalicelib/s3_historical.py
+++ b/server/chalicelib/s3_historical.py
@@ -77,6 +77,7 @@ def headways(stop_ids: list, sdate, edate):
         headway_time_sec = delta.total_seconds()
 
         # Throw out any headways > 120 min
+        # TODO: We can't do this anymore for CR data
         if headway_time_sec > 120 * 60:
             continue
 