diff --git a/common/types/charts.ts b/common/types/charts.ts
index 5df1e091..746ea099 100644
--- a/common/types/charts.ts
+++ b/common/types/charts.ts
@@ -22,6 +22,8 @@ export interface AggregateDataPoint {
   '50%': number;
   '75%': number;
   count: number;
+  bunched?: number;
+  on_time?: number;
   max: number;
   mean: number;
   min: number;
diff --git a/common/utils/widgets.ts b/common/utils/widgets.ts
index 3987bcb6..168adb64 100644
--- a/common/utils/widgets.ts
+++ b/common/utils/widgets.ts
@@ -69,6 +69,24 @@ const getAggDataPointsOfInterest = (aggData: AggregateDataPoint[]) => {
   return { average, min, max, median, p10, p90 };
 };
 
+/**
+ * Sums trip counts across the aggregate points and derives the share of
+ * bunched and on-time trips, alongside the usual summary statistics.
+ * `bunched` / `on_time` are optional on each point and are treated as 0 when absent.
+ */
+const getAggHeadwayDataPoints = (aggData: AggregateDataPoint[]) => {
+  const totalTrips = aggData.reduce((sum, tt) => sum + tt.count, 0);
+  const bunchedTrips = aggData.reduce((sum, tt) => sum + (tt.bunched ?? 0), 0);
+  const onTimeTrips = aggData.reduce((sum, tt) => sum + (tt.on_time ?? 0), 0);
+
+  // Avoid 0 / 0 = NaN when no trips were counted; report 0 instead.
+  const bunched = totalTrips > 0 ? bunchedTrips / totalTrips : 0;
+  const onTime = totalTrips > 0 ? onTimeTrips / totalTrips : 0;
+
+  const { average, min, max, median, p10, p90 } = getAggDataPointsOfInterest(aggData);
+  return { average, min, max, median, p10, p90, bunched, onTime };
+};
+
 export const getAggDataWidgets = (aggData: AggregateDataPoint[], type: 'times' | 'speeds') => {
   const { average, min, max, median, p10, p90 } = getAggDataPointsOfInterest(aggData);
   return [
@@ -81,6 +99,24 @@ export const getAggDataWidgets = (aggData: AggregateDataPoint[], type: 'times' |
   ];
 };
 
+/**
+ * Builds the widget list for aggregate headway data: the standard summary
+ * statistics plus the percentage of bunched and on-time trips.
+ */
+export const getAggHeadwayDataWidgets = (aggData: AggregateDataPoint[], type: 'times') => {
+  const { average, min, max, median, p10, p90, bunched, onTime } = getAggHeadwayDataPoints(aggData);
+  return [
+    { text: 'Avg', widgetValue: getWidget(type, average), type: 'data' },
+    { text: 'Median', widgetValue: getWidget(type, median), type: 'data' },
+    { text: '10%', widgetValue: getWidget(type, p10), type: 'data' },
+    { text: '90%', widgetValue: getWidget(type, p90), type: 'data' },
+    { text: 'Min', widgetValue: getWidget(type, min), type: 'data' },
+    { text: 'Max', widgetValue: getWidget(type, max), type: 'data' },
+    { text: 'Bunched Trips', widgetValue: new PercentageWidgetValue(bunched), type: 'data' },
+    { text: 'On Time Trips', widgetValue: new PercentageWidgetValue(onTime), type: 'data' },
+  ];
+};
+
 const getSingleDayNumberArray = (
   data: SingleDayDataPoint[],
   type: 'traveltimes' | 'dwells' | 'headways' | 'speeds'
diff --git a/modules/headways/HeadwaysAggregateWrapper.tsx b/modules/headways/HeadwaysAggregateWrapper.tsx
index a92f4b8e..dd37c4b4 100644
--- a/modules/headways/HeadwaysAggregateWrapper.tsx
+++ b/modules/headways/HeadwaysAggregateWrapper.tsx
@@ -6,7 +6,7 @@ import { ChartPlaceHolder } from '../../common/components/graphics/ChartPlaceHol
 import { CarouselGraphDiv } from '../../common/components/charts/CarouselGraphDiv';
 import { NoDataNotice } from '../../common/components/notices/NoDataNotice';
 import { MiniWidgetCreator } from '../../common/components/widgets/MiniWidgetCreator';
-import { getAggDataWidgets } from '../../common/utils/widgets';
+import { getAggHeadwayDataWidgets } from '../../common/utils/widgets';
 import { HeadwaysAggregateChart } from './charts/HeadwaysAggregateChart';
 
 interface HeadwaysAggregateWrapperProps {
@@ -24,7 +24,7 @@ export const HeadwaysAggregateWrapper: React.FC =
   if (!dataReady) return ;
   const headwaysData = query.data.by_date.filter((datapoint) => datapoint.peak === 'all');
   if (headwaysData.length < 1) return ;
-  const widgetObjects = getAggDataWidgets(headwaysData, 'times');
+  const widgetObjects = getAggHeadwayDataWidgets(headwaysData, 'times');
   return (
diff --git a/server/chalicelib/aggregation.py b/server/chalicelib/aggregation.py
index 810a2f1c..a6013a78 100644
--- a/server/chalicelib/aggregation.py
+++ b/server/chalicelib/aggregation.py
@@ -1,6 +1,7 @@
 import datetime
 from chalicelib import data_funcs
 import pandas as pd
+from pandas.core.groupby.generic import DataFrameGroupBy, SeriesGroupBy
 from pandas.tseries.holiday import USFederalHolidayCalendar
 import numpy as np
 
@@ -8,7 +9,7 @@
 SERVICE_HR_OFFSET = datetime.timedelta(hours=3, minutes=30)
 
 
-def train_peak_status(df):
+def train_peak_status(df: pd.DataFrame):
     cal = USFederalHolidayCalendar()
     holidays = cal.holidays(start=df["dep_dt"].min(), end=df["dep_dt"].max())
     # pandas has a bug where sometimes empty holidays returns an Index and we need DateTimeIndex
@@ -27,7 +28,7 @@ def train_peak_status(df):
     return df
 
 
-def faster_describe(grouped):
+def faster_describe(grouped: "DataFrameGroupBy | SeriesGroupBy"):
     # This does the same thing as pandas.DataFrame.describe(), but is up to 25x faster!
     # also, we can specify population std instead of sample.
     stats = grouped.aggregate(["count", "mean", "min", "median", "max", "sum"])
@@ -73,7 +74,7 @@ def aggregate_traveltime_data(start_date: datetime.date, end_date: datetime.date
     return df
 
 
-def calc_travel_times_by_time(df):
+def calc_travel_times_by_time(df: pd.DataFrame):
     # convert time of day to a consistent datetime relative to epoch
     timedeltas = pd.to_timedelta(df["dep_time"].astype(str))
     timedeltas.loc[timedeltas < SERVICE_HR_OFFSET] += datetime.timedelta(days=1)
@@ -85,7 +86,7 @@ def calc_travel_times_by_time(df):
     return stats
 
 
-def calc_travel_times_by_date(df):
+def calc_travel_times_by_date(df: pd.DataFrame):
     # get summary stats
     summary_stats = faster_describe(df.groupby("service_date")["travel_time_sec"])
     summary_stats["peak"] = "all"
@@ -149,6 +150,23 @@ def headways_over_time(start_date: datetime.date, end_date: datetime.date, stops
     # combine summary stats
     summary_stats_final = pd.concat([summary_stats, summary_stats_peak])
 
+    # Calculate the ratio of headway_time_sec to benchmark_headway_time_sec
+    df["benchmark_headway_time_sec"] = df["benchmark_headway_time_sec"].astype(float)
+    df["headway_ratio"] = df["headway_time_sec"] / df["benchmark_headway_time_sec"]
+    # Group only after headway_ratio exists, and select just that column so apply()
+    # never operates on the grouping column itself
+    ratio_by_date = df.groupby("service_date")["headway_ratio"]
+
+    # Count trips at or under half the benchmark headway (bunched) per service_date
+    bunched = ratio_by_date.apply(lambda ratio: (ratio <= 0.5).sum())
+    bunched.name = "bunched"
+    summary_stats_final = summary_stats_final.merge(bunched, on="service_date", how="left")
+
+    # Count trips strictly between 0.75 and 1.25 of the benchmark (on-time) per service_date
+    on_time = ratio_by_date.apply(lambda ratio: ((ratio > 0.75) & (ratio < 1.25)).sum())
+    on_time.name = "on_time"
+    summary_stats_final = summary_stats_final.merge(on_time, on="service_date", how="left")
+
     # filter peak status
     results = summary_stats_final.loc[summary_stats_final["peak"] == "all"]
     # convert to dictionary