
Commit

Add some basic aggregate bunched stats
devinmatte committed Feb 16, 2025
1 parent 7f6eda4 commit 5534b56
Showing 4 changed files with 50 additions and 6 deletions.
common/types/charts.ts (2 changes: 2 additions, 0 deletions)
@@ -22,6 +22,8 @@ export interface AggregateDataPoint {
   '50%': number;
   '75%': number;
   count: number;
+  bunched?: number;
+  on_time?: number;
   max: number;
   mean: number;
   min: number;
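The two new fields are raw per-day trip counts rather than percentages, and they are optional because rows produced before this change will not carry them. A hypothetical row, showing only the fields relevant here (values are made up):

const sampleRow = {
  count: 120, // total headways observed for the service date
  bunched: 6, // headways at or below 50% of the benchmark headway
  on_time: 90, // headways strictly between 75% and 125% of the benchmark
};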
common/utils/widgets.ts (26 changes: 26 additions, 0 deletions)
@@ -69,6 +69,18 @@ const getAggDataPointsOfInterest = (aggData: AggregateDataPoint[]) => {
   return { average, min, max, median, p10, p90 };
 };

+const getAggHeadwayDataPoints = (aggData: AggregateDataPoint[]) => {
+  const totalTrips = aggData.map((tt) => tt.count).reduce((a, b) => a + b, 0);
+  const bunchedTrips = aggData.map((tt) => tt.bunched).reduce((a = 0, b = 0) => a + b, 0) || 0;
+  const onTimeTrips = aggData.map((tt) => tt.on_time).reduce((a = 0, b = 0) => a + b, 0) || 0;
+
+  const bunched = bunchedTrips / totalTrips;
+  const onTime = onTimeTrips / totalTrips;
+
+  const { average, min, max, median, p10, p90 } = getAggDataPointsOfInterest(aggData);
+  return { average, min, max, median, p10, p90, bunched, onTime };
+};
+
 export const getAggDataWidgets = (aggData: AggregateDataPoint[], type: 'times' | 'speeds') => {
   const { average, min, max, median, p10, p90 } = getAggDataPointsOfInterest(aggData);
   return [
@@ -81,6 +93,20 @@ export const getAggDataWidgets = (aggData: AggregateDataPoint[], type: 'times' |
   ];
 };

+export const getAggHeadwayDataWidgets = (aggData: AggregateDataPoint[], type: 'times') => {
+  const { average, min, max, median, p10, p90, bunched, onTime } = getAggHeadwayDataPoints(aggData);
+  return [
+    { text: 'Avg', widgetValue: getWidget(type, average), type: 'data' },
+    { text: 'Median', widgetValue: getWidget(type, median), type: 'data' },
+    { text: '10%', widgetValue: getWidget(type, p10), type: 'data' },
+    { text: '90%', widgetValue: getWidget(type, p90), type: 'data' },
+    { text: 'Min', widgetValue: getWidget(type, min), type: 'data' },
+    { text: 'Max', widgetValue: getWidget(type, max), type: 'data' },
+    { text: 'Bunched Trips', widgetValue: new PercentageWidgetValue(bunched), type: 'data' },
+    { text: 'On Time Trips', widgetValue: new PercentageWidgetValue(onTime), type: 'data' },
+  ];
+};
+
 const getSingleDayNumberArray = (
   data: SingleDayDataPoint[],
   type: 'traveltimes' | 'dwells' | 'headways' | 'speeds'
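A minimal standalone sketch of what the new getAggHeadwayDataPoints computes from these rows: the optional per-day counts are summed (treating missing values as zero, as the reduce defaults above do) and divided by the total trip count, so the two new widgets report shares of all trips in the selected range. The row shape and numbers below are hypothetical, for illustration only.

interface HeadwayAggRow {
  count: number; // total trips for the service date
  bunched?: number; // trips with headway at or below 50% of benchmark
  on_time?: number; // trips with headway strictly between 75% and 125% of benchmark
}

const rows: HeadwayAggRow[] = [
  { count: 120, bunched: 6, on_time: 90 },
  { count: 110, bunched: 9, on_time: 80 },
  { count: 95 }, // older rows may lack the new fields
];

const totalTrips = rows.reduce((sum, r) => sum + r.count, 0);
const bunchedShare = rows.reduce((sum, r) => sum + (r.bunched ?? 0), 0) / totalTrips;
const onTimeShare = rows.reduce((sum, r) => sum + (r.on_time ?? 0), 0) / totalTrips;

console.log(bunchedShare.toFixed(3), onTimeShare.toFixed(3)); // "0.046" "0.523"

In the actual widget builder these fractions are wrapped in PercentageWidgetValue rather than logged.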
modules/headways/HeadwaysAggregateWrapper.tsx (4 changes: 2 additions, 2 deletions)
@@ -6,7 +6,7 @@ import { ChartPlaceHolder } from '../../common/components/graphics/ChartPlaceHol
 import { CarouselGraphDiv } from '../../common/components/charts/CarouselGraphDiv';
 import { NoDataNotice } from '../../common/components/notices/NoDataNotice';
 import { MiniWidgetCreator } from '../../common/components/widgets/MiniWidgetCreator';
-import { getAggDataWidgets } from '../../common/utils/widgets';
+import { getAggHeadwayDataWidgets } from '../../common/utils/widgets';
 import { HeadwaysAggregateChart } from './charts/HeadwaysAggregateChart';

 interface HeadwaysAggregateWrapperProps {
@@ -24,7 +24,7 @@ export const HeadwaysAggregateWrapper: React.FC<HeadwaysAggregateWrapperProps> =
   if (!dataReady) return <ChartPlaceHolder query={query} />;
   const headwaysData = query.data.by_date.filter((datapoint) => datapoint.peak === 'all');
   if (headwaysData.length < 1) return <NoDataNotice />;
-  const widgetObjects = getAggDataWidgets(headwaysData, 'times');
+  const widgetObjects = getAggHeadwayDataWidgets(headwaysData, 'times');

   return (
     <CarouselGraphDiv>
server/chalicelib/aggregation.py (24 changes: 20 additions, 4 deletions)
@@ -1,14 +1,15 @@
 import datetime
 from chalicelib import data_funcs
 import pandas as pd
+from pandas.core.groupby.generic import DataFrameGroupBy
 from pandas.tseries.holiday import USFederalHolidayCalendar
 import numpy as np

 # This matches the cutoff used in MbtaPerformanceApi.py
 SERVICE_HR_OFFSET = datetime.timedelta(hours=3, minutes=30)


-def train_peak_status(df):
+def train_peak_status(df: pd.DataFrame):
     cal = USFederalHolidayCalendar()
     holidays = cal.holidays(start=df["dep_dt"].min(), end=df["dep_dt"].max())
     # pandas has a bug where sometimes empty holidays returns an Index and we need DateTimeIndex
@@ -27,7 +28,7 @@ def train_peak_status(df):
     return df


-def faster_describe(grouped):
+def faster_describe(grouped: DataFrameGroupBy):
     # This does the same thing as pandas.DataFrame.describe(), but is up to 25x faster!
     # also, we can specify population std instead of sample.
     stats = grouped.aggregate(["count", "mean", "min", "median", "max", "sum"])
@@ -73,7 +74,7 @@ def aggregate_traveltime_data(start_date: datetime.date, end_date: datetime.date
     return df


-def calc_travel_times_by_time(df):
+def calc_travel_times_by_time(df: pd.DataFrame):
     # convert time of day to a consistent datetime relative to epoch
     timedeltas = pd.to_timedelta(df["dep_time"].astype(str))
     timedeltas.loc[timedeltas < SERVICE_HR_OFFSET] += datetime.timedelta(days=1)
@@ -85,7 +86,7 @@ def calc_travel_times_by_time(df):
     return stats


-def calc_travel_times_by_date(df):
+def calc_travel_times_by_date(df: pd.DataFrame):
     # get summary stats
     summary_stats = faster_describe(df.groupby("service_date")["travel_time_sec"])
     summary_stats["peak"] = "all"
@@ -149,6 +150,21 @@ def headways_over_time(start_date: datetime.date, end_date: datetime.date, stops
     # combine summary stats
     summary_stats_final = pd.concat([summary_stats, summary_stats_peak])

+    grouped = df.groupby("service_date")
+    # Calculate the ratio of headway_time_sec to benchmark_headway_time_sec
+    df["benchmark_headway_time_sec"] = df["benchmark_headway_time_sec"].astype(float)
+    df["headway_ratio"] = df["headway_time_sec"] / df["benchmark_headway_time_sec"]
+
+    # Calculate the count of trips under 0.5 (bunched) per service_date
+    bunched = grouped.apply(lambda x: (x["headway_ratio"] <= 0.5).sum())
+    bunched.name = "bunched"
+    summary_stats_final = summary_stats_final.merge(bunched, on="service_date", how="left")
+
+    # Calculate the count of trips between 0.75 and 1.25 (on-time) per service_date
+    on_time = grouped.apply(lambda x: ((x["headway_ratio"] < 1.25) & (x["headway_ratio"] > 0.75)).sum())
+    on_time.name = "on_time"
+    summary_stats_final = summary_stats_final.merge(on_time, on="service_date", how="left")
+
     # filter peak status
     results = summary_stats_final.loc[summary_stats_final["peak"] == "all"]
     # convert to dictionary
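Read per trip, the thresholds above classify each observed headway by its ratio to the benchmark headway before the per-date counts are merged into the summary stats. A sketch of that rule, written in TypeScript to match the frontend examples; this is illustrative only, not the server implementation (which computes it vectorized in pandas), and the sample values are made up.

type HeadwayClass = 'bunched' | 'on_time' | 'other';

// ratio = observed headway / benchmark headway, both in seconds
function classifyHeadway(headwaySec: number, benchmarkHeadwaySec: number): HeadwayClass {
  const ratio = headwaySec / benchmarkHeadwaySec;
  if (ratio <= 0.5) return 'bunched'; // at most half the scheduled gap
  if (ratio > 0.75 && ratio < 1.25) return 'on_time'; // strictly within 25% of the benchmark
  return 'other';
}

console.log(classifyHeadway(180, 540)); // 'bunched' (ratio = 0.33)
console.log(classifyHeadway(500, 540)); // 'on_time' (ratio = 0.93)

Trips that are neither bunched nor on-time (for example a ratio of 0.6, or anything at or above 1.25) are simply not counted in either new column.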
