Merge pull request #25 from UDST/enable-service_ids-from-calendar_dates
Enable service ids from calendar dates
sablanchard authored May 10, 2017
2 parents c36dc93 + ecc3e84 commit 2f37f8e
Showing 7 changed files with 569 additions and 218 deletions.
4 changes: 2 additions & 2 deletions urbanaccess/gtfs/headways.py
@@ -4,7 +4,7 @@
import logging as lg

from urbanaccess.utils import log
-from urbanaccess.gtfs.network import _timeselector
+from urbanaccess.gtfs.network import _time_selector

warnings.simplefilter(action = "ignore", category = FutureWarning)

@@ -89,7 +89,7 @@ def _headway_handler(interpolated_stop_times_df, trips_df,
columns = ['unique_route_id','route_long_name','route_type','unique_agency_id']
routes_df = routes_df[columns]

-selected_interpolated_stop_times_df = _timeselector(df=interpolated_stop_times_df, starttime=headway_timerange[0], endtime=headway_timerange[1])
+selected_interpolated_stop_times_df = _time_selector(df=interpolated_stop_times_df, starttime=headway_timerange[0], endtime=headway_timerange[1])

tmp1 = pd.merge(trips_df, routes_df, how='left', left_on='unique_route_id', right_on='unique_route_id', sort=False)
merge_df = pd.merge(selected_interpolated_stop_times_df, tmp1, how='left',
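Note: the renamed _time_selector is defined in urbanaccess/gtfs/network.py, whose large diff is not rendered below. As a rough sketch of what a selector like this does, assuming stop times carry a departure_time_sec column (seconds past midnight, as in the test fixtures later in this diff) and that the time range arrives as 'HH:MM:SS' strings; illustrative only, not the actual implementation:

    import pandas as pd

    def time_selector_sketch(df, starttime, endtime):
        """Keep rows whose departure_time_sec falls inside the
        'HH:MM:SS' range [starttime, endtime]. Sketch only; the real
        _time_selector in urbanaccess/gtfs/network.py may differ."""
        def to_seconds(timestr):
            # 'HH:MM:SS' -> seconds past midnight
            h, m, s = (int(p) for p in timestr.split(':'))
            return h * 3600 + m * 60 + s

        mask = df['departure_time_sec'].between(to_seconds(starttime),
                                                to_seconds(endtime))
        return df.loc[mask]

    stop_times = pd.DataFrame({'departure_time_sec': [21600, 27000, 39600]})
    print(time_selector_sketch(stop_times, '07:00:00', '10:00:00'))
    # keeps only the 27000-second (07:30:00) departure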
9 changes: 6 additions & 3 deletions urbanaccess/gtfs/load.py
@@ -97,7 +97,9 @@ def _txt_header_whitespace_check(csv_rootpath=os.path.join(config.settings.data_
f.writelines(lines)
log('GTFS text file header whitespace check completed. Took {:,.2f} seconds'.format(time.time()-start_time))

-def gtfsfeed_to_df(gtfsfeed_path=None,validation=False,verbose=True,bbox=None,remove_stops_outsidebbox=None,append_definitions=False):
+def gtfsfeed_to_df(gtfsfeed_path=None, validation=False, verbose=True,
+                   bbox=None, remove_stops_outsidebbox=None,
+                   append_definitions=False):
"""
Read all GTFS feed components as a dataframe in a gtfsfeeds_dfs object and
merge all individual GTFS feeds into a regional metropolitan data table.
@@ -125,8 +127,9 @@ def gtfsfeed_to_df(gtfsfeed_path=None,validation=False,verbose=True,bbox=None,re
remove_stops_outsidebbox : bool
if true stops that are outside the bbox will be removed
append_definitions : bool
-if true, columns that use the GTFS data schema for their attribute codes will have the corresponding GTFS
-definition information of that code appended to the resulting dataframes for reference
+if true, columns that use the GTFS data schema for their attribute
+codes will have the corresponding GTFS definition information of
+that code appended to the resulting dataframes for reference
Returns
-------
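For reference, a call to the reformatted function might look like the following; the feed path and bounding box values are placeholders, and the bbox is written as (lng_min, lat_min, lng_max, lat_max); check the full docstring for the exact tuple order the library expects:

    from urbanaccess.gtfs.load import gtfsfeed_to_df

    loaded_feeds = gtfsfeed_to_df(
        gtfsfeed_path='data/gtfsfeed_text',  # hypothetical feed folder
        validation=True,
        verbose=True,
        bbox=(-122.355881, 37.632226, -122.114775, 37.884725),
        remove_stops_outsidebbox=True,
        append_definitions=True)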
755 changes: 545 additions & 210 deletions urbanaccess/gtfs/network.py

Large diffs are not rendered by default.
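The substance of this PR lives in that unrendered network.py diff: service ids could previously be selected only from the day-of-week columns of calendar.txt, and this change also enables feeds that schedule service through calendar_dates.txt. A minimal sketch of the underlying idea, using the standard GTFS calendar_dates.txt columns (service_id, date, exception_type, where exception_type 1 adds service on that date and 2 removes it); the actual implementation and its keyword arguments may differ:

    import pandas as pd

    def service_ids_from_calendar_dates(calendar_dates_df, date):
        """Return service ids added on a 'YYYYMMDD' date per
        calendar_dates.txt. Sketch only; the real selection logic
        is in urbanaccess/gtfs/network.py."""
        on_date = calendar_dates_df.loc[
            calendar_dates_df['date'].astype(str) == date]
        added = on_date.loc[on_date['exception_type'] == 1, 'service_id']
        return added.unique().tolist()

    cal_dates = pd.DataFrame({
        'service_id': ['wkd', 'hol', 'wkd'],
        'date': [20170510, 20170704, 20170704],
        'exception_type': [1, 1, 2]})
    print(service_ids_from_calendar_dates(cal_dates, '20170704'))
    # ['hol']: holiday service is added; weekday service is removed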

6 changes: 6 additions & 0 deletions urbanaccess/gtfs/utils_format.py
@@ -238,6 +238,9 @@ def _calendar_agencyid(calendar_df=None, routes_df=None, trips_df=None, agency_d
"""
tmp1 = pd.merge(routes_df, agency_df, how='left', on='agency_id', sort=False, copy=False)
tmp2 = pd.merge(trips_df, tmp1, how='left', on='route_id', sort=False, copy=False)
+# merge on the calendar's service_id column so that service ids that
+# are not used across all GTFS files are still accounted for, rather
+# than being dropped by the merge
merged_df = pd.merge(calendar_df[['service_id']], tmp2, how='left', on='service_id', sort=False, copy=False)
merged_df['unique_agency_id'] = _generate_unique_agency_id(merged_df, 'agency_name')
merged_df.drop_duplicates(subset='service_id', keep='first', inplace=True)
@@ -297,6 +300,9 @@ def _stops_agencyid(stops_df=None, trips_df=None, routes_df=None, stop_times_df=
tmp1 = pd.merge(routes_df, agency_df, how='left', on='agency_id', sort=False, copy=False)
tmp2 = pd.merge(trips_df, tmp1, how='left', on='route_id', sort=False, copy=False)
tmp3 = pd.merge(stop_times_df, tmp2, how='left', on='trip_id', sort=False, copy=False)
+# merge on the stops' stop_id column so that stops that are not used
+# across all GTFS files are still accounted for, rather than being
+# dropped by the merge
merged_df = pd.merge(stops_df[['stop_id']], tmp3, how='left', on='stop_id', sort=False, copy=False)
merged_df['unique_agency_id'] = _generate_unique_agency_id(merged_df, 'agency_name')
merged_df.drop_duplicates(subset='stop_id', keep='first', inplace=True)
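Both new comments describe the same left-merge pattern: merging from the single-column id table keeps ids that have no match in the other tables, with NaN attributes, instead of silently dropping them. A toy illustration:

    import pandas as pd

    calendar = pd.DataFrame({'service_id': ['wk', 'sat', 'special']})
    trips_with_agency = pd.DataFrame(
        {'service_id': ['wk', 'sat'],
         'agency_name': ['City Trains', 'City Trains']})

    # 'special' survives the merge even though no trip references it
    merged = pd.merge(calendar[['service_id']], trips_with_agency,
                      how='left', on='service_id', sort=False)
    print(merged)
    #   service_id  agency_name
    # 0         wk  City Trains
    # 1        sat  City Trains
    # 2    special          NaN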
1 change: 1 addition & 0 deletions urbanaccess/gtfs/utils_validation.py
@@ -64,6 +64,7 @@ def _boundingbox_check(df=None, feed_folder=None, lat_min=None, lng_min=None, la
log('Removed identified stops that are outside of bounding box.')
return df_subset
else:
+log('No GTFS feed stops were found to be outside the bounding box coordinates')
return df

def _checkcoordinates(df=None, feed_folder=None):
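For context, the function this new log line belongs to subsets stops to a bounding box; its full body is not shown in the hunk. A minimal sketch of such a check, assuming the standard GTFS stops.txt coordinate columns (illustrative only):

    def stops_in_bbox(stops_df, lng_min, lat_min, lng_max, lat_max):
        # keep only stops whose coordinates fall inside the box; the
        # real _boundingbox_check also logs the outcome, as above
        inside = (stops_df['stop_lon'].between(lng_min, lng_max) &
                  stops_df['stop_lat'].between(lat_min, lat_max))
        return stops_df.loc[inside]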
8 changes: 7 additions & 1 deletion urbanaccess/network.py
@@ -356,7 +356,13 @@ def _format_pandana_edges_nodes(edge_df, node_df):
# turn mixed dtype cols into all same format
col_list = edge_df_wnumericid.select_dtypes(include=['object']).columns
for col in col_list:
-edge_df_wnumericid[col] = edge_df_wnumericid[col].astype(str)
+try:
+    edge_df_wnumericid[col] = edge_df_wnumericid[col].astype(str)
+# handle edge cases where a value, typically a street name, is not
+# in a uniform string encoding, such as names with accents
+except UnicodeEncodeError:
+    log('Fixed unicode error in {} column'.format(col))
+    edge_df_wnumericid[col] = edge_df_wnumericid[col].str.encode('utf-8')

node_df.set_index('id_int',drop=True,inplace=True)
# turn mixed dtype col into all same format
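The new except branch targets a Python 2 failure mode: astype(str) on a column holding non-ASCII unicode values raises UnicodeEncodeError, because str() encodes with the ASCII codec, and falling back to .str.encode('utf-8') yields uniform byte strings instead. A small reproduction under Python 2 semantics (under Python 3, astype(str) handles accented names without error):

    # -*- coding: utf-8 -*-
    import pandas as pd

    edges = pd.DataFrame({'name': [u'Calle José', u'Main St']})
    try:
        # raises UnicodeEncodeError on the accented name under Python 2
        edges['name'] = edges['name'].astype(str)
    except UnicodeEncodeError:
        # fall back to UTF-8 byte strings so the column dtype is uniform
        edges['name'] = edges['name'].str.encode('utf-8')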
4 changes: 2 additions & 2 deletions urbanaccess/tests/test_gtfs_network.py
@@ -82,7 +82,7 @@ def stop_times_interpolated():


def test_interpolator(stop_times, calendar):
-df = network._interpolatestoptimes(stop_times, calendar, day='monday')
+df = network._interpolate_stop_times(stop_times, calendar, day='monday')

# unique_trip_id should be generated
assert df.loc[1, 'unique_trip_id'] == 'a_citytrains'
@@ -121,7 +121,7 @@ def test_skip_interpolator(stop_times, calendar):

stop_times['departure_time_sec'] = series

-df = network._interpolatestoptimes(stop_times, calendar, day='monday')
+df = network._interpolate_stop_times(stop_times, calendar, day='monday')

# everything should be the same,
# with one row dropped for calendar day filter
