import copy
import pickle
import random
import time
from collections import defaultdict

import geopandas as gpd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import torch
from scipy import spatial
from sklearn.cluster import KMeans
from torch import nn
from tqdm import tqdm

from my_constants import *  # provides EDGE_DATA, NODE_DATA and the pickled-trip paths

# Road-network geometry exported from OSM.
edge_df = gpd.read_file(EDGE_DATA)
node_df = gpd.read_file(NODE_DATA)

# OSM node id -> (lat, lon).
osmids = node_df['osmid'].to_numpy()
coords = node_df[['y', 'x']].to_numpy()
map_node_osm_to_coords = {osmid: (lat, lon) for osmid, (lat, lon) in zip(osmids, coords)}
del osmids, coords

# Edge id -> (u, v) endpoints, and the inverse map back to a canonical edge id.
# If several edges share the same (u, v) pair, the inverse keeps the last one,
# so it acts as the canonical representative for that pair.
map_edge_id_to_u_v = edge_df[['u', 'v']].to_numpy()
map_u_v_to_edge_id = {(u, v): i for i, (u, v) in enumerate(map_edge_id_to_u_v)}
unique_edge_labels = list(map_u_v_to_edge_id.values())  # these come from the OSM map data, not the train data

def condense_edges(edge_route):
    """Replace every edge id in `edge_route` by the canonical id of its
    (u, v) pair, collapsing duplicate edges between the same node pair."""
    return [map_u_v_to_edge_id[tuple(map_edge_id_to_u_v[e])] for e in edge_route]

def fetch_map_fid_to_zero_indexed(data):
    """Build a mapping from every edge id that occurs in `data` to a dense,
    zero-based index."""
    seen = set()
    for _, trip, _ in data:
        seen.update(trip)
    return {el: i for i, el in enumerate(seen)}

def relabel_trips(data, mapping):
    """Apply `mapping` to every edge id of every trip, keeping trip ids and
    timestamps unchanged."""
    return [(idx, [mapping[e] for e in trip], timestamps) for (idx, trip, timestamps) in data]

def remove_loops(path):
    """Remove loops from `path` by jumping from each element straight past its
    last occurrence, so every element appears at most once."""
    # Record the index of the last occurrence of each element.
    last_occ = {p: -1 for p in path}
    for i in range(len(path) - 1, -1, -1):
        if last_occ[path[i]] == -1:
            last_occ[path[i]] = i
    # Walk the path, skipping directly past each element's last occurrence.
    reduced = []
    current = 0
    while current < len(path):
        reduced.append(path[current])
        current = last_occ[path[current]] + 1
    return reduced
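
# Quick illustration (not part of the original file): the path
# 1 -> 2 -> 3 -> 2 -> 4 contains the loop 2 -> 3 -> 2, so
#   remove_loops([1, 2, 3, 2, 4]) == [1, 2, 4]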

def nbrs_sanity_check(node_nbrs, data):
    """Assert that every consecutive pair of edges in every trip is connected
    according to `node_nbrs`."""
    print("SANITY CHECK 1")
    for _, t, _ in tqdm(data, dynamic_ncols=True):
        for i in range(len(t) - 1):
            assert t[i + 1] in node_nbrs[t[i]], f"{t[i + 1]} is not a neighbour of {t[i]}"
    print('Cleared :)')

def create_node_nbrs(forward):
    """Build an adjacency list over relabelled edges: edge A is a neighbour of
    edge B when A starts at the node where B ends."""
    # Group relabelled edges by the node they start at.
    start_nodes = defaultdict(set)
    for e in forward:
        u, _ = map_edge_id_to_u_v[e]
        start_nodes[u].add(forward[e])
    node_nbrs = {}  # here "nodes" are actually edges of the road network
    for e in forward:
        _, v = map_edge_id_to_u_v[e]
        node_nbrs[forward[e]] = list(start_nodes[v])
    return node_nbrs
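
# Illustrative calling pattern (an assumption pieced together from the
# signatures in this file, not code from the original):
#   data, forward = load_data(args)
#   node_nbrs = create_node_nbrs(forward)
#   nbrs_sanity_check(node_nbrs, data)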

def load_data(args, less=False, sample=1000, fname=TRAIN_TRIP_DATA_PICKLED_WITH_TIMESTAMPS):
    """Load pickled, map-matched training trips; return the relabelled trips
    and the edge-id -> zero-based-index map used to relabel them."""
    print("Loading map matched trajectories")
    with open(fname, "rb") as f:
        data = pickle.load(f)
    if less:
        data = data[:sample]
    data = [(idx, condense_edges(t), timestamps) for (idx, t, timestamps) in tqdm(data, dynamic_ncols=True)]
    if args.remove_loops or args.remove_loops_from_train:
        data = [(idx, remove_loops(t), timestamps) for (idx, t, timestamps) in tqdm(data, dynamic_ncols=True)]
    data = [(idx, t, timestamps) for (idx, t, timestamps) in tqdm(data, dynamic_ncols=True) if len(t) >= 5]  # ignore very short trips
    forward = fetch_map_fid_to_zero_indexed(data)
    data = relabel_trips(data, forward)
    return data, forward

def load_test_data(args, forward, less=False, sample=1000, fname=TEST_TRIP_DATA_PICKLED_WITH_TIMESTAMPS):
    """Load pickled test/validation trips and relabel them with `forward`.
    Edges never seen in training are appended to `forward` in place, so the
    caller's map is extended rather than trips being dropped."""
    print('Loading test/val data')
    with open(fname, "rb") as f:
        data = pickle.load(f)
    if less:
        data = data[:sample]
    data = [(idx, condense_edges(t), timestamps) for (idx, t, timestamps) in tqdm(data, dynamic_ncols=True)]
    if args.remove_loops:
        data = [(idx, remove_loops(t), timestamps) for (idx, t, timestamps) in tqdm(data, dynamic_ncols=True)]
    data = [(idx, t, timestamps) for (idx, t, timestamps) in tqdm(data, dynamic_ncols=True) if len(t) >= 5]  # ignore very short trips
    orig_num = len(data)
    print('Number of trips in test data (initially): {}'.format(orig_num))
    # Extend `forward` with edges that never appeared in the training data.
    unseen_data = [trip_tup for trip_tup in data if not set(trip_tup[1]).issubset(forward)]
    for (_, trip, _) in unseen_data:
        for e in trip:
            if e not in forward:
                forward[e] = len(forward)
    print('Number of trips with unseen edges: {}'.format(len(unseen_data)))
    print('Keeping these trips!')
    print('Relabelling trips with updated forward map')
    data = relabel_trips(data, forward)
    return data

def single_source_shortest_path_length_range(graph, node_range, cutoff):
    """Run truncated Dijkstra from every node in `node_range`, using the
    'haversine' edge attribute as the weight; return {source: {node: dist}}."""
    dists_dict = {}
    for node in node_range:
        dists_dict[node] = nx.single_source_dijkstra_path_length(graph, node, cutoff=cutoff, weight='haversine')
    return dists_dict

def merge_dicts(dicts):
    result = {}
    for dictionary in dicts:
        result.update(dictionary)
    return result