-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata_formatter.py
executable file
·76 lines (67 loc) · 2.03 KB
/
data_formatter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pandas as pd
import sys
import glob
from tqdm import tqdm
# Specify the paths to the raw data from a single day.
PATH = "../data/complete_data/raw_data"
from tempfile import TemporaryFile
def list_to_df(temp, start=7):
    """Split one parsed raw-data record into metadata and trajectory frames.

    ``temp`` is the list of string fields of a single record (one line of
    the raw file split on ``"; "``).  The record is read as:

    * ``temp[0]``            -> ``id``
    * ``temp[1]``            -> ``type``
    * ``temp[start - 1]``    -> ``dist``
    * ``temp[start]``        -> ``avg_speed``
    * from ``temp[start + 1]`` on, repeating groups of six fields:
      ``lat, lon, v, a_x, a_y, frame``

    Values are copied as-is (no numeric conversion is performed here).

    Args:
        temp: Sequence of fields for one record.
        start: Index of the last metadata field (``avg_speed``); trajectory
            points begin right after it.  The default of 7 is the layout
            used for the complete data set; the original author's note says
            the sample data set needs 3 instead.

    Returns:
        A ``(df_meta, df_trajectory)`` tuple.  ``df_meta`` has exactly one
        row with columns ``id, type, dist, avg_speed``; ``df_trajectory``
        has one row per trajectory point with columns
        ``id, frame, lon, lat, v, a_x, a_y``.

    Raises:
        IndexError: If ``temp`` is too short to contain the metadata fields.
    """
    # Each offset marks the field just before a six-field trajectory point.
    # The upper bound guarantees offset + 6 is always a valid index, so an
    # incomplete trailing group is ignored.
    offsets = range(start, len(temp) - 6, 6)

    # NOTE(review): the original author flagged the lat/lon order as
    # unverified -- confirm against the raw data specification.
    lat = [temp[i + 1] for i in offsets]
    lon = [temp[i + 2] for i in offsets]
    v = [temp[i + 3] for i in offsets]
    a_x = [temp[i + 4] for i in offsets]
    a_y = [temp[i + 5] for i in offsets]
    frame = [temp[i + 6] for i in offsets]

    df_meta = pd.DataFrame(
        {
            "id": [temp[0]],
            "type": [temp[1]],
            "dist": [temp[start - 1]],
            "avg_speed": [temp[start]],
        }
    )
    df_trajectory = pd.DataFrame(
        {
            # Scalar id is broadcast by pandas to every trajectory row.
            "id": temp[0],
            "frame": frame,
            "lon": lon,
            "lat": lat,
            "v": v,
            "a_x": a_x,
            "a_y": a_y,
        }
    )
    return df_meta, df_trajectory
def write_list(file):
    """Parse a raw data file into per-record metadata and trajectory frames.

    The first line of ``file`` is treated as a header and skipped.  Every
    following non-blank line is stripped, split on ``"; "`` and handed to
    ``list_to_df``.

    Args:
        file: Path of the raw data file to read.

    Returns:
        A ``(list_meta, list_trajectory)`` tuple of two parallel lists
        holding, for each record, the metadata DataFrame and the trajectory
        DataFrame returned by ``list_to_df``.  Both lists are empty when the
        file contains no records.
    """
    list_meta = []
    list_trajectory = []
    with open(file) as f:
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(f, None)
        for line in tqdm(f):
            record = line.strip()
            if not record:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            df_meta, df_trajectory = list_to_df(record.split("; "))
            list_meta.append(df_meta)
            list_trajectory.append(df_trajectory)
    return list_meta, list_trajectory
if __name__ == "__main__":
    # Local import: only the script entry point needs it.
    import os

    # Convert every raw CSV under PATH into a metadata CSV and a trajectory
    # CSV inside ../data/long.
    out_dir = "../data/long"
    os.makedirs(out_dir, exist_ok=True)

    paths = glob.glob(PATH + "/*.csv")
    print(paths)
    for file in paths:
        print(file)
        # Take the token from the file name only: the directory part of the
        # path contains underscores too and must not take part in the split.
        stem = os.path.splitext(os.path.basename(file))[0]
        tokens = stem.split("_")
        # The second-to-last "_"-separated token names the output files;
        # fall back to the whole stem when the name has no underscore.
        # NOTE(review): the original comment says this should be the data
        # collection hours (e.g. 0900-0930) -- confirm against real names.
        filename = tokens[-2] if len(tokens) >= 2 else stem
        print(filename)
        list_meta, list_trajectory = write_list(file)
        if not list_meta:
            # pd.concat raises on an empty list; nothing to write anyway.
            print("no records found in " + file + ", skipping")
            continue
        # save meta file
        pd.concat(list_meta).to_csv(
            out_dir + "/meta_" + filename + ".csv", index=False
        )
        # save trajectory data
        pd.concat(list_trajectory).to_csv(
            out_dir + "/" + filename + ".csv", index=False
        )