-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathformat_vtt.py
45 lines (39 loc) · 1.03 KB
/
format_vtt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import webvtt
import json
import time
# Wall-clock reference point; the total runtime is reported at the end of the script.
start_time = time.time()

# Disabled alternative input, kept for reference: the id list was read from
# the first line of a comma-separated text file instead of the JSON index.
#     with open("./COIN/demo_coffee.txt") as f:
#         data = f.readlines()
#     data = data[0].split(",")
#     print(data)

# Every top-level key of the JSON index is taken as one id to process.
with open("./COIN/idd_to_vtt.json") as f:
    content = json.load(f)
data = list(content)
print(len(data))

# Write the ids out as one line, each id followed by a comma
# (so the file ends with a trailing comma).
with open("./coin_ids.txt", "w") as outfile:
    outfile.writelines(key + "," for key in data)
# Load the index again as `linker`: for each id it holds the value that is
# handed to webvtt.read() (i.e. the location of that id's .vtt caption file).
# A context manager guarantees the handle is closed even if parsing fails.
with open("./COIN/idd_to_vtt.json") as f:
    linker = json.load(f)

# output: id -> {"start": [...], "end": [...], "text": [...]}, one list entry
#         per caption, in the order the captions appear in the file.
# valid:  ids whose caption file was read successfully.
output = {}
valid = []
for video_id in data:
    try:
        start, end, text = [], [], []
        for caption in webvtt.read(linker[video_id]):
            start.append(caption.start)
            end.append(caption.end)
            text.append(caption.text)
    except Exception as exc:
        # Best effort: a missing or malformed caption file must not abort the
        # whole run. `Exception` (rather than a bare except) lets Ctrl-C and
        # SystemExit propagate instead of being reported as a bad file.
        print("ERROR!", video_id, repr(exc))
    else:
        # Only record an id once all of its captions were collected, so
        # `output` never contains partially filled entries.
        output[video_id] = {"start": start, "end": end, "text": text}
        valid.append(video_id)
# Serialize everything collected in `output` as a single JSON document.
with open("./coin_formatted_all.json", "w") as outfile:
    json.dump(output, fp=outfile)

# Report the total wall-clock runtime, measured from `start_time`.
elapsed = time.time() - start_time
print(elapsed, "seconds")