-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathload_edgelist_dataverse.py
48 lines (36 loc) · 1.2 KB
/
load_edgelist_dataverse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import csv
from datetime import datetime
import json
import networkx as nx
from networkx.readwrite import json_graph
import sys
# Naive reference instant (1970-01-01 00:00:00). strptime() below also yields
# naive datetimes, so the subtraction never mixes naive and aware values.
# Built directly because datetime.utcfromtimestamp() is deprecated (3.12+).
epoch = datetime(1970, 1, 1)


def convert_timestamp(ts_str, step_seconds=24 * 60 * 60):
    """Convert a timestamp string to a whole-step index counted from the epoch.

    Args:
        ts_str: Timestamp formatted as "%Y-%m-%d %H:%M:%S".
        step_seconds: Length of one step in seconds. Defaults to one day;
            pass ``60 * 60`` for hourly steps.

    Returns:
        int: Number of complete steps between ``epoch`` and the timestamp.

    Raises:
        ValueError: If ``ts_str`` does not match the expected format.
    """
    t = datetime.strptime(ts_str, "%Y-%m-%d %H:%M:%S")
    # Floor division keeps the result an integer step; true division would
    # return a fractional number of steps on Python 3.
    return int((t - epoch).total_seconds()) // step_seconds
# Script body: read a timestamped edge list from a CSV file, build a
# networkx MultiGraph whose edges carry a rebased step in the "add"
# attribute, and write the graph out as node-link JSON.
argv = sys.argv
if len(argv) < 3:
    # Interpolate the script name so the placeholder is not printed verbatim.
    print("Usage: python %s [InputCSV] [OutputJSON]" % argv[0])
    sys.exit(1)

input_csv = argv[1]
output_json = argv[2]

g = nx.MultiGraph()

# newline="" is what the csv module recommends for files handed to csv.reader.
with open(input_csv, "r", newline="") as rf:
    # quotechar is a single quote, so double quotes in the data come through
    # as literal characters; they are removed by hand from the first and third
    # fields below.
    # NOTE(review): presumably each input line is wrapped in double quotes as
    # a whole -- confirm against the dataset.
    reader = csv.reader(rf, quotechar="'", delimiter=",")
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; skip them instead of
            # failing on row[0].
            continue
        ts = row[0].replace("\"", "")
        src = int(row[1])
        dst = int(row[2].replace("\"", ""))
        step = convert_timestamp(ts)
        g.add_edge(src, dst, add=step)

# Rebase every edge's step so the smallest one in the file becomes 0.
tss = nx.get_edge_attributes(g, "add")
# An input with no edges leaves tss empty; fall back to 0 rather than letting
# min() raise ValueError.
base_step = min(tss.values()) if tss else 0
print("Base step: %d" % base_step)
new_tss = {k: v - base_step for k, v in tss.items()}
nx.set_edge_attributes(g, new_tss, "add")

print("Vertices: %d" % g.number_of_nodes())
print("Edges: %d" % g.number_of_edges())

with open(output_json, "w") as wf:
    data = json_graph.node_link_data(g)
    json.dump(data, wf, indent=2)