#!/usr/bin/env python
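"""Merge packet-level timing data from a tcpdump-based HTTP analyzer into the
per-entry timings of a HAR file.

Typical invocation (the file names here are illustrative):

    python merge.py capture.har -f tcp_timing.json -o merged.har
"""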
import json, argparse, logging, sys
import calendar, iso8601


def convert_enddate_to_ms(ts):
    """Convert an ISO 8601 timestamp (string) to milliseconds since the epoch."""
    dt = iso8601.parse_date(ts)
    dtUtc = (dt - dt.utcoffset()).replace(tzinfo=None)
    ms = calendar.timegm(dtUtc.timetuple()) * 1000 + dtUtc.microsecond / 1000.0
    return ms
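

# Sanity check of the conversion (value verified by hand):
#   convert_enddate_to_ms('2014-01-01T00:00:01.500Z') == 1388534401500.0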


def execute(args):
    with open(args.harfile, 'r') as f:
        har = json.load(f)
    logging.info('HAR file loaded')
    if args.file:
        with open(args.file, 'r') as f:
            tcpTime = json.load(f)
    else:
        tcpTime = json.load(sys.stdin)
    logging.info('tcpdump timing file loaded')
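
    # Expected shape of the timing input, inferred from the lookups below
    # ('request' appears to be in epoch seconds; 'data' and 'reset' in ms
    # relative to the moment the request was sent):
    #
    # {
    #   "<url>": [
    #     {"request": 1388534401.5, "data": [12.3, 45.6], "reset": 78.9},
    #     ...
    #   ]
    # }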
    for entry in har['log']['entries']:
        url = entry['request']['url']
        timestamp = convert_enddate_to_ms(entry['startedDateTime'])
        if url not in tcpTime:
            logging.warning('No record found in tcpdump for %s', url)
            continue
        if len(tcpTime[url]) > 1:
            logging.warning('multiple tcpdump records found for %s', url)
        timings = entry['timings']
        timings['dataArrivals'] = []
        timings['reset'] = []
        for record in tcpTime[url]:
            timedata = record['data']
            # Times in the tcpdump record are measured from the moment the
            # request was sent, while HAR times are measured from the entry's
            # startedDateTime (when the object's URL was parsed). Close the
            # gap by adding the pre-send phases of the HAR timing breakdown;
            # negative HAR values mean "phase not applicable" and count as 0.
            gap = max(timings['blocked'], 0) + max(timings['dns'], 0) \
                + max(timings['connect'], 0) + max(timings['send'], 0)
            # Residual clock misalignment between tcpdump and the HAR, in ms.
            shift = record['request'] * 1000 - (timestamp + gap)
            logging.debug('%s has time shift %f', url, shift)
            if abs(shift) > args.threshold:
                logging.warning('Big time shift %.3f ms in request sent time for %s', shift, url)
            for d in timedata:
                timings['dataArrivals'].append({'timestamp': d + gap + shift})
            if 'reset' in record:
                timings['reset'].append(record['reset'] + gap + shift)
    if args.output:
        with open(args.output, 'w') as outfile:
            json.dump(har, outfile, indent=4)
    else:
        print(json.dumps(har, indent=4))


def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Merge the output from an HTTP analyzer into a HAR file')
    parser.add_argument('-f', '--file', default=None,
                        help='Read tcp timing data from a JSON file instead of STDIN')
    parser.add_argument('harfile', help='Path to the HAR file')
    parser.add_argument('-o', '--output', default=None,
                        help='Write the merged HAR to a file instead of STDOUT')
    parser.add_argument('-t', '--threshold', type=float, default=10.0,
                        help='The threshold (ms) for matching objects. If the '
                             'timestamps of the same object in the HAR and the '
                             'tcp timing data differ by more than the threshold, '
                             'a warning is logged.')
    parser.add_argument('-q', '--quiet', action='store_true', default=False,
                        help='Only print errors')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Print debug info')
    args = parser.parse_args()
    if args.quiet:
        level = logging.ERROR
    elif args.verbose:
        level = logging.DEBUG
    else:
        level = logging.WARNING
    logging.basicConfig(format='%(levelname)s:%(message)s', level=level)
    execute(args)


if __name__ == '__main__':
    main()