-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEventParser.py
185 lines (155 loc) · 7.38 KB
/
EventParser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#!/usr/bin/python
# Types of lines:
# Variable setting: Python identifier followed by =, number or quoted string, optional datetime follows
# Date mark: datetime, optional text on line alone
# Events: unquoted string, followed by : optional datetime, optional - and datetime
# Indentation before event is important.
# Notes: Line of text which does not match any of the above
# ignore after #
import sys
import re
import dateutil.parser as du
import datetime
import pytz
def _condense_components(text, splitter, blank):
    """Split *text* with *splitter* and fold whitespace pieces together.

    Returns a list whose element 0 is the indentation width (a space counts
    as 1, a tab as 4) and whose remaining elements are the split pieces in
    order, with each whitespace-only piece after the first real token
    replaced by a single ' '.
    """
    condensed = [0]
    for piece in splitter.split(text):
        if not blank.match(piece):
            condensed.append(piece)
        elif len(condensed) == 1:
            # Nothing but the indent counter so far: this is leading whitespace
            condensed[0] += piece.count(' ') + 4 * piece.count('\t')
        else:
            # NOTE: the empty strings re.split yields between two adjacent
            # separators also land here and are recorded as ' '.
            condensed.append(' ')
    return condensed


def parse( filename ):
    """Parse the log file *filename* into events and variable ranges.

    Every line has anything after '#' dropped; blank lines are skipped.
    The rest is split on whitespace, '-', ':' and '='.  A trailing time is
    peeled off with extract_time(); if the remainder then ends in '-', the
    time just found is the end of a range and the start time is extracted
    the same way.  A time that differs from the current one becomes the
    current time for this and the following lines.

    What is left of the line is then classified:
      * identifier immediately followed by '=': 'TZ' switches the timezone
        used for later times, any other name goes to process_variable()
      * contains ':': an event, passed to process_event() with its indent
      * anything else: echoed as a note

    The current time starts at today's midnight in US/Eastern.  Progress is
    printed to stdout while parsing.

    Returns a tuple (events, variables): the list returned by
    process_event() without the closing "End Of File" entry, and the list
    returned by process_variable().
    """
    initial_split = re.compile(r'(\s+|[\-:=])')
    empty_line = re.compile(r'^\s*$')
    identifier = re.compile(r'^[^\d\W]\w*\Z$')
    tz = pytz.timezone('US/Eastern')
    midnight = datetime.datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)
    last_time = tz.localize(midnight)
    print(last_time)
    with open(filename, "r") as log:
        for line in log:
            # Reset per line so an event without a range never sees a stale
            # (or, on the first line, unassigned) end time.
            end_time = None
            line_before_comment = line.rstrip().split('#')[0]
            if empty_line.match(line_before_comment):
                continue
            # components[0] is the indent width, the rest are the pieces
            components = _condense_components(line_before_comment, initial_split, empty_line)
            print(line.strip())
            # Try to pull out times
            dt, components = extract_time(components, last_time, tz)
            if components and components[-1] == '-':
                # It is a range: we just found the end time, now get the start
                end_time = dt
                dt, components = extract_time(components[:-1], last_time, tz)
            if dt != last_time:
                # extract_time() hands back its default when no time was found
                last_time = dt
                print("***Time: " + last_time.isoformat())
            if len(components) <= 1:
                continue
            # Length is checked first so a one-word line cannot raise IndexError
            is_assignment = (
                len(components) > 2
                and components[2] == '='
                and identifier.match(components[1])
            )
            if is_assignment:
                try:
                    if components[1] == 'TZ':
                        tz = pytz.timezone(components[3])
                        # pytz zones must be attached with localize();
                        # replace(tzinfo=...) would pick the zone's LMT offset.
                        last_time = tz.localize(last_time.replace(tzinfo=None))
                    else:
                        process_variable(components[1], components[3], last_time)
                except (IndexError, pytz.UnknownTimeZoneError):
                    # Missing value or unknown zone name: echo the line and go on
                    print(components)
            elif ':' in components:
                # It is an event; drop the trailing ':' from the name
                event_name = "".join(components[1:]).rstrip()[:-1]
                process_event(event_name, components[0], last_time, end_time)
            else:
                print(components)
    # Closing pseudo-event at indent 0 ends every event still on the stack
    events = process_event("End Of File", 0, last_time, None)
    # An empty name closes all open variables
    variables = process_variable("", 0, last_time)
    # Leave the "End Of File" entry itself out of the result
    return (events[:-1], variables)
def dump_results(events,vars):
    """Print every variable range, then every event, one per line.

    vars:   sequence of [name, value, start, end] entries
    events: sequence of [name, start, end, parent] entries; each is printed
            with its position in the sequence as its index
    """
    for v in vars:
        print("%s = %s from %s to %s" % (v[0], v[1], v[2], v[3]))
    # enumerate() gives each entry its own position; list.index() would
    # report the first equal entry, which is wrong for duplicate events.
    for position, e in enumerate(events):
        print("%s : %s from %s to %s parent %s" % (position, e[0], e[1], e[2], e[3]))
def extract_time( space_condensed_components , default, tz ):
    """Try to peel a date/time off the end of a component list.

    Progressively longer tails of the list (never including element 0) are
    joined and given to dateutil's parser with *default* filling in missing
    fields; the longest tail that parses wins.  A tail is not tried when the
    component just before it is '=' (it is then the value of an assignment),
    and the search stops at a '-' (the separator of a time range).

    space_condensed_components: list as built by parse(); element 0 is the
        indent width, the rest are strings
    default: datetime returned unchanged when no time is found
    tz: zone whose localize() is applied to a parsed time without tzinfo

    Returns (dt, remaining): the parsed time (or *default*) and the list
    with the consumed tail removed.
    """
    dt = default
    num_components_used = 0
    for i in range(1, len(space_condensed_components)):
        if space_condensed_components[-i-1] == '=':
            # The tail is the value of an assignment, not a time
            continue
        if space_condensed_components[-i] == '-':
            # We are dealing with a time range and have reached its separator
            break
        try:
            candidate = du.parse("".join(space_condensed_components[-i:]), default=default)
            if candidate.tzinfo is None:
                # If time doesn't include a timezone, then localize to preset tz
                candidate = tz.localize(candidate)
        except (ValueError, OverflowError):
            # No time found with this combo (OverflowError: number too large)
            continue
        # Commit only once parsing and localizing both succeeded
        dt = candidate
        num_components_used = i
    if num_components_used > 0:
        space_condensed_components = space_condensed_components[:-num_components_used]
    return (dt, space_condensed_components)
# Events that are still open, innermost last.
# Each entry: [name, indent_level, start_time, end_time, index into events_complete]
event_stack = []
# Every event seen so far, in order of appearance.
# Each entry: [name, start_time, end_time, parent index or -1]
events_complete = []


def process_event( name, indent_level, start_time, end_time ):
    """Record a new event and close the open events it is not nested inside.

    Every open event whose indent is greater than or equal to *indent_level*
    is popped; one that has no end time yet gets *start_time* as its end.
    The new event is appended to events_complete with the innermost
    remaining open event as parent (-1 if there is none) and pushed onto
    event_stack.

    Prints a line for each event closed and for the event started.
    Returns the module-level events_complete list itself (not a copy).
    """
    while event_stack and indent_level <= event_stack[-1][1]:
        closed = event_stack.pop()
        if closed[3] is None:
            # No explicit end time was given: it ends where this event starts
            closed[3] = start_time
            events_complete[closed[4]][2] = start_time
        print("***End Event: %s from %s to %s" % (closed[0], closed[2], closed[3]))
    # Now either the stack is empty, or its head is the parent of this event
    parent = event_stack[-1][4] if event_stack else -1
    index = len(events_complete)
    events_complete.append([name, start_time, end_time, parent])
    event_stack.append([name, indent_level, start_time, end_time, index])
    print("***Start Event: %s" % (name,))
    return events_complete
# One entry per variable setting: [name, value, start_time, end_time or None]
variable_ranges = []
# Maps a variable name to the index of its latest entry in variable_ranges
variable_last_setting = {}


def process_variable( name, value, time ):
    """Record a new variable setting and close out the previous one.

    If *name* is the empty string nothing is recorded; instead the latest
    setting of every variable gets *time* as its end time.  Otherwise the
    previous setting of *name* (if any) is ended at *time* and a new
    open-ended setting [name, value, time, None] is appended.

    Prints a line for each new setting.
    Returns the module-level variable_ranges list itself (not a copy).
    """
    if name == "":
        # Close all open variables
        for latest in variable_last_setting.values():
            variable_ranges[latest][3] = time
        return variable_ranges
    previous = variable_last_setting.get(name)
    if previous is not None:
        variable_ranges[previous][3] = time
    print("***Set %s to %s" % (name, value))
    variable_last_setting[name] = len(variable_ranges)
    variable_ranges.append([name, value, time, None])
    return variable_ranges
if __name__ == "__main__":
    # Script entry point: parse the log named by the first command-line
    # argument and print the resulting variables and events.
    if len(sys.argv) <= 1:
        print("Please supply filename of log to parse")
    else:
        parsed_events, parsed_variables = parse(sys.argv[1])
        dump_results(parsed_events, parsed_variables)