-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinstrument.py
1530 lines (1268 loc) · 77 KB
/
instrument.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
Module for performing instrumentation of the service based on the contents of the verification_config.py file.
"""
import sys
import traceback
import ast
import marshal
import pickle
import hashlib
import requests
import argparse
import os
import json
import base64
import datetime
# for now, we remove the final occurrence of VyPR from the first path to look in for modules
rindex = sys.path[0].rfind("/VyPR")
sys.path[0] = sys.path[0][:rindex] + sys.path[0][rindex + len("/VyPR"):]
# get the formula building functions before we evaluate the configuration code
from VyPR.QueryBuilding import *
from VyPR.SCFG.construction import *
VERDICT_SERVER_URL = None
VERBOSE = False
EXPLANATION = False
DRAW_GRAPHS = False
BYTECODE_EXTENSION = ".pyc"
vypr_module = "."
VERIFICATION_INSTRUCTION = "verification.send_event"
LOGS_TO_STDOUT = False
"""
Specification compilation.
"""
def get_function_asts_in_module(module_ast):
    """
    Traverse a Module AST and collect every function definition it contains.

    Functions defined inside classes get a qualified path of the form
    "Class:function"; functions defined inside top-level ``if`` blocks keep
    the qualification of the enclosing scope.

    :param module_ast: an ``ast.Module`` object.
    :return: list of (qualified function name, ``ast.FunctionDef`` object) pairs.
    """
    found = []
    # depth-first traversal; each pending entry pairs the qualified path built
    # so far with the AST node still to be inspected
    pending = [("", node) for node in module_ast.body]
    while pending:
        path, node = pending.pop()
        if type(node) is ast.FunctionDef:
            found.append(("%s%s" % (path, node.name), node))
        elif hasattr(node, "body"):
            if type(node) is ast.If:
                # conditionally-defined code stays at the same qualification level
                pending.extend((path, child) for child in node.body)
            elif type(node) is ast.ClassDef:
                # extend the path with the class name, terminated by ':'
                separator = "." if (path != "" and path[-1] != ":") else ""
                class_path = "%s%s%s:" % (path, separator, node.name)
                pending.extend((class_path, child) for child in node.body)
    return found
def compile_queries(specification_file):
    """
    Given a specification file, complete the syntax and add imports, then inspect the objects
    used as keys to build the final dictionary structure.

    :param specification_file: path of the file containing the user's queries.
    :return: fully-expanded dictionary structure mapping module name -> function name -> list of queries.
    """
    logger.log("Compiling queries...")
    # load in verification config file
    # to do this, we read in the existing one, write a temporary one with imports added and import that one
    # this is to allow users to write specifications without caring about importing anything from QueryBuilding
    # when we read in the original specification code, we add verification_conf = {} to turn it into valid
    # specification syntax
    with open(specification_file, "r") as specification_handle:
        specification_code = "verification_conf = %s" % specification_handle.read()
    # replace empty lists with a fake property so empty entries still evaluate
    fake_property = "[Forall(q = changes('fake_vypr_var')).Check(lambda q : q('fake_vypr_var').equals(True))]"
    specification_code = specification_code.replace("[]", fake_property)
    with_imports = "from VyPR.QueryBuilding import *\n%s" % specification_code
    with open("VyPR_queries_with_imports.py", "w") as h:
        h.write(with_imports)
    # we now import the specification file
    from VyPR_queries_with_imports import verification_conf
    # this hierarchy will be populated as functions are found in the project that satisfy requirements
    final_hierarchy = {}
    # finally, we process the keys of the verification_conf dictionary
    # these keys are objects representing searches we should perform to generate the final expanded specification
    # with fully-qualified module names
    # we have this initial step to enable developers to write more sophisticated selection criteria the functions
    # to which they want to apply a query
    # we go through each function in the project we're instrumenting, check if there's a key in the initial
    # configuration file whose criteria are fulfilled by the function and, if so, add to the hierarchy
    for (root, directories, files) in os.walk("."):
        for file in files:
            # read in the file, traverse its AST structure to find all the functions and then determine
            # whether it satisfies a function selector
            # only consider Python files
            filename = os.path.join(root, file)
            # NOTE(review): this removes "/" rather than replacing it with "." - confirm the resulting
            # module names match those given in Module selectors
            module_name = "%s%s" % (root[1:].replace("/", ""), file.replace(".py", ""))
            if (filename[-3:] == ".py"
                    and "VyPR" not in filename
                    and "venv" not in filename):
                # traverse the AST structure (close the handle as soon as the source is read)
                with open(filename, "r") as module_handle:
                    code = module_handle.read()
                module_asts = ast.parse(code)
                function_asts = get_function_asts_in_module(module_asts)
                # process each function
                for (function_name, function_ast) in function_asts:
                    # construct the SCFG
                    scfg = CFG()
                    scfg.process_block(function_ast.body)
                    # process each function selector
                    for function_selector in verification_conf:
                        if type(function_selector) is Functions:
                            if function_selector.is_satisfied_by(function_ast, scfg):
                                # add to the final hierarchy
                                if not final_hierarchy.get(module_name):
                                    final_hierarchy[module_name] = {}
                                if not final_hierarchy[module_name].get(function_name):
                                    final_hierarchy[module_name][function_name] = verification_conf[function_selector]
                                else:
                                    final_hierarchy[module_name][function_name] += verification_conf[function_selector]
                        elif type(function_selector) is Module:
                            if (module_name == function_selector._module_name
                                    and function_name == function_selector._function_name):
                                # add to the final hierarchy
                                if not final_hierarchy.get(module_name):
                                    final_hierarchy[module_name] = {}
                                if not final_hierarchy[module_name].get(function_name):
                                    final_hierarchy[module_name][function_name] = verification_conf[function_selector]
                                else:
                                    final_hierarchy[module_name][function_name] += verification_conf[function_selector]
    # report via the instrumentation logger instead of a leftover debug print
    logger.log(final_hierarchy)
    return final_hierarchy
class InstrumentationLog(object):
    """
    Handles logging during instrumentation: timestamped messages are written
    to a per-run file under instrumentation_logs/, optionally echoed to stdout.
    """

    def __init__(self, logs_to_stdout):
        self.logs_to_stdout = logs_to_stdout
        # make sure the log directory exists before deriving the file name
        if not os.path.isdir("instrumentation_logs"):
            os.mkdir("instrumentation_logs")
        # derive a filesystem-safe file name from the current UTC timestamp
        timestamp = str(datetime.datetime.utcnow())
        for character in (" ", ":", ".", "-"):
            timestamp = timestamp.replace(character, "_")
        self.log_file_name = "instrumentation_logs/%s" % timestamp
        self.handle = None

    def start_logging(self):
        # open the log file in append mode
        self.handle = open(self.log_file_name, "a")

    def end_logging(self):
        self.handle.close()

    def log(self, message):
        # silently does nothing until start_logging() has been called
        if self.handle:
            message = "[VyPR instrumentation - %s] %s" % (str(datetime.datetime.utcnow()), message)
            self.handle.write("%s\n" % message)
            # flush to disk so a log survives an unhandled exception later on
            self.handle.flush()
            if self.logs_to_stdout:
                print(message)
"""
Bytecode writing functions - these depend on the Python version.
"""
def compile_bytecode_and_write(asts, file_name_without_extension):
    """Compile ASTs to bytecode then write to file. The method we use depends on the Python version.

    :param asts: the instrumented AST of the whole module, ready to compile.
    :param file_name_without_extension: module path without the ".py" suffix.
    """
    # the backup name is also the "filename" baked into the compiled code object
    backup_file_name = "%s.py.inst" % file_name_without_extension
    instrumented_code = compile(asts, backup_file_name, "exec")
    # append an underscore to indicate that it's instrumented - removed for now
    instrumented_file_name = "%s%s" % (file_name_without_extension, BYTECODE_EXTENSION)
    logger.log("Writing instrumented bytecode to %s." % instrumented_file_name)
    import struct
    with open(instrumented_file_name, "wb") as h:
        if sys.version_info[0] < 3:
            # Python 2 .pyc header: magic number followed by the modification time
            import py_compile
            import time
            h.write(py_compile.MAGIC)
            py_compile.wr_long(h, long(time.time()))
        else:
            # Python 3 .pyc header
            # NOTE(review): this packs magic + bytecode length + mtime; CPython's own
            # layout is magic, mtime, source size (3.3-3.6) or the 16-byte
            # magic, flags, mtime, source-size header of 3.7+ (PEP 552) - confirm
            # against the interpreter version that will load these files
            import importlib
            mtime = int(os.stat("%s.py" % file_name_without_extension).st_mtime)
            preamble = struct.pack('<4sll', importlib.util.MAGIC_NUMBER, len(instrumented_code.co_code), mtime)
            h.write(preamble)
        # the write operation is the same regardless of Python version
        marshal.dump(instrumented_code, h)
    # rename the original file so it doesn't overwrite bytecode at runtime with recompilation
    logger.log("Renaming original file to .py.inst suffix")
    os.rename("%s.py" % file_name_without_extension, "%s.py.inst" % file_name_without_extension)
"""
End of bytecode writing functions.
"""
def scfg_to_tree(root):
"""
Given a root vertex, compute the set of vertices and edges reachable from that
vertex in the scfg.
"""
reachable_set = []
traversal_stack = [root]
traversed_edges = []
while len(traversal_stack) > 0:
top_vertex = traversal_stack.pop()
for edge in top_vertex.edges:
if not (edge in traversed_edges):
reachable_set.append(edge)
reachable_set.append(edge._target_state)
traversed_edges.append(edge)
traversal_stack.append(edge._target_state)
return reachable_set
def construct_reachability_map(scfg):
    """
    Map every vertex of the given scfg to the list of elements reachable from
    it, computed by a depth-first search starting at each vertex.
    """
    return {vertex: scfg_to_tree(vertex) for vertex in scfg.vertices}
def compute_binding_space(quantifier_sequence, scfg, reachability_map, current_binding=None):
    """
    Given a sequence of quantifiers over symbolic states/edges in the given scfg, compute the space of bindings that
    can be given to the formula to which this quantifier sequence is applied. The current_binding given may be
    partial, but represents the current point in the space of bindings upto which we have traversed.

    :param quantifier_sequence: object whose _bind_variables drive the search.
    :param scfg: the symbolic control-flow graph of the function being instrumented.
    :param reachability_map: map from each vertex to the elements reachable from it.
    :param current_binding: the (possibly partial) binding built so far; defaults to empty.
    :return: list of complete bindings (lists of SCFG elements).
    """
    # use a None sentinel rather than a mutable default argument
    if current_binding is None:
        current_binding = []
    if len(current_binding) == 0:
        # we've just started - compute the static qd for the first quantifier,
        # then iterate over it and recurse into the list of quantifiers for each element
        if type(list(quantifier_sequence._bind_variables)[0]) is StaticState:
            if list(quantifier_sequence._bind_variables)[0]._name_changed:
                # a name was given as selection criteria
                variable_changed = list(quantifier_sequence._bind_variables)[0]._name_changed
                qd = list(filter(lambda symbolic_state: symbolic_state._name_changed == variable_changed \
                                                        or variable_changed in symbolic_state._name_changed,
                                 scfg.vertices))
                # we also check loop variables
                # when instruments are placed, if a loop vertex is processed instrumentation will change accordingly
                for vertex in scfg.vertices:
                    if vertex._name_changed == ["loop"]:
                        if (type(vertex._structure_obj.target) is ast.Name and
                                vertex._structure_obj.target.id == variable_changed):
                            # the variable we're looking for was found as a simple loop variable
                            qd.append(vertex)
                        # an ast.Tuple node is not itself iterable - its child nodes live in .elts
                        elif (type(vertex._structure_obj.target) is ast.Tuple and
                                variable_changed in list(map(lambda item: item.id,
                                                             vertex._structure_obj.target.elts))):
                            # the loop variable we're looking for was found inside a tuple
                            qd.append(vertex)
            else:
                qd = []
                # a list of coordinates were given
                if type(list(quantifier_sequence._bind_variables)[0]._instruction_coordinates) is list:
                    coordinates = list(quantifier_sequence._bind_variables)[0]._instruction_coordinates
                else:
                    coordinates = [list(quantifier_sequence._bind_variables)[0]._instruction_coordinates]
                for coordinate in coordinates:
                    # get all vertices whose previous edges have statements with matching lineno values,
                    # sort the col_offset values in ascending order, then get the vertex at the index
                    # specified by the coordinate
                    if type(coordinate) is int:
                        line_number = coordinate
                        offset = 0
                    else:
                        line_number = coordinate[0]
                        if len(coordinate) == 1:
                            offset = 0
                        else:
                            offset = coordinate[1]
                    relevant_vertices = list(filter(
                        lambda symbolic_state: not (symbolic_state._previous_edge is None) \
                                               and not (type(symbolic_state._previous_edge._instruction) is str) \
                                               and symbolic_state._previous_edge._instruction.lineno == line_number,
                        scfg.vertices
                    ))
                    relevant_vertices.sort(key=lambda vertex: vertex._previous_edge._instruction.col_offset)
                    relevant_vertex = relevant_vertices[offset]
                    qd.append(relevant_vertex)
        elif type(list(quantifier_sequence._bind_variables)[0]) is StaticTransition:
            variable_operated_on = list(quantifier_sequence._bind_variables)[0]._operates_on
            relevant_target_vertices = list(filter(
                lambda symbolic_state: symbolic_state._name_changed == variable_operated_on \
                                       or variable_operated_on in symbolic_state._name_changed,
                scfg.vertices
            ))
            qd = list(map(lambda symbolic_state: symbolic_state._previous_edge, relevant_target_vertices))
        binding_space = []
        # recurse with (possibly partial) bindings
        for element in qd:
            binding_space += compute_binding_space(quantifier_sequence, scfg, reachability_map, [element])
        logger.log("Computed whole binding space")
        logger.log(binding_space)
        return binding_space
    elif len(current_binding) < len(list(quantifier_sequence._bind_variables)):
        # we have a partial binding
        # find the next quantifier
        next_index = len(current_binding)
        next_quantifier = list(quantifier_sequence._bind_variables)[next_index]
        # find the position in the quantifier sequence on which the next quantifier depends
        index_in_quantifier_sequence = list(quantifier_sequence._bind_variables).index(
            next_quantifier._required_binding)
        # get the current value at that position in the binding we have
        current_binding_value = current_binding[index_in_quantifier_sequence]
        # use the type of the qd we need to traverse the scfg from this point
        if type(next_quantifier) is StaticState:
            logger.log("Computing unbounded future states according to %s" % next_quantifier)
            # TODO: implement state-based unbounded quantification, hasn't been needed yet
        elif type(next_quantifier) is StaticTransition:
            logger.log("Computing unbounded future transitions according to %s using binding %s" % (
                next_quantifier, current_binding))
            # only works for vertex inputs; this has to cater for edges that are both assignments and
            # function calls (assignments of function call return values)
            qd = list(filter(lambda edge: hasattr(edge, "_operates_on") and \
                                          (edge._operates_on == next_quantifier._operates_on or \
                                           next_quantifier._operates_on in edge._operates_on),
                             reachability_map[current_binding_value]))
        # compute the bindings from this new qd
        binding_space = []
        for element in qd:
            binding_space += compute_binding_space(quantifier_sequence, scfg, reachability_map,
                                                   current_binding + [element])
        return binding_space
    else:
        # we now have a complete binding - return it
        return [current_binding]
def get_qualifier_subsequence(function_qualifier):
    """
    Tokenise a fully-qualified function name into names and separator symbols.

    The separator between two names encodes their relationship:
    a/b means a is a directory containing b; a.b means b is a member of a,
    so a is either a module or a class. The resulting token list alternates
    names and separators.
    """
    tokens = []
    segment_start = 0
    final_index = len(function_qualifier) - 1
    for (position, symbol) in enumerate(function_qualifier):
        if symbol == "." or symbol == "/":
            # close the current name segment and record the separator itself
            tokens.append(function_qualifier[segment_start:position])
            tokens.append(symbol)
            segment_start = position + 1
        elif position == final_index:
            # final character of a trailing name - emit the remaining segment
            tokens.append(function_qualifier[segment_start:])
    return tokens
def get_file_from_qualifier(qualifier_chain):
    """
    Given a qualifier chain that points to a function/method, find the file name in which the function/module is found.
    """
    # for now, just modify the string given and use that as the filename
    # the importlib method was accidentally starting the service's monitoring thread
    logger.log("Getting file from qualifier %s" % qualifier_chain)
    # everything before the first '.' is the module part, which maps directly to a file;
    # the remainder of the qualifier chain points to something inside that file
    prefix_end = qualifier_chain.index(".")
    return "".join(qualifier_chain[0:prefix_end]) + ".py"
def get_function_from_qualifier(qualifier_chain):
    """
    Strip the leading module component from a qualifier chain, leaving the part
    that points to the function/method inside that module.
    """
    first_dot = qualifier_chain.index(".")
    return qualifier_chain[first_dot + 1:]
def write_scfg_to_file(scfg, file_name):
    """
    Given an scfg and a file name, write the scfg in dot format to the file.
    Does nothing unless graph drawing is enabled via DRAW_GRAPHS.
    """
    if DRAW_GRAPHS:
        from graphviz import Digraph
        diagram = Digraph()
        diagram.attr("graph", splines="true", fontsize="10")
        node_shape = "rectangle"
        for vertex in scfg.vertices:
            diagram.node(str(id(vertex)), str(vertex._name_changed), shape=node_shape)
            for edge in vertex.edges:
                # label each edge with what it operates on, its condition and path length
                label = "%s - %s - path length = %s" % \
                        (str(edge._operates_on),
                         edge._condition,
                         str(edge._target_state._path_length))
                diagram.edge(str(id(vertex)), str(id(edge._target_state)), label)
        diagram.render(file_name)
        logger.log("Writing SCFG to file '%s'." % file_name)
def post_to_verdict_server(url, data):
    """
    Given a url (extension of the base URL set by configuration) and some data, send a post request to the verdict server.
    """
    global VERDICT_SERVER_URL
    full_url = os.path.join(VERDICT_SERVER_URL, url)
    return requests.post(full_url, data).text
def is_verdict_server_reachable():
    """Try to query the index page of the verdict server.

    :return: True if a GET on VERDICT_SERVER_URL succeeds, False on any failure.
    """
    global VERDICT_SERVER_URL
    try:
        requests.get(VERDICT_SERVER_URL)
        return True
    # catch Exception rather than a bare except so that KeyboardInterrupt and
    # SystemExit are not silently swallowed
    except Exception:
        return False
def read_configuration(file):
    """
    Read in 'file', parse into an object and return.
    """
    # parse straight from the open handle; the context manager closes it for us
    with open(file, "r") as config_handle:
        return json.load(config_handle)
def get_instrumentation_points_from_comp_sequence(value_from_binding, moves):
    """
    Given a starting point (value_from_binding) and a set of moves,
    traverse the SCFG and determine the set of instrumentation points.

    :param value_from_binding: a CFGVertex or CFGEdge at which traversal starts.
    :param moves: sequence of composition operators (e.g. NextStaticTransition) to apply.
    :return: list of SCFG elements at which instruments should be placed.
    """
    # iterate through the moves we have to make,
    # using the type of the operator used in the move to compute the points we have to instrument
    # we set the default set to include just the current binding
    # for the case where no transformation happens
    instrumentation_points = [value_from_binding]
    # currently only works for a single move
    # for multiple moves we need to apply the transformation to each "previous" instrumentation point
    # TODO: this code needs to be changed to support nesting.
    for move in moves:
        if type(move) is NextStaticTransition:
            calls = []
            # NOTE(review): `scfg` is a free global set elsewhere in the module, not a
            # parameter - confirm it is always bound before this function is called
            if type(value_from_binding) is CFGVertex:
                scfg.next_calls(value_from_binding, move._operates_on, calls, marked_vertices=[])
            elif type(value_from_binding) is CFGEdge:
                scfg.next_calls(value_from_binding._target_state, move._operates_on, calls, marked_vertices=[])
            instrumentation_points = calls
        elif type(move) in [SourceStaticState, DestinationStaticState]:
            # we don't need to do anything with these yet
            pass
    return instrumentation_points
def instrument_point_state(state, name, point, binding_space_indices,
                           atom_index, atom_sub_index, instrumentation_point_db_ids,
                           measure_attribute=None):
    """
    Place a state-recording instrument in the program's AST at the given point.

    state is the PyCFTL object, and point is the part of the SCFG found by traversal.

    :param state: PyCFTL state object (e.g. SourceStaticState / DestinationStaticState) being observed.
    :param name: name of the program variable/expression whose value is recorded.
    :param point: SCFG element (vertex or edge) at which the instrument is placed.
    :param binding_space_indices: index/indices identifying the binding in the binding space.
    :param atom_index: index of the atom within the formula.
    :param atom_sub_index: sub-index inside the atom (used for mixed atoms).
    :param instrumentation_point_db_ids: verdict-server database id(s) for this instrumentation point.
    :param measure_attribute: optional transformation of the observed value -
        "length", "type" or "time_attained"; None records the raw value.
    """
    global VERIFICATION_INSTRUCTION
    logger.log("Instrumenting point %s" % point)
    # build the recording code; the alias turns the observed name into a safe
    # Python identifier (dots and parentheses replaced with underscores)
    if measure_attribute == "length":
        state_variable_alias = name.replace(".", "_").replace("(", "__").replace(")", "__")
        state_recording_instrument = "record_state_%s = len(%s); " % (state_variable_alias, name)
    elif measure_attribute == "type":
        state_variable_alias = name.replace(".", "_").replace("(", "__").replace(")", "__")
        state_recording_instrument = "record_state_%s = type(%s).__name__; " % (state_variable_alias, name)
    elif measure_attribute == "time_attained":
        state_variable_alias = "time_attained_%i" % atom_sub_index
        state_recording_instrument = "record_state_%s = vypr.get_time(); " % state_variable_alias
        # the only purpose here is to match what is expected in the monitoring algorithm
        name = "time"
    else:
        state_variable_alias = name.replace(".", "_").replace("(", "__").replace(")", "__")
        state_recording_instrument = "record_state_%s = %s; " % (state_variable_alias, name)
    # note that observed_value is used three times:
    # 1) and 2) to capture the time attained by the state for checking of a property - this is duplicated
    # because we have the start and end time of the state, which is the same because states are instantaneous.
    # 3) to capture the time at which an observation was received - it makes sense that these times would
    # be the same.
    instrument_tuple = ("'{formula_hash}', 'instrument', '{function_qualifier}', {binding_space_index}, "
                        "{atom_index}, {atom_sub_index}, {instrumentation_point_db_id}, {observed_value}, "
                        "{observed_value}, {{ '{atom_program_variable}' : {observed_value} }}, __thread_id") \
        .format(
            formula_hash=formula_hash,
            function_qualifier=instrument_function_qualifier,
            binding_space_index=binding_space_indices,
            atom_index=atom_index,
            atom_sub_index=atom_sub_index,
            instrumentation_point_db_id=instrumentation_point_db_ids,
            atom_program_variable=name,
            observed_value=("record_state_%s" % state_variable_alias)
        )
    state_recording_instrument += "%s((%s))" % (VERIFICATION_INSTRUCTION, instrument_tuple)
    # the generated code holds two statements: the recording assignment and the send call
    record_state_ast = ast.parse(state_recording_instrument).body[0]
    queue_ast = ast.parse(state_recording_instrument).body[1]
    if type(state) is SourceStaticState or type(state) is DestinationStaticState:
        # if the state we're measuring a property of is derived from a source/destination operator,
        # then the instrumentation point we're given is an SCFG edge which contains
        # an instruction for us to place a state recording instrument before
        logger.log("Adding state recording instrument for source or target.")
        parent_block = point._instruction._parent_body
        record_state_ast.lineno = point._instruction.lineno
        record_state_ast.col_offset = point._instruction.col_offset
        queue_ast.lineno = point._instruction.lineno
        queue_ast.col_offset = point._instruction.col_offset
        index_in_block = parent_block.index(point._instruction)
        if type(state) is SourceStaticState:
            # for source state recording, we record the state, but only insert its value after
            # this is so triggers can be inserted before normal instruments without introducing
            # a special case for trigger insertion
            parent_block.insert(index_in_block, queue_ast)
            parent_block.insert(index_in_block, record_state_ast)
        elif type(state) is DestinationStaticState:
            parent_block.insert(index_in_block + 1, queue_ast)
            parent_block.insert(index_in_block + 1, record_state_ast)
    else:
        if point._name_changed == ["loop"]:
            # we're instrumenting the change of a loop variable
            logger.log("Performing instrumentation for loop variable.")
            # determine the edge leading into the loop body
            for edge in point.edges:
                if edge._condition == ["enter-loop"]:
                    # place an instrument before the instruction on this edge
                    parent_block = edge._instruction._parent_body
                    record_state_ast.lineno = edge._instruction.lineno
                    record_state_ast.col_offset = edge._instruction.col_offset
                    queue_ast.lineno = edge._instruction.lineno
                    queue_ast.col_offset = edge._instruction.col_offset
                    index_in_block = parent_block.index(edge._instruction)
                    # insert instruments in reverse order
                    parent_block.insert(index_in_block, queue_ast)
                    parent_block.insert(index_in_block, record_state_ast)
        else:
            # we're instrumenting a normal vertex where there is an explicit instruction
            logger.log("Not source or destination state - performing normal instrumentation.")
            incident_edge = point._previous_edge
            parent_block = incident_edge._instruction._parent_body
            record_state_ast.lineno = incident_edge._instruction.lineno
            record_state_ast.col_offset = incident_edge._instruction.col_offset
            queue_ast.lineno = incident_edge._instruction.lineno
            queue_ast.col_offset = incident_edge._instruction.col_offset
            index_in_block = parent_block.index(incident_edge._instruction)
            # insert instruments in reverse order
            parent_block.insert(index_in_block + 1, queue_ast)
            parent_block.insert(index_in_block + 1, record_state_ast)
def instrument_point_transition(atom, point, binding_space_indices, atom_index,
                                atom_sub_index, instrumentation_point_db_ids):
    """
    Place timing instruments around the instruction at *point* so that the
    duration of the transition described by *atom* can be observed.

    :param atom: the PyCFTL atom from which the composition sequence is derived.
    :param point: SCFG element whose underlying instruction is to be timed.
    :param binding_space_indices: index/indices of the binding in the binding space.
    :param atom_index: index of the atom within the formula.
    :param atom_sub_index: sub-index inside the atom (selects lhs/rhs for mixed atoms).
    :param instrumentation_point_db_ids: verdict-server database id(s) for this point.
    """
    composition_sequence = derive_composition_sequence(atom)
    # if the composition sequence was derived from a mixed atom,
    # we use the atom_sub_index we're given to decide which sequence to take
    if type(composition_sequence) is dict:
        if atom_sub_index == 0:
            composition_sequence = composition_sequence["lhs"]
        else:
            composition_sequence = composition_sequence["rhs"]
    # reverse the sequence and drop its final element
    composition_sequence = list(reversed(composition_sequence))[:-1]
    # if the atom asked for program variables to be recorded alongside the observation,
    # build a dictionary-literal string capturing them at the point of measurement
    if hasattr(composition_sequence[-1], "_record") and composition_sequence[-1]._record:
        states = []
        for var in composition_sequence[-1]._record:
            states.append("'%s' : %s" % (var, var))
        state_string = ", ".join(states)
        state_dict = "{%s}" % state_string
    else:
        state_dict = "{}"
    # code for the three instruments: start timer, end timer, and the duration
    # computation followed by the send call to the monitoring machinery
    timer_start_statement = "__timer_s = vypr.get_time()"
    timer_end_statement = "__timer_e = vypr.get_time()"
    time_difference_statement = "__duration = __timer_e - __timer_s; "
    instrument_tuple = ("'{formula_hash}', 'instrument', '{function_qualifier}', {binding_space_index}," +
                        "{atom_index}, {atom_sub_index}, {instrumentation_point_db_id}, {obs_time}, {obs_end_time}, "
                        "{observed_value}, __thread_id, {state_dict}") \
        .format(
            formula_hash=formula_hash,
            function_qualifier=instrument_function_qualifier,
            binding_space_index=binding_space_indices,
            atom_index=atom_index,
            atom_sub_index=atom_sub_index,
            instrumentation_point_db_id=instrumentation_point_db_ids,
            obs_time="__timer_s",
            obs_end_time="__timer_e",
            observed_value="__duration",
            state_dict=state_dict
        )
    time_difference_statement += "%s((%s))" % (VERIFICATION_INSTRUCTION, instrument_tuple)
    start_ast = ast.parse(timer_start_statement).body[0]
    end_ast = ast.parse(timer_end_statement).body[0]
    difference_ast = ast.parse(time_difference_statement).body[0]
    queue_ast = ast.parse(time_difference_statement).body[1]
    # give every generated node the position of the instruction it wraps
    start_ast.lineno = point._instruction.lineno
    start_ast.col_offset = point._instruction.col_offset
    end_ast.lineno = point._instruction.lineno
    end_ast.col_offset = point._instruction.col_offset
    difference_ast.lineno = point._instruction.lineno
    difference_ast.col_offset = point._instruction.col_offset
    queue_ast.lineno = point._instruction.lineno
    queue_ast.col_offset = point._instruction.col_offset
    index_in_block = point._instruction._parent_body.index(point._instruction)
    # insert instruments in reverse order
    point._instruction._parent_body.insert(index_in_block + 1, queue_ast)
    point._instruction._parent_body.insert(index_in_block + 1, difference_ast)
    point._instruction._parent_body.insert(index_in_block + 1, end_ast)
    point._instruction._parent_body.insert(index_in_block, start_ast)
def place_path_recording_instruments(scfg):
    """Insert path recording instruments into the monitored program's ASTs.

    These instruments don't depend on the formula being checked, so this is
    done independently of binding space computation.  For every branching
    statement recorded in ``scfg.branch_initial_statements``, the serialised
    branching condition is sent to the verdict server (which returns the
    existing ID if the condition is already stored) and a path recording
    instrument carrying that ID is inserted into the relevant block.

    Reads the module-level ``formula_hash``, ``instrument_function_qualifier``,
    ``VERIFICATION_INSTRUCTION`` and ``VERDICT_SERVER_URL``.

    :param scfg: symbolic control-flow graph of the function being
        instrumented; its ``branch_initial_statements`` entries are tuples
        whose first element is the branch kind.
    """

    def store_branching_condition(condition_dict):
        # Send a branching condition to the verdict server and return its
        # integer ID.  If the condition already exists in the database, the
        # verdict server will return the existing ID.  Instrumentation cannot
        # proceed without the server, so exit the process on any failure.
        try:
            return int(post_to_verdict_server("store_branching_condition/",
                                              data=json.dumps(condition_dict)))
        except Exception:
            traceback.print_exc()
            logger.log(
                "There was a problem with the verdict server at '%s'. Instrumentation cannot be "
                "completed." % VERDICT_SERVER_URL)
            exit()

    def path_instrument_ast(branching_condition_id):
        # Build the source of a path recording instrument carrying the given
        # condition ID and parse it into a statement AST ready for insertion.
        instrument_code = "%s((\"%s\", \"path\", \"%s\", %i))" % (
            VERIFICATION_INSTRUCTION, formula_hash, instrument_function_qualifier,
            branching_condition_id)
        return ast.parse(instrument_code).body[0]

    for vertex_information in scfg.branch_initial_statements:
        logger.log("-" * 100)
        if vertex_information[0] in ['conditional', 'try-catch']:
            if vertex_information[0] == 'conditional':
                logger.log(
                    "Placing branch recording instrument for conditional with first instruction %s in "
                    "block" %
                    vertex_information[1])
            else:
                logger.log(
                    "Placing branch recording instrument for try-catch with first instruction %s in "
                    "block" %
                    vertex_information[1])
            # send branching condition to verdict server, take the ID from the
            # response and use it in the path recording instruments.
            branching_condition_id = store_branching_condition({
                "serialised_condition": vertex_information[2]
            })
            instrument_ast = path_instrument_ast(branching_condition_id)
            # place the instrument just before the first statement of the block
            index_in_parent = vertex_information[1]._parent_body.index(vertex_information[1])
            vertex_information[1]._parent_body.insert(index_in_parent, instrument_ast)
            logger.log("Branch recording instrument placed")
        elif vertex_information[0] == "conditional-no-else":
            # no else was present in the conditional, so we add a path recording
            # instrument to the (empty) else block
            logger.log("Placing branch recording instrument for conditional with no else")
            branching_condition_id = store_branching_condition({
                "serialised_condition": vertex_information[2]
            })
            instrument_ast = path_instrument_ast(branching_condition_id)
            vertex_information[1].orelse.insert(0, instrument_ast)
            logger.log("Branch recording instrument placed")
        elif vertex_information[0] in ['post-conditional', 'post-try-catch']:
            # need this to decide if we've left a conditional, since path
            # lengths reset after conditionals
            if vertex_information[0] == 'post-conditional':
                logger.log("Processing post conditional path instrument")
                logger.log(vertex_information)
                logger.log(
                    "Placing branch recording instrument for end of conditional at %s - %i in parent "
                    "block - line no %i" % \
                    (vertex_information[1],
                     vertex_information[1]._parent_body.index(vertex_information[1]),
                     vertex_information[1].lineno))
                condition_dict = {
                    "serialised_condition": "conditional exited"
                }
            else:
                logger.log("Processing post try-catch path instrument")
                logger.log(vertex_information)
                logger.log(
                    "Placing branch recording instrument for end of try-catch at %s - %i in parent "
                    "block - line no %i" % \
                    (vertex_information[1],
                     vertex_information[1]._parent_body.index(vertex_information[1]),
                     vertex_information[1].lineno))
                condition_dict = {
                    "serialised_condition": "try-catch exited"
                }
            branching_condition_id = store_branching_condition(condition_dict)
            instrument_code_ast = path_instrument_ast(branching_condition_id)
            # place the instrument just after the statement ending the construct
            index_in_parent = vertex_information[1]._parent_body.index(vertex_information[1]) + 1
            logger.log(vertex_information[1]._parent_body)
            logger.log(index_in_parent)
            vertex_information[1]._parent_body.insert(index_in_parent, instrument_code_ast)
            logger.log(vertex_information[1]._parent_body)
        elif vertex_information[0] == 'loop':
            logger.log(
                "Placing branch recording instrument for loop with first instruction %s in body" %
                vertex_information[1])
            # one instrument records entry into the loop body...
            instrument_inside_loop_ast = path_instrument_ast(
                store_branching_condition({
                    "serialised_condition": vertex_information[2]
                }))
            # ...and one records leaving the loop
            instrument_outside_loop_ast = path_instrument_ast(
                store_branching_condition({
                    "serialised_condition": vertex_information[4]
                }))
            # insert at beginning of loop body
            inside_index_in_parent = vertex_information[1]._parent_body.index(vertex_information[1])
            # insert just after loop body
            outside_index_in_parent = vertex_information[3]._parent_body.index(
                vertex_information[3]) + 1
            vertex_information[1]._parent_body.insert(inside_index_in_parent,
                                                      instrument_inside_loop_ast)
            vertex_information[3]._parent_body.insert(outside_index_in_parent,
                                                      instrument_outside_loop_ast)
            logger.log("Branch recording instrument for conditional placed")
            logger.log("=" * 100)
def place_function_begin_instruments(function_def, formula_hash, instrument_function_qualifier):
    """Prepend start-of-function instruments to *function_def*'s body.

    Four statements are inserted so that, in execution order, the function
    first captures the VyPR start time, imports ``threading``, captures the
    current thread ident as ``__thread_id``, and finally sends the
    "function start" event via the module-level ``VERIFICATION_INSTRUCTION``.

    :param function_def: the ``ast.FunctionDef`` node being instrumented.
    :param formula_hash: hash identifying the formula, embedded in the event.
    :param instrument_function_qualifier: fully-qualified function name,
        embedded in the event.
    """
    # NOTE: only problem with this is that the "end" instrument is inserted before the return,
    # so a function call in the return statement maybe missed if it's part of verification...
    thread_id_capture = "import threading; __thread_id = threading.current_thread().ident;"
    vypr_start_time_instrument = "vypr_start_time = vypr.get_time();"
    start_instrument = \
        "%s((\"%s\", \"function\", \"%s\", \"start\", vypr_start_time, \"%s\", __thread_id))" \
        % (VERIFICATION_INSTRUCTION, formula_hash, instrument_function_qualifier, formula_hash)
    # parse the two-statement capture once and unpack both statements,
    # rather than parsing the same source twice
    threading_import_ast, thread_id_capture_ast = ast.parse(thread_id_capture).body
    vypr_start_time_ast = ast.parse(vypr_start_time_instrument).body[0]
    start_ast = ast.parse(start_instrument).body[0]
    # attribute every inserted node to the original first statement's line
    first_lineno = function_def.body[0].lineno
    for new_node in (threading_import_ast, thread_id_capture_ast, vypr_start_time_ast, start_ast):
        new_node.lineno = first_lineno
    # insert in reverse so the final order is:
    # start-time capture, threading import, thread id capture, start event
    function_def.body.insert(0, start_ast)
    function_def.body.insert(0, thread_id_capture_ast)
    function_def.body.insert(0, threading_import_ast)
    function_def.body.insert(0, vypr_start_time_ast)
def place_function_end_instruments(function_def, scfg, formula_hash, instrument_function_qualifier):
    """Insert "function end" instruments before every exit of *function_def*.

    An end event is inserted immediately before every return statement found
    in the SCFG and, if the function does not end with a return, appended
    after the last statement of the body.  Uses the module-level
    ``VERIFICATION_INSTRUCTION`` and ``logger``.

    :param function_def: the ``ast.FunctionDef`` node being instrumented.
    :param scfg: symbolic control-flow graph; ``scfg.return_statements``
        holds one vertex per return statement.
    :param formula_hash: hash identifying the formula, embedded in the event.
    :param instrument_function_qualifier: fully-qualified function name,
        embedded in the event.
    """
    # the event source is identical at every insertion point, so build the
    # string once; each insertion still parses its own fresh AST node
    end_instrument = \
        "%s((\"%s\", \"function\", \"%s\", \"end\", vypr_start_time, \"%s\", __thread_id, " \
        "vypr.get_time()))" \
        % (VERIFICATION_INSTRUCTION, formula_hash, instrument_function_qualifier, formula_hash)
    # insert the end instrument before every return statement
    for end_vertex in scfg.return_statements:
        end_ast = ast.parse(end_instrument).body[0]
        end_ast.lineno = end_vertex._previous_edge._instruction._parent_body[-1].lineno
        logger.log("Placing end instrument at line %i." % end_ast.lineno)
        # insert just before the block's final statement (the return itself)
        insertion_position = len(end_vertex._previous_edge._instruction._parent_body) - 1
        end_vertex._previous_edge._instruction._parent_body.insert(insertion_position, end_ast)
        logger.log(end_vertex._previous_edge._instruction._parent_body)
    # if the last instruction in the ast is not a return statement, add an end instrument at the end
    if not (type(function_def.body[-1]) is ast.Return):
        end_ast = ast.parse(end_instrument).body[0]
        logger.log("Placing end instrument at the end of the function body.")
        function_def.body.append(end_ast)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Instrumentation for VyPR.')
parser.add_argument('--verbose', action='store_true',
help='If given, output will be turned on for the instrumentation module.', required=False)
parser.add_argument('--draw-graphs', action='store_true',
help='If given, SCFGs derived from functions to be monitored will be written to GV files and '
'compiled into PDFs.',
required=False)
parser.add_argument('--std-output', action='store_true',
help='If given, every message written to the instrumentation log will also be written to '
'standard out.',
required=False)
args = parser.parse_args()
VERBOSE = args.verbose
DRAW_GRAPHS = args.draw_graphs
LOGS_TO_STDOUT = args.std_output
inst_configuration = read_configuration("vypr.config")
VERDICT_SERVER_URL = inst_configuration.get("verdict_server_url") \
if inst_configuration.get("verdict_server_url") else "http://localhost:9001/"
EXPLANATION = inst_configuration.get("explanation") == "on"
BYTECODE_EXTENSION = inst_configuration.get("bytecode_extension") \
if inst_configuration.get("bytecode_extension") else ".pyc"
VYPR_MODULE = inst_configuration.get("vypr_module") \
if inst_configuration.get("vypr_module") else ""
VERIFICATION_INSTRUCTION = "vypr.send_event"
# VERIFICATION_INSTRUCTION = "print"
# initialise instrumentation logger
logger = InstrumentationLog(LOGS_TO_STDOUT)
# set up the file handle
logger.start_logging()
# reset code to non-instrumented
for directory in os.walk("."):
for file in directory[2]:
if not ("venv" in file):
f = os.path.join(directory[0], file)
if ".py.inst" in f:
# rename to .py
os.rename(f, f.replace(".py.inst", ".py"))
# delete bytecode
os.remove(f.replace(".py.inst", BYTECODE_EXTENSION))
logger.log("Reset file %s to uninstrumented version." % f)
verification_conf = compile_queries("VyPR_queries.py")
verified_modules = verification_conf.keys()
# first, check that the verdict server is reachable
if not (is_verdict_server_reachable()):
print("Verdict server is not reachable. Ending instrumentation - nothing has been done.")
exit()
# for each verified function, find the file in which it is defined
for module in verified_modules:
logger.log("Processing module '%s'." % module)
verified_functions = verification_conf[module].keys()
file_name = module.replace(".", "/") + ".py"
file_name_without_extension = module.replace(".", "/")
# extract asts from the code in the file
code = "".join(open(file_name, "r").readlines())
asts = ast.parse(code)
# add import for init_vypr module
import_code = "from %s import vypr" % VYPR_MODULE
import_ast = ast.parse(import_code).body[0]
import_ast.lineno = asts.body[0].lineno
import_ast.col_offset = asts.body[0].col_offset
asts.body.insert(0, import_ast)
# add vypr datetime import
vypr_datetime_import = "from datetime import datetime as vypr_dt"
datetime_import_ast = ast.parse(vypr_datetime_import).body[0]
datetime_import_ast.lineno = asts.body[0].lineno
datetime_import_ast.col_offset = asts.body[0].col_offset
asts.body.insert(0, datetime_import_ast)
for function in verified_functions:
logger.log("Processing function '%s'." % function)