# preprocessing code for the comprehensiveness test
# generates the versions of the instances with rationale words removed, and the versions with all non-rationale words removed
import copy
import gc

import numpy as np
import torch

import lime.explanation
import lime.lime_text

BATCH_SIZE = 8
def lime_create_index_arrays(instances, pred_fn, explainer, n_samples=10, k_labels=5):
    """Create the indexed strings and index array for the LIME explainer that can then be processed for the faithfulness test.
    Args:
        instances (list(string)): list of instances to create the indexed strings and index array for
        pred_fn (function): function to get the predictions from the model
        explainer (LimeTextExplainer): the explainer to use to get the explanation
        n_samples (int, optional): number of samples to use for the LIME explainer. Defaults to 10.
        k_labels (int, optional): number of labels to explain. Defaults to 5.
    Returns:
        tuple(np.array, np.array): the padded per-instance word arrays, and an index array of
            [instance indices, word indices] marking the rationale words.
    """
    indexed_strs = np.array([])
    # get the amount of padding needed by finding the longest instance
    # unfortunately the overall instance length doesn't correspond to the indexed string length, so an additional loop is needed
    # (padding is also an option on the tokenizer, so this loop could probably be removed later)
    padding_len = 0
    for instance in instances:
        indexed_str = lime.lime_text.IndexedString(instance)
        inst_len = len(indexed_str.as_np)
        if inst_len > padding_len:
            padding_len = inst_len
    # get the single-word list version of each instance from LIME and explain it
    index_array = None
    for i, instance in enumerate(instances):
        indexed_str = lime.lime_text.IndexedString(instance)
        torch.cuda.empty_cache()
        with torch.no_grad():
            exp = explainer.explain_instance(instance, pred_fn, num_features=indexed_str.num_words(), num_samples=n_samples, top_labels=k_labels)
        # build the index array from the explanation map
        exp_map = exp.as_map()
        # collect the rationale words
        for label in exp_map.keys():
            for item in exp_map[label]:
                if index_array is None:
                    index_array = np.array([[i, item[0]]])
                else:
                    # append to the index array so that np.take can be used to mask the data
                    index_array = np.append(index_array, [[i, item[0]]], axis=0)
        # pad and save
        str_as_np = indexed_str.as_np
        padding = np.full(padding_len - len(str_as_np), '', dtype=str)
        str_as_np = np.append(str_as_np, padding)
        if indexed_strs.size == 0:
            indexed_strs = np.array([str_as_np])
        else:
            indexed_strs = np.append(indexed_strs, [str_as_np], axis=0)
    index_array_x = np.transpose(index_array)[0]
    index_array_y = np.transpose(index_array)[1]
    index_array = np.array([index_array_x, index_array_y])
    return indexed_strs, index_array
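
# Example usage (a minimal sketch; `texts` and `predict_proba` are hypothetical stand-ins
# for a corpus and a LIME-compatible classifier function that maps a list of strings to a
# 2-D numpy array of class probabilities):
#
#   explainer = lime.lime_text.LimeTextExplainer()
#   indexed_strs, index_array = lime_create_index_arrays(texts, predict_proba, explainer,
#                                                        n_samples=100, k_labels=2)
#   # indexed_strs: one padded word array per instance
#   # index_array:  [[instance indices], [word indices]] of the rationale words
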
def save_indexed_strs(indexed_strs, index_array, file_path):
    """Saves the indexed strings and index array to an npz file.
    Args:
        indexed_strs (list[list[string]]): list of instances with words stored as separate strings in a list
        index_array (list[list[int]]): list of indexes storing the explanation for each string
        file_path (string): path to save the npz file to
    """
    np.savez(file_path, indexed_strs=indexed_strs, index_array=index_array)
def load_indexed_strs(file_path):
    """Loads the indexed strings and index array from an npz file.
    Args:
        file_path (string): path to the npz file to load
    Returns:
        tuple(np.array, np.array): the indexed strings and the index array
    """
    with np.load(file_path) as data:
        indexed_strs = data['indexed_strs']
        index_array = data['index_array']
    return indexed_strs, index_array
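
# Example round trip (a sketch; the file name is arbitrary, and np.savez appends the
# .npz extension automatically when it is missing):
#
#   save_indexed_strs(indexed_strs, index_array, "lime_rationales.npz")
#   indexed_strs, index_array = load_indexed_strs("lime_rationales.npz")
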
def remove_rationale_words(instances, rationales, join=True, tokenized=False):
    """Remove the rationale words from the instances.
    Args:
        instances (list(list(string))): list of instances to remove the rationale words from. Each instance is a list of words.
        rationales (list(list(int))): list of rationales to remove from the instances. Each rationale is a list of indexes, where the first index is the instance index and the second index is the word index.
        join (bool, optional): automatically join each word list with rationale words removed into a single string. Defaults to True.
        tokenized (bool, optional): whether the instances are pre-tokenized model inputs (a dict with an 'input_ids' tensor) rather than word arrays. Defaults to False.
    Returns:
        list(string) (join True) or list(list(string)) (join False): list of instances with the rationale words removed. Each instance is a string or a list of words.
    """
    inst_rationale_removed = copy.deepcopy(instances)
    # TODO: finish the handling for tokenized data. This involves masking and editing the 'input_ids' key in the
    # dictionary rather than the whole input, as is done for non-tokenized inputs.
    if tokenized:
        rationales_mask = np.zeros(instances['input_ids'].numpy().shape, dtype=bool)
        rationales_mask[rationales[0], rationales[1]] = True
        inst_rationale_removed['input_ids'] = torch.from_numpy(np.delete(inst_rationale_removed['input_ids'].numpy(), np.where(rationales_mask), axis=1))
    else:
        rationales_mask = np.zeros(instances.shape, dtype=bool)
        # set the values of the rationale mask to True in a vectorized manner
        # the rationales are in the format [[instance_index_1, instance_index_2, ...], [word_index_1, word_index_2, ...]]
        rationales_mask[rationales[0], rationales[1]] = True
        # replace every rationale word with a space in a single vectorized step; this is faster than
        # looping over the instances, so do not use a list comprehension here
        inst_rationale_removed = np.where(rationales_mask, " ", instances)
    if join:
        inst_rationale_removed = [''.join(inst_rationale_removed[i].tolist()) for i in range(len(inst_rationale_removed))]
    return inst_rationale_removed
def remove_other_words(instances, rationales, join=True, tokenized=False):
    """Remove all words that are not in the rationale from the instances.
    Args:
        instances (list(list(string))): list of instances to remove the non-rationale words from. Each instance is a list of words.
        rationales (list(list(int))): list of rationales to keep in the instances. Each rationale is a list of indexes, where the first index is the instance index and the second index is the word index.
        join (bool, optional): automatically join each word list with non-rationale words removed into a single string. Defaults to True.
        tokenized (bool, optional): whether the instances are pre-tokenized model inputs (a dict with an 'input_ids' tensor) rather than word arrays. Defaults to False.
    Returns:
        list(string) (join True) or list(list(string)) (join False): list of instances with all non-rationale words removed. Each instance is a string or a list of words.
    """
    inst_other_removed = copy.deepcopy(instances)
    # TODO: finish the handling for tokenized data. This involves masking and editing the 'input_ids' key in the
    # dictionary rather than the whole input, as is done for non-tokenized inputs.
    if tokenized:
        inverse_rationales_mask = np.ones(instances['input_ids'].numpy().shape, dtype=bool)
        inverse_rationales_mask[rationales[0], rationales[1]] = False
        inst_other_removed['input_ids'] = torch.from_numpy(np.delete(inst_other_removed['input_ids'].numpy(), np.where(inverse_rationales_mask), axis=1))
    else:
        # create an inverse of the rationale mask, i.e. every index that is not in the rationale
        inverse_rationales_mask = np.ones(instances.shape, dtype=bool)
        inverse_rationales_mask[rationales[0], rationales[1]] = False
        # replace every non-rationale word with a space in a single vectorized step; this keeps the
        # instance length the same and is faster than looping over the instances
        inst_other_removed = np.where(inverse_rationales_mask, " ", instances)
    if join:
        inst_other_removed = [''.join(inst_other_removed[i].tolist()) for i in range(len(inst_other_removed))]
    return inst_other_removed
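
# Toy example of the two masking helpers (a sketch with hand-built inputs; real word arrays
# and index arrays come from lime_create_index_arrays):
#
#   words = np.array([["the", " ", "movie", " ", "was", " ", "great"]])
#   rationale = np.array([[0], [6]])          # instance 0, word index 6 ("great")
#   remove_rationale_words(words, rationale)  # -> ["the movie was  "]
#   remove_other_words(words, rationale)      # -> ["      great"]
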
def calculate_comprehensiveness(predictions, instances_rationale_removed, model, tokenizer, predictor_func):
    """Calculate the comprehensiveness of the rationales.
    Args:
        predictions (np.array(np.array(float))): list of predictions made with the base instances (no words removed) using the given model.
        instances_rationale_removed (np.array(string)): instances with the rationale words removed, formatted as a np array of strings.
        model (model): the model to compute the comprehensiveness for.
        tokenizer (tokenizer): the tokenizer to use for the model.
        predictor_func (function): the function to use to get the predictions from the model.
    Returns:
        tuple(float, np.array): the mean confidence difference over the samples, and the per-instance differences.
    """
    print("Calculating Comprehensiveness")
    # pass the instances through the model in batches to get the predictions
    torch.cuda.empty_cache()
    predictions_rationale_removed = None
    for i in range(0, len(instances_rationale_removed), BATCH_SIZE):
        end_range = min(i + BATCH_SIZE, len(instances_rationale_removed))
        instances_batch = instances_rationale_removed[i:end_range]
        output_batch = predictor_func(instances_batch, model, tokenizer)
        if predictions_rationale_removed is None:
            predictions_rationale_removed = output_batch
        else:
            predictions_rationale_removed = np.concatenate((predictions_rationale_removed, output_batch), axis=0)
        gc.collect()
    # euclidean distance between the base predictions and the predictions with the rationale removed,
    # summed over the labels (the logits are the classification scores for the OPT model)
    confidence_dif = predictions - predictions_rationale_removed
    confidence_dif = np.linalg.norm(confidence_dif, axis=-1)
    # return the average confidence difference over the samples, plus the per-instance values
    return np.mean(confidence_dif, axis=-1), confidence_dif
def calculate_sufficency(predictions, instances_other_removed, model, tokenizer, predictor_func):
    """Calculates the sufficiency of the rationales.
    Args:
        predictions (np.array(np.array(float))): list of predictions made with the base instances (no words removed) using the given model.
        instances_other_removed (np.array(string)): instances with all non-rationale words removed, formatted as a np array of strings.
        model (model): the model to compute the sufficiency for.
        tokenizer (tokenizer): the tokenizer to use for the model.
        predictor_func (function): the function to use to get the predictions from the model.
    Returns:
        tuple(float, np.array): the mean confidence difference over the samples, and the per-instance differences.
    """
    print("Calculating Sufficiency")
    # pass the rationale-only instances through the model in batches
    torch.cuda.empty_cache()
    predictions_other_removed = None
    for i in range(0, len(instances_other_removed), BATCH_SIZE):
        end_range = min(i + BATCH_SIZE, len(instances_other_removed))
        instances_batch = instances_other_removed[i:end_range]
        output_batch = predictor_func(instances_batch, model, tokenizer)
        if predictions_other_removed is None:
            predictions_other_removed = output_batch
        else:
            predictions_other_removed = np.concatenate((predictions_other_removed, output_batch), axis=0)
        gc.collect()
    # euclidean distance between the base predictions and the predictions with the other words removed
    # (the logits are the classification scores)
    confidence_dif = predictions - predictions_other_removed
    confidence_dif = np.linalg.norm(confidence_dif, axis=-1)
    # return the average confidence difference over the samples, plus the per-instance values
    return np.mean(confidence_dif, axis=-1), confidence_dif
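
# Both metric functions above only assume that predictor_func maps a batch of strings plus
# the model and tokenizer to a 2-D numpy array of class scores. A minimal sketch of such a
# wrapper for a Hugging Face sequence classifier (hypothetical; adapt to the model in use):
#
#   def predict_batch(batch, model, tokenizer):
#       inputs = tokenizer(list(batch), return_tensors="pt", padding=True, truncation=True)
#       with torch.no_grad():
#           logits = model(**inputs).logits
#       return logits.softmax(dim=-1).cpu().numpy()
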
def calculate_faithfulness(instances, instances_rationalle_removed, instances_other_removed, model, tokenizer, predictor_func):
    """Calculate the faithfulness of the rationales.
    Args:
        instances (np.array(np.array(string))): list of instances to compute the faithfulness for, formatted as a list of numpy arrays of words.
        instances_rationalle_removed (np.array(string)): instances with the rationale words removed, formatted as a np array of strings.
        instances_other_removed (np.array(string)): instances with all non-rationale words removed, formatted as a np array of strings.
        model (model): the model to compute the faithfulness for.
        tokenizer (tokenizer): the tokenizer to use for the model.
        predictor_func (function): the function to use to get the predictions from the model.
    Returns:
        tuple(int, list): the index of the minimum faithfulness value, and the list of faithfulness values.
    """
    # generate the base predictions in batches
    predictions = None
    for i in range(0, len(instances), BATCH_SIZE):
        end_range = min(i + BATCH_SIZE, len(instances))
        instances_batch = instances[i:end_range]
        output_batch = predictor_func(instances_batch, model, tokenizer)
        if predictions is None:
            predictions = output_batch
        else:
            predictions = np.concatenate((predictions, output_batch), axis=0)
        gc.collect()
    faithfulness_calc = []
    # sufficiency compares against the instances that keep only the rationale words,
    # comprehensiveness against the instances with the rationale words removed
    sufficency, suf_list = calculate_sufficency(predictions, instances_other_removed, model, tokenizer, predictor_func)
    comprehensiveness, comp_list = calculate_comprehensiveness(predictions, instances_rationalle_removed, model, tokenizer, predictor_func)
    # calculate faithfulness
    faithfulness = sufficency * comprehensiveness
    print()
    print('-- Metrics -------------------------------------------------------------')
    print()
    print("Faithfulness: ", faithfulness)
    print("Comprehensiveness: ", comprehensiveness)
    print("Sufficiency: ", sufficency)
    print()
    print('Sufficiency list:', suf_list)
    print('Comprehensiveness list:', comp_list)
    print()
    print("Comprehensiveness Median: ", np.median(comp_list, axis=-1))
    print("Comprehensiveness q1 (25th percentile): ", np.quantile(comp_list, 0.25, axis=-1))
    print("Comprehensiveness q3 (75th percentile): ", np.quantile(comp_list, 0.75, axis=-1))
    print()
    print("Sufficiency Median: ", np.median(suf_list, axis=-1))
    print("Sufficiency q1 (25th percentile): ", np.quantile(suf_list, 0.25, axis=-1))
    print("Sufficiency q3 (75th percentile): ", np.quantile(suf_list, 0.75, axis=-1))
    print()
    faithfulness_calc.append(faithfulness)
    # return the index of the minimum faithfulness value to get the best method
    # (the list currently holds a single entry)
    return np.argmin(faithfulness_calc), faithfulness_calc
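
# End-to-end sketch of the faithfulness pipeline (hypothetical names: `texts` is a list of
# raw strings and `predict_batch` is a wrapper like the one sketched above; `pred_fn` adapts
# it to LIME's classifier_fn signature, and the padded word arrays are rejoined here so the
# predictor only ever sees plain strings):
#
#   explainer = lime.lime_text.LimeTextExplainer()
#   pred_fn = lambda batch: predict_batch(batch, model, tokenizer)
#   indexed_strs, index_array = lime_create_index_arrays(texts, pred_fn, explainer)
#   rationale_removed = remove_rationale_words(indexed_strs, index_array)
#   other_removed = remove_other_words(indexed_strs, index_array)
#   base_texts = ["".join(row) for row in indexed_strs]
#   best_idx, faithfulness = calculate_faithfulness(
#       base_texts, rationale_removed, other_removed, model, tokenizer, predict_batch)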