# Source: mad_outlier_detection.py (forked from simpson0114/cleanse_pytorch)
# Original file: 104 lines (76 loc), 3.12 KB
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date : 2018-11-28 16:27:19
# @Author : Bolun Wang ([email protected])
# @Link : http://cs.ucsb.edu/~bolunwang
import os
import sys
import time
import numpy as np
#from keras.preprocessing import image
import cv2
##############################
#        PARAMETERS          #
##############################
RESULT_DIR = 'results_Li_rn_tgt7_t0d10_r05_ep5' # directory containing the reverse-engineered trigger visualizations
IMG_FILENAME_TEMPLATE = 'gtsrb_visualize_%s_label_%d.png' # filename pattern, filled with (kind, label), e.g. ('mask', 3)
# input image size (rows, cols, channels) — NOTE(review): INPUT_SHAPE is only
# used by the commented-out keras loading path below; cv2.imread ignores it
IMG_ROWS = 32
IMG_COLS = 32
IMG_COLOR = 3
INPUT_SHAPE = (IMG_ROWS, IMG_COLS, IMG_COLOR)
NUM_CLASSES = 43 # total number of classes in the model (GTSRB per the filename template)
##############################
#      END PARAMETERS        #
##############################
def outlier_detection(l1_norm_list, idx_mapping):
    """Flag labels whose mask L1-norm is an abnormally small outlier.

    Implements the Median Absolute Deviation (MAD) test used by Neural
    Cleanse: a label is flagged as potentially backdoored when its mask
    L1-norm lies BELOW the median by more than 2 MAD-normalized deviations
    (small masks suggest an easy-to-inject trigger).

    Args:
        l1_norm_list: sequence of mask L1-norms, one per analyzed label.
        idx_mapping: dict mapping label id -> index into l1_norm_list.

    Returns:
        List of (label, l1_norm) tuples for flagged labels, sorted by
        ascending L1-norm; empty list when nothing is flagged.
    """
    print("check input l1-norm: ", l1_norm_list)
    consistency_constant = 1.4826  # scales MAD to a std-dev estimate under a normal distribution
    median = np.median(l1_norm_list)
    mad = consistency_constant * np.median(np.abs(l1_norm_list - median))
    print('median: %f, MAD: %f' % (median, mad))
    if mad == 0:
        # All norms are (nearly) identical: no outliers by definition, and
        # dividing by MAD would produce inf/nan anomaly indices.
        print('anomaly index: %f' % 0.0)
        return []
    min_mad = np.abs(np.min(l1_norm_list) - median) / mad
    print('anomaly index: %f' % min_mad)
    flag_list = []
    for y_label, idx in idx_mapping.items():
        anomaly_index = np.abs(l1_norm_list[idx] - median) / mad
        print("label: ", idx, "l1-norm: ", l1_norm_list[idx], "anomaly_index: ", anomaly_index)
        # Only abnormally SMALL norms are suspicious; skip anything above the median.
        if l1_norm_list[idx] > median:
            continue
        if anomaly_index > 2.0:  # ~2-sigma equivalent threshold
            flag_list.append((y_label, l1_norm_list[idx]))
    flag_list.sort(key=lambda pair: pair[1])
    if flag_list:
        print('flagged label list: %s' %
              ', '.join(['%d: %2f' % (y_label, l_norm)
                         for y_label, l_norm in flag_list]))
    return flag_list
def analyze_pattern_norm_dist():
    """Load every reverse-engineered mask image and run MAD outlier detection.

    For each class label, looks for the corresponding mask visualization in
    RESULT_DIR; when present, loads it as a grayscale image rescaled to
    [0, 1], flattens it, and records its position. The per-mask L1 norms are
    then handed to outlier_detection().
    """
    flattened_masks = []
    idx_mapping = {}
    for label in range(NUM_CLASSES):
        mask_path = '%s/%s' % (RESULT_DIR, IMG_FILENAME_TEMPLATE % ('mask', label))
        if not os.path.isfile(mask_path):
            continue
        # Grayscale read (flag 0), rescaled from [0, 255] to [0, 1].
        mask = cv2.imread(mask_path, 0) / 255.0
        flattened_masks.append(mask.flatten())
        idx_mapping[label] = len(flattened_masks) - 1
    l1_norm_list = [np.sum(np.abs(flat)) for flat in flattened_masks]
    print('%d labels found' % len(l1_norm_list))
    print("check idx_mapping", idx_mapping)
    outlier_detection(l1_norm_list, idx_mapping)
if __name__ == '__main__':
    # Entry point: run the mask-norm analysis and report wall-clock duration.
    print('%s start' % sys.argv[0])
    t0 = time.time()
    analyze_pattern_norm_dist()
    print('elapsed time %.2f s' % (time.time() - t0))