-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathresnet.py
133 lines (101 loc) · 4.44 KB
/
resnet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"""Some code sections are taken from
https://github.com/raghakot/keras-resnet
"""
import sys
import numpy as np
from keras.models import Model
from keras.layers import Input, Activation, Dense, Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import AveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras.layers.merge import add
from keras import backend as K
# Raise the interpreter recursion limit to 10000.
# NOTE(review): presumably needed when handling very deep models (e.g.
# copying/serializing them) -- confirm against the callers.
sys.setrecursionlimit(10000)
# Axis handed to the BatchNormalization layers below. The network input is
# built as (img_rows, img_cols, img_channels), so on the 4D feature maps
# (batch first) axis 3 is the channel axis.
BN_AXIS = 3
# losses that need softmax on top of last layer
# (cifar10_resnet builds its final Dense layer with activation="softmax"
# when the loss name is in this list)
yes_softmax = ['crossentropy', 'forward', 'est_forward', 'backward',
'est_backward', 'boot_soft', 'savage']
# unhinged needs bounded models or it diverges
# (for these losses cifar10_resnet appends a BatchNormalization layer after
# the final, activation-free Dense layer)
yes_bound = ['unhinged', 'sigmoid']
def cifar10_resnet(depth, cifar10model, decay, loss):
    """Build an (uncompiled) residual network for the given configuration.

    Layout: one stem convolution (+ BN + ReLU), then three stages of
    ``depth`` residual blocks each with 16, 32 and 64 filters, then
    BN + ReLU, average pooling and a final Dense layer. Every residual
    block holds two convolutions, so the network has ``2 + 6 * depth``
    weight layers (stem conv + 3 * depth * 2 convs + final Dense).
    Stages 2 and 3 open with a stride-2 block that doubles the filters.

    Args:
        depth: number of residual blocks per stage.
        cifar10model: configuration object; the attributes read here are
            ``img_rows``, ``img_cols``, ``img_channels``, ``num_conv``
            (square kernel size) and ``classes`` (output units).
        decay: L2 regularization factor applied to every kernel.
        loss: loss name selecting the output head. Names in
            ``yes_softmax`` get a softmax activation, names in
            ``yes_bound`` get batch normalization on the raw outputs,
            anything else gets the plain linear Dense output.

    Returns:
        A ``keras.models.Model`` mapping the image input to the head output.
    """
    model = cifar10model
    inputs = Input(shape=(model.img_rows, model.img_cols, model.img_channels))

    # Stem: 1 conv + BN + relu
    filters = 16
    b = Conv2D(filters=filters, kernel_size=(model.num_conv, model.num_conv),
               kernel_initializer="he_normal", padding="same",
               kernel_regularizer=l2(decay), bias_regularizer=l2(0))(inputs)
    b = BatchNormalization(axis=BN_AXIS)(b)
    b = Activation("relu")(b)

    # Three stages of `depth` residual blocks (2 conv layers per block).
    # The first stage keeps 16 filters and does not stride; each later stage
    # doubles the filters and opens with a stride-2 block.
    for stage in range(3):
        is_first_stage = stage == 0
        if not is_first_stage:
            filters *= 2
        b = residual(model, filters, decay,
                     more_filters=not is_first_stage,
                     first=is_first_stage)(b)
        for _ in np.arange(1, depth):  # start from 1 => `depth` blocks total
            b = residual(model, filters, decay)(b)

    b = BatchNormalization(axis=BN_AXIS)(b)
    b = Activation("relu")(b)
    # NOTE(review): the fixed 8x8 window equals the final feature-map size
    # only for 32x32 inputs (two stride-2 stages: 32 -> 16 -> 8) -- confirm
    # that img_rows/img_cols are always 32.
    b = AveragePooling2D(pool_size=(8, 8), strides=(1, 1),
                         padding="valid")(b)
    out = Flatten()(b)

    # The three heads share the same Dense layer and differ only in what is
    # applied on top of it.
    use_softmax = loss in yes_softmax
    dense = Dense(units=model.classes, kernel_initializer="he_normal",
                  activation="softmax" if use_softmax else None,
                  kernel_regularizer=l2(decay),
                  bias_regularizer=l2(0))(out)
    if not use_softmax and loss in yes_bound:
        # The Dense output is 2D (batch, classes), so BN_AXIS (= 3) is out of
        # range here; normalize over the last (class) axis instead.
        dense = BatchNormalization(axis=-1)(dense)

    return Model(inputs=inputs, outputs=dense)
def residual(model, filters, decay, more_filters=False, first=False):
    """Return a function that applies one two-convolution residual block.

    The block computes ``[BN -> ReLU ->] conv -> BN -> ReLU -> conv`` and
    adds the result to a shortcut taken from the block's raw input. The
    leading BN + ReLU is skipped when ``first`` is true.

    Args:
        model: configuration object; only ``model.num_conv`` (square kernel
            size of both convolutions) is read.
        filters: number of output filters of both convolutions.
        decay: L2 regularization factor applied to the kernels.
        more_filters: when true (and ``first`` is false) the first
            convolution uses stride 2, halving the spatial resolution.
        first: marks the very first block of the network: no leading
            BN + ReLU and never strided.

    Returns:
        A callable taking a 4D channels-last Keras tensor and returning the
        output tensor of the block.
    """
    stride = 2 if (more_filters and not first) else 1

    def f(x):
        if first:
            b = x
        else:
            b = BatchNormalization(axis=BN_AXIS)(x)
            b = Activation("relu")(b)

        b = Conv2D(filters=filters,
                   kernel_size=(model.num_conv, model.num_conv),
                   strides=(stride, stride),
                   kernel_initializer="he_normal", padding="same",
                   kernel_regularizer=l2(decay), bias_regularizer=l2(0))(b)
        b = BatchNormalization(axis=BN_AXIS)(b)
        b = Activation("relu")(b)
        res = Conv2D(filters=filters,
                     kernel_size=(model.num_conv, model.num_conv),
                     kernel_initializer="he_normal", padding="same",
                     kernel_regularizer=l2(decay), bias_regularizer=l2(0))(b)

        # Match the shortcut to the residual branch. A 1x1 projection is
        # needed whenever the channel count OR the spatial size differs;
        # checking channels alone would let a strided block with an
        # unchanged filter count reach add() with mismatched shapes.
        input_shape = K.int_shape(x)
        residual_shape = K.int_shape(res)
        same_channels = input_shape[3] == residual_shape[3]
        same_spatial = input_shape[1:3] == residual_shape[1:3]

        shortcut = x
        if not (same_channels and same_spatial):
            # Shape index 1 is rows and index 2 is cols (channels-last).
            # float() forces true division so the ratio is not truncated
            # before rounding under Python 2 integer division.
            stride_rows = int(round(float(input_shape[1]) / residual_shape[1]))
            stride_cols = int(round(float(input_shape[2]) / residual_shape[2]))
            shortcut = Conv2D(filters=residual_shape[3], kernel_size=(1, 1),
                              strides=(stride_rows, stride_cols),
                              kernel_initializer="he_normal",
                              padding="valid",
                              kernel_regularizer=l2(decay))(x)

        return add([shortcut, res])

    return f