-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMTCNN.py
151 lines (115 loc) · 5.19 KB
/
MTCNN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import numpy as np
import tensorflow as tf
from keras.layers import Input, Conv2D, MaxPool2D, ZeroPadding2D, Activation, Dense, Reshape, Permute, Flatten
from keras.layers.advanced_activations import PReLU
from keras.models import Model, Sequential
"""
MTCNN (Multi-task Cascaded Convolutional Neural Network)
step1: Resize the image into an image pyramid
step2: Use P-Net (Proposal Network) to obtain the candidate facial windows and their bounding box regression vectors.
step3: Use R-Net (Refine Network) to reject a large number of false candidates, perform calibration with bounding box regression, and conduct NMS
step4: Use O-Net (Output Network) to produce the final bounding box and the landmarks
Notations:
Activations: PReLU
P-Net input size: (None, None, 3)
R-Net input size: (24, 24, 3)
O-Net input size: (48, 48, 3)
"""
def create_Pnet(weight_path):
    """
    Build P-Net (Proposal Network), which proposes candidate facial windows.

    Arguments:
        weight_path: path of the P-Net weights file; weights are loaded by layer name
    Returns:
        P-Net model with two outputs
        classifier -- 2-channel softmax map (whether a face is present)
        bbox_regress -- 4-channel bounding box regression map
    """
    # Height and width are left open; the shape comments below follow the
    # (12, 12, 3) input that the paper uses after image scaling.
    image = Input(shape=[None, None, 3])

    # Shared trunk, listed in the order the layers are applied.
    backbone = [
        # (12, 12, 3) --> (10, 10, 10) --> (5, 5, 10)
        Conv2D(filters=10, kernel_size=(3, 3), strides=1, padding="valid", name="conv1"),
        PReLU(shared_axes=[1, 2], name="PReLU1"),
        MaxPool2D(pool_size=2),
        # (5, 5, 10) --> (3, 3, 16)
        Conv2D(filters=16, kernel_size=(3, 3), strides=1, padding="valid", name="conv2"),
        PReLU(shared_axes=[1, 2], name="PReLU2"),
        # (3, 3, 16) --> (1, 1, 32)
        Conv2D(filters=32, kernel_size=(3, 3), strides=1, padding="valid", name="conv3"),
        PReLU(shared_axes=[1, 2], name="PReLU3"),
    ]
    features = image
    for layer in backbone:
        features = layer(features)

    # Two 1x1 convolution heads read the same trunk output.
    classifier = Conv2D(filters=2, kernel_size=(1, 1), activation="softmax", name="conv4-1")(features)
    bbox_regress = Conv2D(filters=4, kernel_size=(1, 1), name="conv4-2")(features)

    model = Model([image], [classifier, bbox_regress])
    # by_name=True matches stored weights to layers through the names given above.
    model.load_weights(weight_path, by_name=True)
    return model
def create_Rnet(weigth_path):
    """
    Build R-Net (Refine Network), which filters candidate windows and refines their boxes.

    :param weigth_path: path of the R-Net weights file; weights are loaded by layer name
                        (the parameter spelling is kept as-is so keyword callers keep working)
    :return:
    R-Net model with two outputs
    classifier -- 2-way softmax face classification from the fully connected layer
    bbox_regress -- 4-value bounding box refinement from the fully connected layer
    """
    # Fixed (24, 24, 3) input
    crop = Input(shape=[24, 24, 3])

    # Trunk, listed in the order the layers are applied.
    pipeline = [
        # (24, 24, 3) --> (22, 22, 28) --> (11, 11, 28)
        Conv2D(filters=28, kernel_size=(3, 3), strides=1, padding="valid", name="conv1"),
        PReLU(shared_axes=[1, 2], name="PReLU1"),
        MaxPool2D(pool_size=3, strides=2, padding="same"),
        # (11, 11, 28) --> (9, 9, 48) --> (4, 4, 48)
        Conv2D(filters=48, kernel_size=(3, 3), strides=1, padding="valid", name="conv2"),
        PReLU(shared_axes=[1, 2], name="PReLU2"),
        MaxPool2D(pool_size=3, strides=2),
        # (4, 4, 48) --> (3, 3, 64)
        Conv2D(filters=64, kernel_size=(2, 2), strides=1, padding="valid", name="conv3"),
        PReLU(shared_axes=[1, 2], name="PReLU3"),
        # Reorder to (n_C, n_W, n_H) before flattening
        Permute((3, 2, 1)),
        Flatten(),
        # Fully connected layer (it carries a conv-style name)
        Dense(128, name="conv4"),
        PReLU(name="PReLU4"),
    ]
    hidden = crop
    for layer in pipeline:
        hidden = layer(hidden)

    # Both heads read the same 128-d vector.
    classifier = Dense(2, activation="softmax", name="conv5-1")(hidden)
    bbox_regress = Dense(4, name="conv5-2")(hidden)

    model = Model([crop], [classifier, bbox_regress])
    # by_name=True matches stored weights to layers through the names given above.
    model.load_weights(weigth_path, by_name=True)
    return model
def create_Onet(weight_path):
    """
    Build O-Net (Output Network), which emits the final face window, box and landmarks.

    :param weight_path: path of the O-Net weights file; weights are loaded by layer name
    :return:
    O-Net model with three outputs
    classifier -- final 2-way softmax classification
    bbox_regress -- final bounding box (4 values)
    landmark_regress -- 10 values for the 5 landmarks in the bounding box: left eye, right eye,
                        nose, left mouth corner and right mouth corner
    """
    # Fixed (48, 48, 3) input
    crop = Input(shape=[48, 48, 3])

    # Trunk, listed in the order the layers are applied.
    pipeline = [
        # (48, 48, 3) --> (46, 46, 32) --> (23, 23, 32)
        Conv2D(filters=32, kernel_size=(3, 3), strides=1, padding="valid", name="conv1"),
        PReLU(shared_axes=[1, 2], name="PReLU1"),
        MaxPool2D(pool_size=3, strides=2, padding="same"),
        # (23, 23, 32) --> (21, 21, 64) --> (10, 10, 64)
        Conv2D(filters=64, kernel_size=(3, 3), strides=1, padding="valid", name="conv2"),
        PReLU(shared_axes=[1, 2], name="PReLU2"),
        MaxPool2D(pool_size=3, strides=2),
        # (10, 10, 64) --> (8, 8, 64) --> (4, 4, 64)
        Conv2D(filters=64, kernel_size=(3, 3), strides=1, padding="valid", name="conv3"),
        PReLU(shared_axes=[1, 2], name="PReLU3"),
        MaxPool2D(pool_size=2),
        # (4, 4, 64) --> (3, 3, 128)
        Conv2D(filters=128, kernel_size=(2, 2), strides=1, padding="valid", name="conv4"),
        PReLU(shared_axes=[1, 2], name="PReLU4"),
        # Reorder to (n_C, n_W, n_H) before flattening
        Permute((3, 2, 1)),
        Flatten(),
        # Fully connected 256 vector (the layer carries a conv-style name)
        Dense(256, name="conv5"),
        PReLU(name="PReLU5"),
    ]
    hidden = crop
    for layer in pipeline:
        hidden = layer(hidden)

    # All three heads read the same 256-d vector.
    classifier = Dense(2, activation="softmax", name="conv6-1")(hidden)
    bbox_regress = Dense(4, name="conv6-2")(hidden)
    landmark_regress = Dense(10, name="conv6-3")(hidden)

    model = Model([crop], [classifier, bbox_regress, landmark_regress])
    # by_name=True matches stored weights to layers through the names given above.
    model.load_weights(weight_path, by_name=True)
    return model