-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsiampain_train.py
297 lines (202 loc) · 9.76 KB
/
siampain_train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
#####################################################################################
# #
# Model Name : Siampain #
# Model Purpose : Face Recognition #
# Backbone : VGG-16 #
# Reference : Siamese Neural Networks for One-shot Image Recognition #
# Library : Pytorch #
# #
# siampain_train.py & Preprocessing.py written by hayoung Lee([email protected]) #
# #
# Additional Information #
# - Dataset : AI Hub - 마스크 착용 한국인 안면 이미지 데이터 #
# #
#####################################################################################
'''
DATA PREPROCESSING
1. Converting Data type
- Converting data from tfrecord to numpy array for use with PyTorch
2. Making train_dataset, train_loader, test_dataset, test_loader
'''
import preprocessing
from sklearn.model_selection import train_test_split
# Location of the TFRecord file holding the K-Face samples
kface_path = 'kface.tfrecord'
parsed_dataset = preprocessing.get_image_numpy_array(kface_path)

# Every parsed record keeps its image at index 2 and its label as the first
# entry of index 1; collect both into parallel lists.
record_indices = range(len(parsed_dataset))
image = [parsed_dataset[idx][2] for idx in record_indices]
label = [parsed_dataset[idx][1][0] for idx in record_indices]

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    image,
    label,
    test_size=0.20,
    random_state=425,
)

BATCH_SIZE = 16

# Wrap each split in the project's dataset class and a shuffling loader
train_dataset = preprocessing.CustomDataset(X_train, y_train)
train_loader = preprocessing.DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True
)

test_dataset = preprocessing.CustomDataset(X_test, y_test)
test_loader = preprocessing.DataLoader(
    test_dataset, batch_size=BATCH_SIZE, shuffle=True
)
'''
VGG CLASS DEFINITION
1. Backbone : Pre-trained VGG-16 without fully connected layer
2. Functions:
1. __init__(self, dimension, base_dim=64):
- Initialize the VGG model
- Initialize parameter alpha from a normal distribution with mean 0 and standard deviation 0.01
- Args:
- base_dim : Number of output channels of the first convolution block (default 64)
- dimension : Length of the flattened feature vector produced by the forward pass
2. forward(self, x):
- Define the forward pass of the model
- Args:
- x: Input tensor to the model
- Returns:
- Output tensor after forward pass
3. distance(self, x_1, x_2):
- Calculate the weighted sum of the element-wise absolute difference |x_1 - x_2|, using the alpha parameter as weights
- Return the weighted sum after passing it through a sigmoid function
'''
import torch
import torchvision
import torch.nn as nn
import torch.nn.init as init
import torch.functional as F
def conv_2(in_dim, out_dim):
    """Build a VGG stage made of two conv units followed by 2x2 max pooling.

    Each unit is a 3x3 convolution (padding 1), batch normalisation and a
    ReLU. The first convolution maps ``in_dim`` to ``out_dim`` channels; the
    second keeps ``out_dim`` channels.

    Args:
        in_dim: Number of input channels.
        out_dim: Number of output channels.

    Returns:
        An ``nn.Sequential`` with seven layers.
    """
    stage = []
    channels = in_dim
    for _ in range(2):
        stage.append(nn.Conv2d(channels, out_dim, kernel_size=3, padding=1))
        stage.append(nn.BatchNorm2d(out_dim))
        stage.append(nn.ReLU())
        # Every convolution after the first one operates on out_dim channels
        channels = out_dim
    stage.append(nn.MaxPool2d(2, 2))
    return nn.Sequential(*stage)
def conv_3(in_dim, out_dim):
    """Build a VGG stage made of three conv units followed by 2x2 max pooling.

    Each unit is a 3x3 convolution (padding 1), batch normalisation and a
    ReLU. Only the first convolution changes the channel count, from
    ``in_dim`` to ``out_dim``.

    Args:
        in_dim: Number of input channels.
        out_dim: Number of output channels.

    Returns:
        An ``nn.Sequential`` with ten layers.
    """
    stage = []
    channels = in_dim
    for _ in range(3):
        stage.append(nn.Conv2d(channels, out_dim, kernel_size=3, padding=1))
        stage.append(nn.BatchNorm2d(out_dim))
        stage.append(nn.ReLU())
        # Every convolution after the first one operates on out_dim channels
        channels = out_dim
    stage.append(nn.MaxPool2d(2, 2))
    return nn.Sequential(*stage)
class VGG(nn.Module):
    """VGG-16 style convolutional backbone with a learnable similarity head.

    The feature extractor stacks five stages (two ``conv_2`` stages followed
    by three ``conv_3`` stages), each ending in a 2x2 max pooling, so the
    spatial size is divided by 32 and the last stage outputs ``base_dim * 8``
    channels. There is no fully connected layer: ``forward`` returns the
    flattened feature map and ``distance`` compares two such feature vectors.

    Args:
        dimension: Length of the flattened feature vector returned by
            ``forward``; it sizes the ``alpha`` weight vector, so it must
            match ``base_dim * 8 * (H // 32) * (W // 32)`` for H x W inputs.
        base_dim: Number of output channels of the first stage.
    """

    def __init__(self, dimension, base_dim=64):
        super().__init__()
        self.feature = nn.Sequential(
            conv_2(3, base_dim),
            conv_2(base_dim, base_dim * 2),
            conv_3(base_dim * 2, base_dim * 4),
            conv_3(base_dim * 4, base_dim * 8),
            conv_3(base_dim * 8, base_dim * 8),
        )
        # One learnable weight per feature, used by ``distance``
        self.alpha = nn.Parameter(torch.empty(dimension))
        init.normal_(self.alpha, mean=0.0, std=0.01)
        # ``apply`` visits sub-modules only, so ``alpha`` keeps the values
        # drawn above.
        self.apply(self.initialize)

    @staticmethod
    def initialize(module):
        """Initialise one sub-module; meant to be passed to ``nn.Module.apply``.

        Convolutions get Kaiming-normal weights and zero biases, batch
        normalisation layers get unit scale and zero shift (the scheme used
        by torchvision's VGG). Other module types are left untouched.
        """
        if isinstance(module, nn.Conv2d):
            init.kaiming_normal_(
                module.weight, mode='fan_out', nonlinearity='relu'
            )
            if module.bias is not None:
                init.constant_(module.bias, 0.0)
        elif isinstance(module, nn.BatchNorm2d):
            if module.weight is not None:
                init.constant_(module.weight, 1.0)
            if module.bias is not None:
                init.constant_(module.bias, 0.0)

    def forward(self, x):
        """Run ``x`` through the convolutional stages and flatten the result.

        Flattening starts at dimension 1, so the leading (batch) dimension
        is preserved.
        """
        x = self.feature(x)
        x = torch.flatten(x, start_dim=1)
        return x

    def distance(self, x_1, x_2):
        """Score a pair of feature vectors with a value in (0, 1).

        Computes ``sigmoid(sum(alpha * |x_1 - x_2|))``, summing over the
        last dimension.
        """
        difference = torch.abs(x_1 - x_2)
        weighted_sum = torch.sum(self.alpha * difference, dim=-1)
        prediction = torch.sigmoid(weighted_sum)
        return prediction
'''
MODEL DECLARATION, HYPERPARAMETER INITIALIZATION, ETC.
Using Pre-trained VGG-16 without fully connected layer
Loss function: Binary Cross Entropy
Optimizer: Adam with learning rate set to 0.001
EPOCHS: 10
- I set EPOCHS to 10 based on experimental results, where the accuracy on the test set reached 95% after 7 epochs
'''
import torchvision.models as models
# Siamese backbone whose flattened feature vector has 4608 entries
model = VGG(dimension=4608)

# Run on the GPU when CUDA is available, otherwise on the CPU
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Transfer learning: copy the convolution parameters of the ImageNet
# pre-trained VGG-16 into the custom model, pairing the convolutions of
# both networks in order of appearance.
vgg16 = models.vgg16(pretrained=True)
vgg16_features = list(vgg16.features.children())


def _is_conv(layer):
    """Return True when ``layer`` is a 2-D convolution."""
    return isinstance(layer, nn.Conv2d)


conv_layers = list(filter(_is_conv, model.feature.modules()))
vgg16_conv_layers = list(filter(_is_conv, vgg16_features))

for my_layer, vgg16_layer in zip(conv_layers, vgg16_conv_layers):
    my_layer.weight.data = vgg16_layer.weight.data.clone()
    if my_layer.bias is None:
        continue
    my_layer.bias.data = vgg16_layer.bias.data.clone()

# Training settings
EPOCH = 5
loss = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
'''
UTILITY FUNCTIONS
1. compute_accuracy_and_loss(model, data_loader, device):
- Compute accuracy and loss for a given model on a given data loader
- Args :
- model : The neural network model to be evaluated
- data_loader : DataLoader object providing the dataset for evaluation
- device : Device to be used for computation (e.g., 'cuda' or 'cpu')
2. bool_to_int(boolean):
- Convert tensor of boolean data type to tensor of integer data type
- Args :
- boolean : Tensor of boolean data type
- Returns :
- Tensor of integer data type
'''
def compute_accuracy_and_loss(model, data_loader, device):
    """Evaluate a siamese model on every image pair of a data loader.

    Args:
        model: Module that maps an image batch to feature vectors when
            called and exposes ``distance(x_1, x_2)`` returning one
            similarity in (0, 1) per pair.
        data_loader: Sized iterable yielding
            ``(image_1, label_1, image_2, label_2)`` batches.
        device: Device the images and targets are moved to before the
            forward pass.

    Returns:
        ``(accuracy, loss)`` as Python floats: the percentage of pairs whose
        thresholded similarity matches the "same label" target, and the
        binary cross entropy averaged over all pairs. Both are ``nan`` when
        the loader yields no samples.
    """
    correct, cost_sum, num_samples = 0, 0.0, 0
    num_batches = len(data_loader)
    # Evaluation never needs gradients
    with torch.no_grad():
        for batch_idx, (image_1, label_1, image_2, label_2) in enumerate(data_loader):
            image_1, image_2 = image_1.to(device), image_2.to(device)
            image_1_feature, image_2_feature = model(image_1), model(image_2)
            predicted_similarity = model.distance(image_1_feature, image_2_feature)
            # Target is 1.0 for pairs sharing a label and 0.0 otherwise; it
            # must live on the same device as the predictions.
            target = torch.as_tensor(label_1 == label_2).float().to(device)
            # Binary cross entropy is computed on the raw probabilities and
            # summed, so the final average is exact even when the last batch
            # is smaller than the others.
            batch_cost = torch.nn.functional.binary_cross_entropy(
                predicted_similarity, target, reduction='sum'
            ).item()
            # Similarity above 0.5 counts as "same identity"
            scaled_predicted_similarity = (predicted_similarity > 0.5).float()
            correct += (scaled_predicted_similarity == target).sum().item()
            batch_size = target.size(0)
            num_samples += batch_size
            cost_sum += batch_cost
            cost = batch_cost / max(batch_size, 1)
            print (f'Batch {batch_idx:03d}/{num_batches:03d} |'
                   f' Cost: {cost:.4f}')
    if num_samples == 0:
        return float('nan'), float('nan')
    return correct / num_samples * 100, cost_sum / num_samples
def bool_to_int(boolean):
    """Convert boolean values to a float tensor of 1.0 / 0.0.

    The conversion is vectorised, so masks of any shape are supported and a
    tensor input keeps its device.

    Args:
        boolean: Boolean tensor, or a (nested) sequence of booleans.

    Returns:
        A ``torch.float32`` tensor with the same shape as the input, holding
        1.0 where the input is true and 0.0 elsewhere.
    """
    return torch.as_tensor(boolean).float()
'''
TRAINING
'''
import time
start_time = time.time()
# Per-epoch metric history
train_acc_lst, train_loss_lst, test_acc_lst, test_loss_lst = [], [], [], []
model.to(DEVICE)

for epoch in range(EPOCH):
    model.train()
    for batch_idx, (image_1, label_1, image_2, label_2) in enumerate(train_loader):
        image_1, image_2 = image_1.to(DEVICE), image_2.to(DEVICE)
        # Target is 1 for pairs sharing a label and 0 otherwise; it has to be
        # on the same device as the prediction for the loss to be computed.
        target = bool_to_int(label_1 == label_2).to(DEVICE)

        image_1_feature, image_2_feature = model(image_1), model(image_2)
        prediction = model.distance(image_1_feature, image_2_feature)

        # Calculate loss using Binary Cross Entropy
        cost = loss(prediction, target)

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        print (f'Epoch: {epoch:03d} | '
               f'Batch {batch_idx:03d}/{len(train_loader):03d} |'
               f'Cost: {cost:.4f}')

    # Evaluate on both splits after every epoch, without tracking gradients
    model.eval()
    with torch.no_grad():
        train_acc, train_loss = compute_accuracy_and_loss(model, train_loader, device=DEVICE)
        test_acc, test_loss = compute_accuracy_and_loss(model, test_loader, device=DEVICE)

    train_acc_lst.append(train_acc)
    test_acc_lst.append(test_acc)
    # Record the losses as well so the declared history lists are filled
    train_loss_lst.append(train_loss)
    test_loss_lst.append(test_loss)
    print(f'Epoch: {epoch:03d}/{EPOCH:03d} Train Acc.: {train_acc:.2f}%'
          f' | Test Acc.: {test_acc:.2f}%')

    elapsed = (time.time() - start_time)/60
    print(f'Time elapsed: {elapsed:.2f} min')

elapsed = (time.time() - start_time)/60
print(f'Total Training Time: {elapsed:.2f} min')