forked from miemie2013/Keras-YOLOv4
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
437 lines (375 loc) · 21.6 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
#! /usr/bin/env python
# coding=utf-8
# ================================================================
#
# Author : miemie2013
# Created date: 2020-05-20 15:35:27
# Description : keras_yolov4
#
# ================================================================
import cv2
from collections import deque
import math
import json
import time
import threading
import datetime
import keras
import random
import copy
import numpy as np
import keras.layers as layers
import os
import tensorflow as tf
from keras import backend as K
from config import TrainConfig
from model.yolov4 import YOLOv4
from tools.cocotools import get_classes, catid2clsid, clsid2catid
from model.decode_np import Decode
from tools.cocotools import eval
from tools.data_process import data_clean, get_samples
from tools.transform import *
from pycocotools.coco import COCO
import logging
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)
# 显存分配
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 1.0
set_session(tf.Session(config=config))
def bbox_ciou(boxes1, boxes2):
'''
计算ciou = iou - p2/c2 - av
:param boxes1: (8, 13, 13, 3, 4) pred_xywh
:param boxes2: (8, 13, 13, 3, 4) label_xywh
:return:
举例时假设pred_xywh和label_xywh的shape都是(1, 4)
'''
# 变成左上角坐标、右下角坐标
boxes1_x0y0x1y1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
boxes2_x0y0x1y1 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
'''
逐个位置比较boxes1_x0y0x1y1[..., :2]和boxes1_x0y0x1y1[..., 2:],即逐个位置比较[x0, y0]和[x1, y1],小的留下。
比如留下了[x0, y0]
这一步是为了避免一开始w h 是负数,导致x0y0成了右下角坐标,x1y1成了左上角坐标。
'''
boxes1_x0y0x1y1 = tf.concat([tf.minimum(boxes1_x0y0x1y1[..., :2], boxes1_x0y0x1y1[..., 2:]),
tf.maximum(boxes1_x0y0x1y1[..., :2], boxes1_x0y0x1y1[..., 2:])], axis=-1)
boxes2_x0y0x1y1 = tf.concat([tf.minimum(boxes2_x0y0x1y1[..., :2], boxes2_x0y0x1y1[..., 2:]),
tf.maximum(boxes2_x0y0x1y1[..., :2], boxes2_x0y0x1y1[..., 2:])], axis=-1)
# 两个矩形的面积
boxes1_area = (boxes1_x0y0x1y1[..., 2] - boxes1_x0y0x1y1[..., 0]) * (
boxes1_x0y0x1y1[..., 3] - boxes1_x0y0x1y1[..., 1])
boxes2_area = (boxes2_x0y0x1y1[..., 2] - boxes2_x0y0x1y1[..., 0]) * (
boxes2_x0y0x1y1[..., 3] - boxes2_x0y0x1y1[..., 1])
# 相交矩形的左上角坐标、右下角坐标,shape 都是 (8, 13, 13, 3, 2)
left_up = tf.maximum(boxes1_x0y0x1y1[..., :2], boxes2_x0y0x1y1[..., :2])
right_down = tf.minimum(boxes1_x0y0x1y1[..., 2:], boxes2_x0y0x1y1[..., 2:])
# 相交矩形的面积inter_area。iou
inter_section = tf.maximum(right_down - left_up, 0.0)
inter_area = inter_section[..., 0] * inter_section[..., 1]
union_area = boxes1_area + boxes2_area - inter_area
iou = inter_area / (union_area + 1e-9)
# 包围矩形的左上角坐标、右下角坐标,shape 都是 (8, 13, 13, 3, 2)
enclose_left_up = tf.minimum(boxes1_x0y0x1y1[..., :2], boxes2_x0y0x1y1[..., :2])
enclose_right_down = tf.maximum(boxes1_x0y0x1y1[..., 2:], boxes2_x0y0x1y1[..., 2:])
# 包围矩形的对角线的平方
enclose_wh = enclose_right_down - enclose_left_up
enclose_c2 = K.pow(enclose_wh[..., 0], 2) + K.pow(enclose_wh[..., 1], 2)
# 两矩形中心点距离的平方
p2 = K.pow(boxes1[..., 0] - boxes2[..., 0], 2) + K.pow(boxes1[..., 1] - boxes2[..., 1], 2)
# 增加av。加上除0保护防止nan。
atan1 = tf.atan(boxes1[..., 2] / (boxes1[..., 3] + 1e-9))
atan2 = tf.atan(boxes2[..., 2] / (boxes2[..., 3] + 1e-9))
v = 4.0 * K.pow(atan1 - atan2, 2) / (math.pi ** 2)
a = v / (1 - iou + v)
ciou = iou - 1.0 * p2 / enclose_c2 - 1.0 * a * v
return ciou
def bbox_iou(boxes1, boxes2):
'''
预测框 boxes1 (?, grid_h, grid_w, 3, 1, 4),神经网络的输出(tx, ty, tw, th)经过了后处理求得的(bx, by, bw, bh)
图片中所有的gt boxes2 (?, 1, 1, 1, 70, 4)
'''
boxes1_area = boxes1[..., 2] * boxes1[..., 3] # 所有格子的3个预测框的面积
boxes2_area = boxes2[..., 2] * boxes2[..., 3] # 所有ground truth的面积
# (x, y, w, h)变成(x0, y0, x1, y1)
boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
# 所有格子的3个预测框 分别 和 70个ground truth 计算iou。 所以left_up和right_down的shape = (?, grid_h, grid_w, 3, 70, 2)
left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2]) # 相交矩形的左上角坐标
right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:]) # 相交矩形的右下角坐标
inter_section = tf.maximum(right_down - left_up, 0.0) # 相交矩形的w和h,是负数时取0 (?, grid_h, grid_w, 3, 70, 2)
inter_area = inter_section[..., 0] * inter_section[..., 1] # 相交矩形的面积 (?, grid_h, grid_w, 3, 70)
union_area = boxes1_area + boxes2_area - inter_area # union_area (?, grid_h, grid_w, 3, 70)
iou = 1.0 * inter_area / union_area # iou (?, grid_h, grid_w, 3, 70)
return iou
def loss_layer(conv, pred, label, bboxes, stride, num_class, iou_loss_thresh):
conv_shape = tf.shape(conv)
batch_size = conv_shape[0]
output_size = conv_shape[1]
input_size = stride * output_size
conv = tf.reshape(conv, (batch_size, output_size, output_size,
3, 5 + num_class))
conv_raw_prob = conv[:, :, :, :, 5:]
pred_xywh = pred[:, :, :, :, 0:4]
pred_conf = pred[:, :, :, :, 4:5]
label_xywh = label[:, :, :, :, 0:4]
respond_bbox = label[:, :, :, :, 4:5]
label_prob = label[:, :, :, :, 5:]
ciou = tf.expand_dims(bbox_ciou(pred_xywh, label_xywh), axis=-1) # (8, 13, 13, 3, 1)
input_size = tf.cast(input_size, tf.float32)
# 每个预测框xxxiou_loss的权重 = 2 - (ground truth的面积/图片面积)
bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
ciou_loss = respond_bbox * bbox_loss_scale * (1 - ciou) # 1. respond_bbox作为mask,有物体才计算xxxiou_loss
# 2. respond_bbox作为mask,有物体才计算类别loss
prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob)
# 等价于
# pred_prob = pred[:, :, :, :, 5:]
# prob_pos_loss = label_prob * (0 - K.log(pred_prob + 1e-9))
# prob_neg_loss = (1 - label_prob) * (0 - K.log(1 - pred_prob + 1e-9))
# prob_mask = tf.tile(respond_bbox, [1, 1, 1, 1, num_class])
# prob_loss = prob_mask * (prob_pos_loss + prob_neg_loss)
# 3. xxxiou_loss和类别loss比较简单。重要的是conf_loss,是一个二值交叉熵损失
# 分两步:第一步是确定 grid_h * grid_w * 3 个预测框 哪些作为反例;第二步是计算二值交叉熵损失。
expand_pred_xywh = pred_xywh[:, :, :, :, np.newaxis, :] # 扩展为(?, grid_h, grid_w, 3, 1, 4)
expand_bboxes = bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :] # 扩展为(?, 1, 1, 1, 70, 4)
iou = bbox_iou(expand_pred_xywh, expand_bboxes) # 所有格子的3个预测框 分别 和 70个ground truth 计算iou。 (?, grid_h, grid_w, 3, 70)
max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1) # 与70个ground truth的iou中,保留最大那个iou。 (?, grid_h, grid_w, 3, 1)
# respond_bgd代表 这个分支输出的 grid_h * grid_w * 3 个预测框是否是 反例(背景)
# label有物体,respond_bgd是0。 没物体的话:如果和某个gt(共70个)的iou超过iou_loss_thresh,respond_bgd是0;如果和所有gt(最多70个)的iou都小于iou_loss_thresh,respond_bgd是1。
# respond_bgd是0代表有物体,不是反例(或者是忽略框); 权重respond_bgd是1代表没有物体,是反例。
# 有趣的是,模型训练时由于不断更新,对于同一张图片,两次预测的 grid_h * grid_w * 3 个预测框(对于这个分支输出) 是不同的。用的是这些预测框来与gt计算iou来确定哪些预测框是反例。
# 而不是用固定大小(不固定位置)的先验框。
respond_bgd = (1.0 - respond_bbox) * tf.cast(max_iou < iou_loss_thresh, tf.float32)
# 二值交叉熵损失
pos_loss = respond_bbox * (0 - K.log(pred_conf + 1e-9))
neg_loss = respond_bgd * (0 - K.log(1 - pred_conf + 1e-9))
conf_loss = pos_loss + neg_loss
# 回顾respond_bgd,某个预测框和某个gt的iou超过iou_loss_thresh,不被当作是反例。在参与“预测的置信位 和 真实置信位 的 二值交叉熵”时,这个框也可能不是正例(label里没标这个框是1的话)。这个框有可能不参与置信度loss的计算。
# 这种框一般是gt框附近的框,或者是gt框所在格子的另外两个框。它既不是正例也不是反例不参与置信度loss的计算。(论文里称之为ignore)
ciou_loss = tf.reduce_mean(tf.reduce_sum(ciou_loss, axis=[1, 2, 3, 4])) # 每个样本单独计算自己的ciou_loss,再求平均值
conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1, 2, 3, 4])) # 每个样本单独计算自己的conf_loss,再求平均值
prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1, 2, 3, 4])) # 每个样本单独计算自己的prob_loss,再求平均值
return ciou_loss, conf_loss, prob_loss
def decode(conv_output, anchors, stride, num_class):
conv_shape = tf.shape(conv_output)
batch_size = conv_shape[0]
output_size = conv_shape[1]
anchor_per_scale = len(anchors)
conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, anchor_per_scale, 5 + num_class))
conv_raw_dxdy = conv_output[:, :, :, :, 0:2]
conv_raw_dwdh = conv_output[:, :, :, :, 2:4]
conv_raw_conf = conv_output[:, :, :, :, 4:5]
conv_raw_prob = conv_output[:, :, :, :, 5: ]
y = tf.tile(tf.range(output_size, dtype=tf.int32)[:, tf.newaxis], [1, output_size])
x = tf.tile(tf.range(output_size, dtype=tf.int32)[tf.newaxis, :], [output_size, 1])
xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1)
xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, anchor_per_scale, 1])
xy_grid = tf.cast(xy_grid, tf.float32)
pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * stride
pred_wh = (tf.exp(conv_raw_dwdh) * anchors)
pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)
pred_conf = tf.sigmoid(conv_raw_conf)
pred_prob = tf.sigmoid(conv_raw_prob)
return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)
def yolo_loss(args, num_classes, iou_loss_thresh, anchors):
conv_lbbox = args[0] # (?, ?, ?, 3*(num_classes+5))
conv_mbbox = args[1] # (?, ?, ?, 3*(num_classes+5))
conv_sbbox = args[2] # (?, ?, ?, 3*(num_classes+5))
label_sbbox = args[3] # (?, ?, ?, 3, num_classes+5)
label_mbbox = args[4] # (?, ?, ?, 3, num_classes+5)
label_lbbox = args[5] # (?, ?, ?, 3, num_classes+5)
true_bboxes = args[6] # (?, 50, 4)
pred_sbbox = decode(conv_sbbox, anchors[0], 8, num_classes)
pred_mbbox = decode(conv_mbbox, anchors[1], 16, num_classes)
pred_lbbox = decode(conv_lbbox, anchors[2], 32, num_classes)
sbbox_ciou_loss, sbbox_conf_loss, sbbox_prob_loss = loss_layer(conv_sbbox, pred_sbbox, label_sbbox, true_bboxes, 8, num_classes, iou_loss_thresh)
mbbox_ciou_loss, mbbox_conf_loss, mbbox_prob_loss = loss_layer(conv_mbbox, pred_mbbox, label_mbbox, true_bboxes, 16, num_classes, iou_loss_thresh)
lbbox_ciou_loss, lbbox_conf_loss, lbbox_prob_loss = loss_layer(conv_lbbox, pred_lbbox, label_lbbox, true_bboxes, 32, num_classes, iou_loss_thresh)
ciou_loss = sbbox_ciou_loss + mbbox_ciou_loss + lbbox_ciou_loss
conf_loss = sbbox_conf_loss + mbbox_conf_loss + lbbox_conf_loss
prob_loss = sbbox_prob_loss + mbbox_prob_loss + lbbox_prob_loss
return [ciou_loss, conf_loss, prob_loss]
def multi_thread_op(i, samples, decodeImage, context, train_dataset, with_mixup, mixupImage,
photometricDistort, randomCrop, randomFlipImage, normalizeBox, padBox, bboxXYXY2XYWH):
samples[i] = decodeImage(samples[i], context, train_dataset)
if with_mixup:
samples[i] = mixupImage(samples[i], context)
samples[i] = photometricDistort(samples[i], context)
samples[i] = randomCrop(samples[i], context)
samples[i] = randomFlipImage(samples[i], context)
samples[i] = normalizeBox(samples[i], context)
samples[i] = padBox(samples[i], context)
samples[i] = bboxXYXY2XYWH(samples[i], context)
if __name__ == '__main__':
cfg = TrainConfig()
class_names = get_classes(cfg.classes_path)
num_classes = len(class_names)
_anchors = copy.deepcopy(cfg.anchors)
num_anchors = len(cfg.anchor_masks[0]) # 每个输出层有几个先验框
_anchors = np.array(_anchors)
_anchors = np.reshape(_anchors, (-1, num_anchors, 2))
_anchors = _anchors.astype(np.float32)
# 步id,无需设置,会自动读。
iter_id = 0
# 多尺度训练
inputs = layers.Input(shape=(None, None, 3))
model_body = YOLOv4(inputs, num_classes, num_anchors)
_decode = Decode(cfg.conf_thresh, cfg.nms_thresh, cfg.input_shape, model_body, class_names)
# 模式。 0-从头训练,1-读取之前的模型继续训练(model_path可以是'yolov4.h5'、'./weights/step00001000.h5'这些。)
pattern = cfg.pattern
if pattern == 1:
model_body.load_weights(cfg.model_path, by_name=True, skip_mismatch=True)
strs = cfg.model_path.split('step')
if len(strs) == 2:
iter_id = int(strs[1][:8])
# 冻结,使得需要的显存减少。6G的卡建议这样配置。11G的卡建议不冻结。
# freeze_before = 'conv2d_60'
# freeze_before = 'conv2d_72'
freeze_before = 'conv2d_73'
# freeze_before = 'conv2d_86'
for i in range(len(model_body.layers)):
ly = model_body.layers[i]
if ly.name == freeze_before:
break
else:
ly.trainable = False
elif pattern == 0:
pass
y_true = [
layers.Input(name='input_2', shape=(None, None, 3, (num_classes + 5))), # label_sbbox
layers.Input(name='input_3', shape=(None, None, 3, (num_classes + 5))), # label_mbbox
layers.Input(name='input_4', shape=(None, None, 3, (num_classes + 5))), # label_lbbox
layers.Input(name='input_5', shape=(cfg.num_max_boxes, 4)), # true_bboxes
]
loss_list = layers.Lambda(yolo_loss, name='yolo_loss',
arguments={'num_classes': num_classes, 'iou_loss_thresh': cfg.iou_loss_thresh,
'anchors': _anchors})([*model_body.output, *y_true])
model = keras.models.Model([model_body.input, *y_true], loss_list)
model.summary(line_length=130)
# keras.utils.vis_utils.plot_model(model_body, to_file='yolov4.png', show_shapes=True)
# 种类id
_catid2clsid = copy.deepcopy(catid2clsid)
_clsid2catid = copy.deepcopy(clsid2catid)
if num_classes != 80: # 如果不是COCO数据集,而是自定义数据集
_catid2clsid = {}
_clsid2catid = {}
for k in range(num_classes):
_catid2clsid[k] = k
_clsid2catid[k] = k
# 训练集
train_dataset = COCO(cfg.train_path)
train_img_ids = train_dataset.getImgIds()
train_records = data_clean(train_dataset, train_img_ids, _catid2clsid, cfg.train_pre_path)
num_train = len(train_records)
train_indexes = [i for i in range(num_train)]
# 验证集
with open(cfg.val_path, 'r', encoding='utf-8') as f2:
for line in f2:
line = line.strip()
dataset = json.loads(line)
val_images = dataset['images']
batch_size = cfg.batch_size
with_mixup = cfg.with_mixup
context = cfg.context
# 预处理
# sample_transforms
decodeImage = DecodeImage(with_mixup=with_mixup) # 对图片解码。最开始的一步。
mixupImage = MixupImage() # mixup增强
photometricDistort = PhotometricDistort() # 颜色扭曲
randomCrop = RandomCrop() # 随机裁剪
randomFlipImage = RandomFlipImage() # 随机翻转
normalizeBox = NormalizeBox() # 将物体的左上角坐标、右下角坐标中的横坐标/图片宽、纵坐标/图片高 以归一化坐标。
padBox = PadBox(cfg.num_max_boxes) # 如果gt_bboxes的数量少于num_max_boxes,那么填充坐标是0的bboxes以凑够num_max_boxes。
bboxXYXY2XYWH = BboxXYXY2XYWH() # sample['gt_bbox']被改写为cx_cy_w_h格式。
# batch_transforms
randomShape = RandomShape() # 多尺度训练。随机选一个尺度。也随机选一种插值方式。
normalizeImage = NormalizeImage(is_scale=True, is_channel_first=False) # 图片归一化。直接除以255。
gt2YoloTarget = Gt2YoloTarget(cfg.anchors,
cfg.anchor_masks,
cfg.downsample_ratios,
num_classes) # 填写target0、target1、target2张量。
# 保存模型的目录
if not os.path.exists('./weights'): os.mkdir('./weights')
model.compile(loss={'yolo_loss': lambda y_true, y_pred: y_pred}, optimizer=keras.optimizers.Adam(lr=cfg.lr))
time_stat = deque(maxlen=20)
start_time = time.time()
end_time = time.time()
# 一轮的步数。丢弃最后几个样本。
train_steps = num_train // batch_size
best_ap_list = [0.0, 0] #[map, iter]
while True: # 无限个epoch
# 每个epoch之前洗乱
np.random.shuffle(train_indexes)
for step in range(train_steps):
iter_id += 1
# 估计剩余时间
start_time = end_time
end_time = time.time()
time_stat.append(end_time - start_time)
time_cost = np.mean(time_stat)
eta_sec = (cfg.max_iters - iter_id) * time_cost
eta = str(datetime.timedelta(seconds=int(eta_sec)))
# ==================== train ====================
samples = get_samples(train_records, train_indexes, step, batch_size, with_mixup)
# sample_transforms用多线程
threads = []
for i in range(batch_size):
t = threading.Thread(target=multi_thread_op, args=(i, samples, decodeImage, context, train_dataset, with_mixup, mixupImage,
photometricDistort, randomCrop, randomFlipImage, normalizeBox, padBox, bboxXYXY2XYWH))
threads.append(t)
t.start()
# 等待所有线程任务结束。
for t in threads:
t.join()
# batch_transforms
samples = randomShape(samples, context)
samples = normalizeImage(samples, context)
batch_image, batch_label, batch_gt_bbox = gt2YoloTarget(samples, context)
batch_xs = [batch_image, batch_label[2], batch_label[1], batch_label[0], batch_gt_bbox]
y_true = [np.zeros(batch_size), np.zeros(batch_size), np.zeros(batch_size)]
losses = model.train_on_batch(batch_xs, y_true)
# ==================== log ====================
if iter_id % 20 == 0:
strs = 'Train iter: {}, all_loss: {:.6f}, ciou_loss: {:.6f}, conf_loss: {:.6f}, prob_loss: {:.6f}, eta: {}'.format(
iter_id, losses[0], losses[1], losses[2], losses[3], eta)
logger.info(strs)
# ==================== save ====================
if iter_id % cfg.save_iter == 0:
save_path = './weights/step%.8d.h5' % iter_id
model.save(save_path)
path_dir = os.listdir('./weights')
steps = []
names = []
for name in path_dir:
if name[len(name) - 2:len(name)] == 'h5' and name[0:4] == 'step':
step = int(name[4:12])
steps.append(step)
names.append(name)
if len(steps) > 10:
i = steps.index(min(steps))
os.remove('./weights/'+names[i])
logger.info('Save model to {}'.format(save_path))
# ==================== eval ====================
if iter_id % cfg.eval_iter == 0:
box_ap = eval(_decode, val_images, cfg.val_pre_path, cfg.val_path, cfg.eval_batch_size, _clsid2catid, cfg.draw_image)
logger.info("box ap: %.3f" % (box_ap[0], ))
# 以box_ap作为标准
ap = box_ap
if ap[0] > best_ap_list[0]:
best_ap_list[0] = ap[0]
best_ap_list[1] = iter_id
model.save('./weights/best_model.h5')
logger.info("Best test ap: {}, in iter: {}".format(
best_ap_list[0], best_ap_list[1]))
# ==================== exit ====================
if iter_id == cfg.max_iters:
logger.info('Done.')
exit(0)