-
Notifications
You must be signed in to change notification settings - Fork 6
/
tools.py
446 lines (381 loc) · 19.3 KB
/
tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from data import *
CLASS_COLOR = [(np.random.randint(255),np.random.randint(255),np.random.randint(255)) for _ in range(len(VOC_CLASSES))]
# We use ignore thresh to decide which anchor box can be kept.
ignore_thresh = IGNORE_THRESH
class BCELoss(nn.Module):
    """Binary cross-entropy loss split into positive- and negative-cell terms.

    The extra ``mask`` argument selects which elements feed each term:
    mask == 1.0 marks positive samples, mask == 0.0 marks negatives, and any
    other value (e.g. -1.0 for "ignore") contributes to neither term.
    """
    def __init__(self, weight=None, ignore_index=-100, reduce=None, reduction='mean'):
        super(BCELoss, self).__init__()
        # Only `reduction` is honoured; the remaining arguments exist to
        # mirror the signature of nn.BCELoss.
        self.reduction = reduction

    def forward(self, inputs, targets, mask):
        positives = (mask == 1.0).float()
        negatives = (mask == 0.0).float()
        # Full BCE on positive cells; background-only BCE on negative cells.
        # The 1e-14 keeps log() away from zero.
        bce_pos = -positives * (targets * torch.log(inputs + 1e-14)
                                + (1 - targets) * torch.log(1.0 - inputs + 1e-14))
        bce_neg = -negatives * torch.log(1.0 - inputs + 1e-14)
        if self.reduction == 'mean':
            # sum over the anchor dimension, then average over the batch
            bce_pos = torch.mean(torch.sum(bce_pos, 1))
            bce_neg = torch.mean(torch.sum(bce_neg, 1))
        return bce_pos, bce_neg
class MSELoss(nn.Module):
    """Mean-squared-error objectness loss split into positive and negative terms.

    ``mask`` routes elements: 1.0 -> positive term (squared error against the
    target), 0.0 -> negative term (squared prediction, i.e. target 0), and any
    other value (e.g. -1.0) is ignored by both.
    """
    def __init__(self, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean'):
        super(MSELoss, self).__init__()
        # Only `reduction` is honoured; the remaining arguments exist to
        # mirror the signature of nn.MSELoss.
        self.reduction = reduction

    def forward(self, inputs, targets, mask):
        positives = (mask == 1.0).float()
        negatives = (mask == 0.0).float()
        sq_pos = positives * (inputs - targets) ** 2
        sq_neg = negatives * inputs ** 2
        if self.reduction == 'mean':
            # sum over the anchor dimension, then average over the batch
            sq_pos = torch.mean(torch.sum(sq_pos, 1))
            sq_neg = torch.mean(torch.sum(sq_neg, 1))
        return sq_pos, sq_neg
def generate_anchor(input_size, stride, anchor_scale, anchor_aspect):
    """
    Design anchor boxes from area scales and aspect ratios.
    Input:
        input_size : list -> [h, w] image resolution used in training and testing.
        stride : int -> downsample factor of the CNN, such as 32, 64 and so on.
        anchor_scale : list -> area ratios of anchor boxes w.r.t. the feature map,
                       e.g. anchor_scale = [0.1, 0.5].
        anchor_aspect : list -> aspect ratios per scale, e.g. [[1.0, 2.0], [3.0, 1/3]].
                        len(anchor_aspect) must equal len(anchor_scale).
    Output:
        total_anchor_size : list -> [[w_1, h_1], [w_2, h_2], ..., [w_n, h_n]].
                            NOTE: width first — this matches set_anchors(), which
                            unpacks each entry as (anchor_w, anchor_h). The old
                            docstring claimed [[h, w]] order, which was wrong.
    """
    assert len(anchor_scale) == len(anchor_aspect)
    h, w = input_size
    hs, ws = h // stride, w // stride
    S_fmap = hs * ws
    total_anchor_size = []
    for ab_scale, aspect_ratio in zip(anchor_scale, anchor_aspect):
        # The anchor area (and hence base width) is fixed per scale,
        # so compute it once instead of per aspect ratio.
        S_ab = S_fmap * ab_scale
        ab_w = np.floor(np.sqrt(S_ab))
        for a in aspect_ratio:
            ab_h = ab_w * a
            total_anchor_size.append([ab_w, ab_h])
    return total_anchor_size
def compute_iou(anchor_boxes, gt_box):
    """
    Compute the IoU between every anchor box and a single ground-truth box.
    Input:
        anchor_boxes : ndarray -> [[c_x_s, c_y_s, anchor_w, anchor_h], ...].
        gt_box : ndarray -> [[c_x_s, c_y_s, anchor_w, anchor_h]] (one row).
    Output:
        iou : ndarray -> [iou_1, iou_2, ..., iou_m], m = number of anchor boxes.
    """
    # First, convert [c_x, c_y, w, h] -> [xmin, ymin, xmax, ymax].
    # anchor boxes:
    ab_x1y1_x2y2 = np.zeros([len(anchor_boxes), 4])
    ab_x1y1_x2y2[:, 0] = anchor_boxes[:, 0] - anchor_boxes[:, 2] / 2  # xmin
    ab_x1y1_x2y2[:, 1] = anchor_boxes[:, 1] - anchor_boxes[:, 3] / 2  # ymin
    ab_x1y1_x2y2[:, 2] = anchor_boxes[:, 0] + anchor_boxes[:, 2] / 2  # xmax
    ab_x1y1_x2y2[:, 3] = anchor_boxes[:, 1] + anchor_boxes[:, 3] / 2  # ymax
    w_ab, h_ab = anchor_boxes[:, 2], anchor_boxes[:, 3]
    # gt box, broadcast to the shape of anchor_boxes for elementwise math:
    gt_box_expand = np.repeat(gt_box, len(anchor_boxes), axis=0)
    gb_x1y1_x2y2 = np.zeros([len(anchor_boxes), 4])
    gb_x1y1_x2y2[:, 0] = gt_box_expand[:, 0] - gt_box_expand[:, 2] / 2  # xmin
    gb_x1y1_x2y2[:, 1] = gt_box_expand[:, 1] - gt_box_expand[:, 3] / 2  # ymin
    gb_x1y1_x2y2[:, 2] = gt_box_expand[:, 0] + gt_box_expand[:, 2] / 2  # xmax
    gb_x1y1_x2y2[:, 3] = gt_box_expand[:, 1] + gt_box_expand[:, 3] / 2  # ymax
    w_gt, h_gt = gt_box_expand[:, 2], gt_box_expand[:, 3]
    # Intersection: clamp width/height at zero. Without the clamp, disjoint
    # boxes yielded a negative overlap per axis, and when BOTH axes were
    # negative their product produced a bogus positive intersection area.
    S_gt = w_gt * h_gt
    S_ab = w_ab * h_ab
    I_w = np.maximum(0.0, np.minimum(gb_x1y1_x2y2[:, 2], ab_x1y1_x2y2[:, 2])
                     - np.maximum(gb_x1y1_x2y2[:, 0], ab_x1y1_x2y2[:, 0]))
    I_h = np.maximum(0.0, np.minimum(gb_x1y1_x2y2[:, 3], ab_x1y1_x2y2[:, 3])
                     - np.maximum(gb_x1y1_x2y2[:, 1], ab_x1y1_x2y2[:, 1]))
    S_I = I_h * I_w
    # epsilon guards against a zero-area union
    U = S_gt + S_ab - S_I + 1e-20
    IoU = S_I / U
    return IoU
def set_anchors(anchor_size):
    """
    Build zero-centred anchor boxes from a list of anchor sizes.
    Input:
        anchor_size : list -> [[w_1, h_1], [w_2, h_2], ..., [w_n, h_n]]
                      (each entry is unpacked as width, height).
    Output:
        anchor_boxes : ndarray -> [[0, 0, anchor_w, anchor_h],
                                   ...,
                                   [0, 0, anchor_w, anchor_h]].
    """
    anchor_boxes = np.zeros([len(anchor_size), 4])
    # columns 0-1 stay zero (centre at the origin); columns 2-3 hold the sizes
    anchor_boxes[:, 2:] = np.asarray(anchor_size, dtype=np.float64)
    return anchor_boxes
def generate_txtytwth(gt_label, w, h, s, all_anchor_size):
    """
    Encode one ground-truth box into YOLO regression targets on a stride-s grid.
    Input:
        gt_label : sequence -> [xmin, ymin, xmax, ymax, cls_ind], coords in [0, 1].
        w, h : int -> image width and height in pixels.
        s : int -> feature-map stride.
        all_anchor_size : list -> [[w, h], ...] anchor sizes in feature-map units.
    Output:
        False for degenerate (sub-pixel) boxes; otherwise a list of
        [anchor_index, grid_x, grid_y, tx, ty, tw, th, weight, xmin, ymin, xmax, ymax]
        entries. Exactly one entry (the best-IoU anchor) is a positive target
        (weight > 0); any other anchor whose IoU exceeds ignore_thresh is
        emitted with weight == -1.0, meaning the objectness loss ignores it.
    """
    xmin, ymin, xmax, ymax = gt_label[:-1]
    # center, width and height in absolute pixels
    c_x = (xmax + xmin) / 2 * w
    c_y = (ymax + ymin) / 2 * h
    box_w = (xmax - xmin) * w
    box_h = (ymax - ymin) * h
    if box_w < 1. or box_h < 1.:
        # dirty (degenerate) box
        return False
    # map center, width and height onto the feature map
    c_x_s = c_x / s
    c_y_s = c_y / s
    box_ws = box_w / s
    box_hs = box_h / s
    # the grid cell location
    grid_x = int(c_x_s)
    grid_y = int(c_y_s)
    # IoU of the origin-centred gt box against every anchor shape
    anchor_boxes = set_anchors(all_anchor_size)
    gt_box = np.array([[0, 0, box_ws, box_hs]])
    iou = compute_iou(anchor_boxes, gt_box)
    iou_mask = (iou > ignore_thresh)
    # Single pass replaces the two former branches whose positive-target
    # encoding was duplicated: the best anchor is always the positive target
    # (when no anchor clears ignore_thresh, it is the only entry), and every
    # other anchor above the threshold is marked to be ignored.
    best_index = np.argmax(iou)
    result = []
    for index in range(len(iou)):
        if index == best_index:
            p_w, p_h = all_anchor_size[index]
            tx = c_x_s - grid_x
            ty = c_y_s - grid_y
            tw = np.log(box_ws / p_w)
            th = np.log(box_hs / p_h)
            # smaller boxes receive a larger regression weight
            weight = 2.0 - (box_w / w) * (box_h / h)
            result.append([index, grid_x, grid_y, tx, ty, tw, th, weight, xmin, ymin, xmax, ymax])
        elif iou_mask[index]:
            # ignored anchor: IoU above threshold but not the best match
            result.append([index, grid_x, grid_y, 0., 0., 0., 0., -1.0, 0., 0., 0., 0.])
    return result
def gt_creator(input_size, stride, label_lists, anchor_size):
    """
    Build the single-scale training target tensor.
    Input:
        input_size : list -> [h, w] of the training image.
        stride : int -> downsample factor of the CNN, such as 32, 64 and so on.
        label_lists : list -> per-image lists of [xmin, ymin, xmax, ymax, cls_ind];
                      len(label_lists) = batch size, coords normalized to [0, 1].
        anchor_size : list -> [[w, h], ...] anchor sizes.
    Output:
        gt_tensor : ndarray -> [batch_size, hs*ws*A, 1+1+4+1+4], per-cell layout
                    [objectness, class, tx, ty, tw, th, weight, xmin, ymin, xmax, ymax].
    """
    assert len(input_size) > 0 and len(label_lists) > 0
    batch_size = len(label_lists)
    h, w = input_size[0], input_size[1]
    hs = round(h / stride)
    ws = round(w / stride)
    n_anchors = len(anchor_size)
    gt_tensor = np.zeros([batch_size, hs, ws, n_anchors, 1 + 1 + 4 + 1 + 4])

    for b_idx, labels in enumerate(label_lists):
        for gt_label in labels:
            cls_ind = int(gt_label[-1])
            encoded = generate_txtytwth(gt_label, w, h, stride, anchor_size)
            if not encoded:
                continue  # dirty box: nothing to assign
            for a_idx, gx, gy, tx, ty, tw, th, weight, xmin, ymin, xmax, ymax in encoded:
                if weight > 0.:
                    # positive anchor: written only when the cell is in range
                    if gy < gt_tensor.shape[1] and gx < gt_tensor.shape[2]:
                        gt_tensor[b_idx, gy, gx, a_idx, 0] = 1.0
                        gt_tensor[b_idx, gy, gx, a_idx, 1] = cls_ind
                        gt_tensor[b_idx, gy, gx, a_idx, 2:6] = np.array([tx, ty, tw, th])
                        gt_tensor[b_idx, gy, gx, a_idx, 6] = weight
                        gt_tensor[b_idx, gy, gx, a_idx, 7:] = np.array([xmin, ymin, xmax, ymax])
                else:
                    # anchor marked to be ignored by the objectness loss
                    gt_tensor[b_idx, gy, gx, a_idx, 0] = -1.0
                    gt_tensor[b_idx, gy, gx, a_idx, 6] = -1.0

    return gt_tensor.reshape(batch_size, hs * ws * n_anchors, 1 + 1 + 4 + 1 + 4)
def multi_gt_creator(input_size, strides, label_lists, anchor_size):
    """
    Create multi-scale ground-truth tensors (one per stride) and concatenate them.
    Input:
        input_size : list -> [h, w] of the training image.
        strides : list -> downsample factors of the output scales, e.g. [8, 16, 32].
        label_lists : list -> per-image lists of [xmin, ymin, xmax, ymax, cls_ind],
                      coords normalized to [0, 1].
        anchor_size : list -> [[w, h], ...]; length must be a multiple of
                      len(strides); anchors are grouped per scale, in order.
    Output:
        gt_tensor : ndarray -> [batch_size, sum_s(h//s * w//s * A), 1+1+4+1+4],
                    per-cell layout
                    [objectness, class, tx, ty, tw, th, weight, xmin, ymin, xmax, ymax].
    """
    batch_size = len(label_lists)
    h, w = input_size
    num_scale = len(strides)
    all_anchor_size = anchor_size
    anchor_number = len(all_anchor_size) // num_scale
    # one empty target tensor per scale
    gt_tensor = [np.zeros([batch_size, h // s, w // s, anchor_number, 1 + 1 + 4 + 1 + 4])
                 for s in strides]
    # Anchor shapes are fixed; the original rebuilt them for every label.
    anchor_boxes = set_anchors(all_anchor_size)

    def _locate(index, c_x, c_y):
        """Map a flat anchor index to (scale, per-scale anchor slot, fractional
        and integer grid coordinates) — previously duplicated in three places."""
        s_indx = index // anchor_number
        ab_ind = index - s_indx * anchor_number
        s = strides[s_indx]
        c_x_s = c_x / s
        c_y_s = c_y / s
        return s_indx, ab_ind, c_x_s, c_y_s, int(c_x_s), int(c_y_s)

    for batch_index in range(batch_size):
        for gt_label in label_lists[batch_index]:
            gt_class = int(gt_label[-1])
            xmin, ymin, xmax, ymax = gt_label[:-1]
            # center, width and height in absolute pixels
            c_x = (xmax + xmin) / 2 * w
            c_y = (ymax + ymin) / 2 * h
            box_w = (xmax - xmin) * w
            box_h = (ymax - ymin) * h
            if box_w < 1. or box_h < 1.:
                # dirty (degenerate) box
                continue
            # IoU of the origin-centred gt box against every anchor shape
            gt_box = np.array([[0, 0, box_w, box_h]])
            iou = compute_iou(anchor_boxes, gt_box)
            iou_mask = (iou > ignore_thresh)
            # The best anchor is always the positive target (when no anchor
            # clears ignore_thresh it is still assigned); any other anchor
            # above the threshold is marked -1 so the objectness loss skips it.
            best_index = np.argmax(iou)
            for index in range(len(iou)):
                if index == best_index:
                    s_indx, ab_ind, c_x_s, c_y_s, grid_x, grid_y = _locate(index, c_x, c_y)
                    p_w, p_h = anchor_boxes[index, 2], anchor_boxes[index, 3]
                    tx = c_x_s - grid_x
                    ty = c_y_s - grid_y
                    tw = np.log(box_w / p_w)
                    th = np.log(box_h / p_h)
                    # smaller boxes receive a larger regression weight
                    weight = 2.0 - (box_w / w) * (box_h / h)
                    if grid_y < gt_tensor[s_indx].shape[1] and grid_x < gt_tensor[s_indx].shape[2]:
                        gt_tensor[s_indx][batch_index, grid_y, grid_x, ab_ind, 0] = 1.0
                        gt_tensor[s_indx][batch_index, grid_y, grid_x, ab_ind, 1] = gt_class
                        gt_tensor[s_indx][batch_index, grid_y, grid_x, ab_ind, 2:6] = np.array([tx, ty, tw, th])
                        gt_tensor[s_indx][batch_index, grid_y, grid_x, ab_ind, 6] = weight
                        gt_tensor[s_indx][batch_index, grid_y, grid_x, ab_ind, 7:] = np.array([xmin, ymin, xmax, ymax])
                elif iou_mask[index]:
                    s_indx, ab_ind, _, _, grid_x, grid_y = _locate(index, c_x, c_y)
                    # Bounds check added: the original wrote unconditionally here
                    # and could index past the tensor for boxes centred exactly
                    # on the image border (the positive branch already checked).
                    if grid_y < gt_tensor[s_indx].shape[1] and grid_x < gt_tensor[s_indx].shape[2]:
                        gt_tensor[s_indx][batch_index, grid_y, grid_x, ab_ind, 0] = -1.0
                        gt_tensor[s_indx][batch_index, grid_y, grid_x, ab_ind, 6] = -1.0

    gt_tensor = [gt.reshape(batch_size, -1, 1 + 1 + 4 + 1 + 4) for gt in gt_tensor]
    return np.concatenate(gt_tensor, 1)
def iou_score(bboxes_a, bboxes_b):
    """
    Pairwise IoU between two aligned sets of corner-format boxes.
    bboxes_a : [B*N, 4] = [x1, y1, x2, y2]
    bboxes_b : [B*N, 4] = [x1, y1, x2, y2]
    Returns a [B*N] tensor of IoU values (0 where the pair does not overlap).
    """
    top_left = torch.max(bboxes_a[:, :2], bboxes_b[:, :2])
    bottom_right = torch.min(bboxes_a[:, 2:], bboxes_b[:, 2:])
    area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1)
    area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1)
    # clamping at zero zeroes the intersection of non-overlapping pairs
    overlap_wh = (bottom_right - top_left).clamp(min=0)
    area_i = overlap_wh[:, 0] * overlap_wh[:, 1]
    return area_i / (area_a + area_b - area_i)
def loss(pred_conf, pred_cls, pred_txtytwth, label, num_classes, obj_loss_f='mse'):
    """
    Compute the YOLO detection losses (objectness, classification, box regression).
    Input:
        pred_conf : tensor -> [B, N, 1] raw objectness scores (sigmoid applied here).
        pred_cls : tensor -> [B, N, C] class logits (fed to CrossEntropyLoss).
        pred_txtytwth : tensor -> [B, N, 4] raw (tx, ty, tw, th) predictions.
        label : tensor -> [B, N, 8]; layout inferred from the slices below:
                [conf, obj, cls, tx, ty, tw, th, weight].
        num_classes : int -> kept for interface compatibility; not used directly.
        obj_loss_f : str -> 'bce' (yolov3-style) or 'mse' (yolov2-style).
    Output:
        (conf_loss, cls_loss, txtytwth_loss, total_loss)
    Raises:
        ValueError if obj_loss_f is neither 'bce' nor 'mse'.
    """
    if obj_loss_f == 'bce':
        # In yolov3, we use bce as conf loss_f; obj/noobj weighted equally.
        conf_loss_function = BCELoss(reduction='mean')
        obj = 1.0
        noobj = 1.0
    elif obj_loss_f == 'mse':
        # In yolov2, we use mse as conf loss_f; positives weighted 5x.
        conf_loss_function = MSELoss(reduction='mean')
        obj = 5.0
        noobj = 1.0
    else:
        # Fail fast: previously an unknown value left conf_loss_function
        # unbound and surfaced later as a confusing NameError.
        raise ValueError("obj_loss_f must be 'bce' or 'mse', got %r" % (obj_loss_f,))

    cls_loss_function = nn.CrossEntropyLoss(reduction='none')
    txty_loss_function = nn.BCEWithLogitsLoss(reduction='none')
    twth_loss_function = nn.MSELoss(reduction='none')

    pred_conf = torch.sigmoid(pred_conf[:, :, 0])
    # CrossEntropyLoss expects class scores along dim 1: [B, C, N]
    pred_cls = pred_cls.permute(0, 2, 1)
    txty_pred = pred_txtytwth[:, :, :2]
    twth_pred = pred_txtytwth[:, :, 2:]

    gt_conf = label[:, :, 0].float()
    gt_obj = label[:, :, 1].float()
    gt_cls = label[:, :, 2].long()
    gt_txtytwth = label[:, :, 3:-1].float()
    gt_box_scale_weight = label[:, :, -1]
    # positives carry weight > 0; ignored anchors (weight == -1) drop out
    # of the class and box losses via this mask
    gt_mask = (gt_box_scale_weight > 0.).float()

    # objectness loss
    pos_loss, neg_loss = conf_loss_function(pred_conf, gt_conf, gt_obj)
    conf_loss = obj * pos_loss + noobj * neg_loss

    # class loss (positives only)
    cls_loss = torch.mean(torch.sum(cls_loss_function(pred_cls, gt_cls) * gt_mask, 1))

    # box loss: BCE-with-logits on (tx, ty), MSE on (tw, th), scaled per box
    txty_loss = torch.mean(torch.sum(torch.sum(txty_loss_function(txty_pred, gt_txtytwth[:, :, :2]), 2) * gt_box_scale_weight * gt_mask, 1))
    twth_loss = torch.mean(torch.sum(torch.sum(twth_loss_function(twth_pred, gt_txtytwth[:, :, 2:]), 2) * gt_box_scale_weight * gt_mask, 1))
    txtytwth_loss = txty_loss + twth_loss

    total_loss = conf_loss + cls_loss + txtytwth_loss
    return conf_loss, cls_loss, txtytwth_loss, total_loss
if __name__ == "__main__":
    # quick sanity check for compute_iou: several anchors against one 10x10 gt box
    gt_box = np.array([[0.0, 0.0, 10, 10]])
    anchor_boxes = np.array([[0.0, 0.0, 10, 10],
                             [0.0, 0.0, 4, 4],
                             [0.0, 0.0, 8, 8],
                             [0.0, 0.0, 16, 16]])
    print(compute_iou(anchor_boxes, gt_box))