def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Decodes the raw layer output into box centres (bx, by), box sizes
    (bw, bh), a confidence score and per-class scores.

    Args:
        feats: Raw output tensor of one detection layer. Axes 1 and 2 are
            read as the grid height and width, and the tensor is reshaped to
            ``[-1, grid_h, grid_w, num_anchors, num_classes + 5]``.
        anchors: Sequence of ``num_anchors`` two-element anchor sizes
            (presumably (width, height) in input pixels -- TODO confirm).
        num_classes: Number of classes; each anchor carries
            ``num_classes + 5`` values on the last axis.
        input_shape: Two-element shape; its reversed order is the divisor
            for the anchor-scaled sizes (presumably the network input
            (height, width) -- TODO confirm against the caller).
        calc_loss: When truthy, return the intermediate tensors instead of
            the decoded confidence and class scores.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Shape [1, 1, 1, num_anchors, 2] so the anchors broadcast against the
    # leading axes of the reshaped features.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
    feats_dtype = K.dtype(feats)

    grid_shape = K.shape(feats)[1:3]
    # grid_y holds the row index and grid_x the column index of every cell,
    # each tiled across the whole grid.
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    # Full (x, y) cell coordinates, cast to the dtype of feats so they can be
    # added to the sigmoid outputs below.
    grid = K.cast(K.concatenate([grid_x, grid_y]), feats_dtype)

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # Dividing by the reversed grid shape expresses the centres as a fraction
    # of the grid extent rather than in cell units.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], feats_dtype)
    # feats[..., 2:4] selects entries 2 and 3 (0-based) of the last axis,
    # i.e. tw and th.
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], feats_dtype)
    # Entry 4 is the raw confidence value (t_o).
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    """Get corrected boxes.

    Re-expresses box centres and sizes relative to ``image_shape`` and
    returns them as corner coordinates.

    Args:
        box_xy: Box centres with (x, y) on the last axis.
        box_wh: Box sizes with (w, h) on the last axis.
        input_shape: Shape the boxes are currently expressed against
            (presumably the network input (height, width) -- TODO confirm).
        image_shape: Shape the returned boxes are scaled to
            (presumably the original image (height, width) -- TODO confirm).

    Returns:
        Tensor whose last axis is ``[y_min, x_min, y_max, x_max]``, multiplied
        by ``image_shape``.
    """
    # Reverse the last axis so both tensors are in (y, x) / (h, w) order.
    centers_yx = box_xy[..., ::-1]
    sizes_hw = box_wh[..., ::-1]

    float_type = K.dtype(centers_yx)
    input_shape = K.cast(input_shape, float_type)
    image_shape = K.cast(image_shape, float_type)

    # K.min takes the smallest per-axis ratio; K.round rounds to the nearest
    # integer. The result is image_shape scaled uniformly to fit inside
    # input_shape (presumably the aspect-preserving resize applied before
    # inference -- TODO confirm).
    new_shape = K.round(image_shape * K.min(input_shape / image_shape))

    # Fraction of input_shape left over on each side, and the factor that
    # stretches the occupied region back to the full [0, 1] range.
    offset = (input_shape - new_shape) / 2. / input_shape
    scale = input_shape / new_shape

    centers_yx = (centers_yx - offset) * scale
    sizes_hw = sizes_hw * scale

    half_hw = sizes_hw / 2.
    box_mins = centers_yx - half_hw
    box_maxes = centers_yx + half_hw

    y_min = box_mins[..., 0:1]
    x_min = box_mins[..., 1:2]
    y_max = box_maxes[..., 0:1]
    x_max = box_maxes[..., 1:2]
    corners = K.concatenate([y_min, x_min, y_max, x_max])

    # Scale boxes back to original image shape.
    return corners * K.concatenate([image_shape, image_shape])
请问:
1.第一个代码中的feats传入的是什么值?我的猜测:(1)是一个feature_map;(2)是三个scale的feature_map
2.如果传入的是一个feature_map,语句 feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) 中的-1所代表的维度是什么?
一个feature_map的size不应该是grid_shape[0]*grid_shape[1]*(num_classes+5)吗?怎么会reshape成为上面的格式?
3.box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats)) 中分母的含义是什么?原论文中 $b_x = \sigma(t_x) + c_x$,好像没有分母出现
4.第二个代码段所实现的功能是什么?是如何实现的?