def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    The feature tensor is reshaped to
    ``[-1, grid_rows, grid_cols, num_anchors, num_classes + 5]`` and the last
    axis is decoded as follows:

    * channels ``0:2`` -> ``box_xy = (sigmoid(t) + cell index) / grid size``
    * channels ``2:4`` -> ``box_wh = exp(t) * anchor / input size``
    * channel ``4``    -> ``box_confidence = sigmoid(t)``
    * channels ``5:``  -> ``box_class_probs = sigmoid(t)``

    Args:
        feats: Backend tensor whose axes 1 and 2 are used as the grid size.
            Presumably laid out as
            ``[batch, rows, cols, num_anchors * (num_classes + 5)]`` --
            confirm against the caller.
        anchors: Sequence of ``num_anchors`` pairs; it is reshaped to
            ``[1, 1, 1, num_anchors, 2]``. Presumably (width, height) in
            input-image pixels -- confirm against the caller.
        num_classes: Number of class scores per anchor.
        input_shape: Two-element shape of the network input. It is reversed
            before dividing the (w, h) sizes, so it is presumably given as
            (height, width).
        calc_loss: When equal to ``True``, return the tensors needed by the
            loss computation instead of the decoded predictions.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` if ``calc_loss == True`` (``feats``
        being the reshaped tensor), otherwise
        ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    anchor_count = len(anchors)
    # Shape [1, 1, 1, num_anchors, 2] so the anchors broadcast over the
    # leading and grid axes.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    grid_shape = K.shape(feats)[1:3]
    n_rows, n_cols = grid_shape[0], grid_shape[1]
    feat_dtype = K.dtype(feats)

    # Per-cell indices: grid_y varies along axis 0, grid_x along axis 1.
    row_index = K.reshape(K.arange(0, stop=n_rows), [-1, 1, 1, 1])
    grid_y = K.tile(row_index, [1, n_cols, 1, 1])
    col_index = K.reshape(K.arange(0, stop=n_cols), [1, -1, 1, 1])
    grid_x = K.tile(col_index, [n_rows, 1, 1, 1])

    # Full (x, y) cell-coordinate grid, cast to the dtype of feats so it can
    # be added to the sigmoid outputs.
    grid = K.cast(K.concatenate([grid_x, grid_y]), feat_dtype)

    feats = K.reshape(
        feats, [-1, n_rows, n_cols, anchor_count, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # grid_shape is reversed so that x is divided by the column count and y
    # by the row count.
    xy_divisor = K.cast(grid_shape[::-1], feat_dtype)
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / xy_divisor

    # feats[..., 2:4] holds the raw size terms (t_w, t_h).
    wh_divisor = K.cast(input_shape[::-1], feat_dtype)
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / wh_divisor

    # feats[..., 4:5] holds the raw objectness term (t_o).
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Explicit comparison kept: only values equal to True select this branch.
    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    """Get corrected boxes.

    Rescales box centres and sizes expressed relative to the network input
    into corner coordinates scaled by ``image_shape``.

    The computation is consistent with undoing an aspect-preserving resize
    with centred padding: ``new_shape`` is ``image_shape`` scaled by the
    smallest ``input_shape / image_shape`` ratio, ``offset`` is half of the
    remaining margin as a fraction of ``input_shape``, and ``scale`` is
    ``input_shape / new_shape``. NOTE(review): confirm this matches the
    preprocessing actually applied to the images.

    Args:
        box_xy: Box centres; the last axis is reversed to (y, x) order.
        box_wh: Box sizes; the last axis is reversed to (h, w) order.
        input_shape: Shape of the network input, cast to the dtype of the
            boxes. Presumably (height, width) -- confirm against the caller.
        image_shape: Shape of the original image, in the same order as
            ``input_shape``.

    Returns:
        Tensor whose last axis is ``(y_min, x_min, y_max, x_max)``, multiplied
        by ``image_shape``.
    """
    # Reverse the last axis: (x, y) -> (y, x) and (w, h) -> (h, w).
    box_yx = box_xy[..., ::-1]
    box_hw = box_wh[..., ::-1]

    box_dtype = K.dtype(box_yx)
    input_shape = K.cast(input_shape, box_dtype)
    image_shape = K.cast(image_shape, box_dtype)

    # K.min reduces over every element, giving one ratio for both axes;
    # K.round then snaps the scaled image shape to whole numbers.
    resize_ratio = K.min(input_shape / image_shape)
    new_shape = K.round(image_shape * resize_ratio)

    offset = (input_shape - new_shape) / 2. / input_shape
    scale = input_shape / new_shape

    centers_yx = (box_yx - offset) * scale
    sizes_hw = box_hw * scale

    half_hw = sizes_hw / 2.
    box_mins = centers_yx - half_hw
    box_maxes = centers_yx + half_hw

    corners = K.concatenate([
        box_mins[..., 0:1],   # y_min
        box_mins[..., 1:2],   # x_min
        box_maxes[..., 0:1],  # y_max
        box_maxes[..., 1:2],  # x_max
    ])

    # Scale boxes back to original image shape.
    return corners * K.concatenate([image_shape, image_shape])

1.第一段代码中的 feats 传入的是什么值？我的猜测：(1) 是一个 scale 的 feature_map；(2) 是三个 scale 的 feature_map。
2.如果传入的是一个 feature_map，语句 feats = K.reshape(
feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) 中的 -1 所代表的维度是什么？一个 feature_map 的 size 不应该是 grid_shape[0]*grid_shape[1]*(num_classes+5) 吗？为什么可以 reshape 成上面的格式？
3.box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats)) 中分母的含义是什么？原论文中 $b_x = \sigma(t_x) + c_x$，好像没有出现分母。
4.第二个代码段所实现的功能是什么？是如何实现的？

2个回答

1.是一个scale的feature map
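2.-1应该是batch size：feats 的最后一维是 num_anchors*(num_classes+5)，reshape 只是把它拆成 num_anchors 和 num_classes+5 两个维度，grid_shape[0]、grid_shape[1] 保持不变，剩下由 -1 自动推断的就是 batch 维。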
3.不知道
4.将训练时用的(416,416)得出的bbox转化成原图size的bbox

3中分母的作用是除以grid的宽度和高度（grid_shape[::-1]），把 box_xy 归一化到 0~1 之间。

