最近尝试deploy deeplab v2 的模型, 官方没有deploy.prototxt 所以自己从test.prototxt 改写了一个。但结果是:
如果去掉CRF层,跑出来就是一团浆糊; 如果保留CRF层,物体的轮廓很清晰,有胳膊有腿的,但是每个物体每次跑出来的颜色都不一样,也就是说每次分配的标签也不一样,也印证了前面没有CRF层的时候完全没有预测能力这个事实。。。
我直接用了 **[http://liangchiehchen.com/projects/DeepLabv2_vgg.html](http://liangchiehchen.com/projects/DeepLabv2_vgg.html)** 这个链接下载的 train_iter_20000.caffemodel,我以为这是在 VOC 2012 上 fine-tune 过的权重,因此我的 deploy.prototxt 也是按照这个假设写的。所以,求助啊!为什么这样直接 deploy 完全不行的样子?
以下是deploy.prototxt:
# VGG 16-layer network convolutional finetuning
# Network modified to have smaller receptive field (128 pixels)
# and smaller stride (8 pixels) when run in convolutional mode.
#
# In this model we also change max pooling size in the first 4 layers
# from 2 to 3 while retaining stride = 2
# which makes it easier to exactly align responses at different layers.
#
name: "233"
# Image blob: 1 x 3 x 513 x 513.
# NOTE(review): assumes BGR, mean-subtracted input like the training net — confirm preprocessing.
input: "data"
input_dim: 1
input_dim: 3
input_dim: 513
input_dim: 513
# 1 x 1 x 1 x 2 blob; presumably carries the real image (height, width) for the
# DenseCRF layer — verify against how the caller fills it.
input: "data_dim"
input_dim: 1
input_dim: 1
input_dim: 1
input_dim: 2
# MemoryData input layer carried over from test.prototxt, disabled here
# (the net uses the "input:" declarations above instead).
# NOTE(review): previously only the "layer {" and "}" lines carried a '#',
# leaving the body as stray top-level tokens that break prototxt parsing.
# The whole stanza is now commented out.
#layer {
#name: "data"
#type: "MemoryData"
#top: "data"
#top: "data_dim"
#memory_data_param {
#batch_size: 1
#channels: 3
#height: 865
#width: 1297
#}
#}
# Block 1: two 3x3/pad-1 convs (64 channels) + 3x3/stride-2/pad-1 max pool.
layer {
name: "conv1_1"
type: "Convolution"
bottom: "data"
top: "conv1_1"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
}
}
layer {
name: "relu1_1"
type: "ReLU"
bottom: "conv1_1"
top: "conv1_1"
}
layer {
name: "conv1_2"
type: "Convolution"
bottom: "conv1_1"
top: "conv1_2"
convolution_param {
num_output: 64
pad: 1
kernel_size: 3
}
}
layer {
name: "relu1_2"
type: "ReLU"
bottom: "conv1_2"
top: "conv1_2"
}
# 3x3 pooling window (instead of VGG's 2x2), stride 2 — per the header note,
# this keeps responses aligned across layers.
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1_2"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
pad: 1
}
}
# Block 2: two 3x3/pad-1 convs (128 channels) + 3x3/stride-2/pad-1 max pool.
layer {
name: "conv2_1"
type: "Convolution"
bottom: "pool1"
top: "conv2_1"
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
}
}
layer {
name: "relu2_1"
type: "ReLU"
bottom: "conv2_1"
top: "conv2_1"
}
layer {
name: "conv2_2"
type: "Convolution"
bottom: "conv2_1"
top: "conv2_2"
convolution_param {
num_output: 128
pad: 1
kernel_size: 3
}
}
layer {
name: "relu2_2"
type: "ReLU"
bottom: "conv2_2"
top: "conv2_2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2_2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
pad: 1
}
}
# Block 3: three 3x3/pad-1 convs (256 channels) + 3x3/stride-2/pad-1 max pool.
layer {
name: "conv3_1"
type: "Convolution"
bottom: "pool2"
top: "conv3_1"
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
}
}
layer {
name: "relu3_1"
type: "ReLU"
bottom: "conv3_1"
top: "conv3_1"
}
layer {
name: "conv3_2"
type: "Convolution"
bottom: "conv3_1"
top: "conv3_2"
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
}
}
layer {
name: "relu3_2"
type: "ReLU"
bottom: "conv3_2"
top: "conv3_2"
}
layer {
name: "conv3_3"
type: "Convolution"
bottom: "conv3_2"
top: "conv3_3"
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
}
}
layer {
name: "relu3_3"
type: "ReLU"
bottom: "conv3_3"
top: "conv3_3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3_3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
pad: 1
}
}
# Block 4: three 3x3/pad-1 convs (512 channels).
layer {
name: "conv4_1"
type: "Convolution"
bottom: "pool3"
top: "conv4_1"
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu4_1"
type: "ReLU"
bottom: "conv4_1"
top: "conv4_1"
}
layer {
name: "conv4_2"
type: "Convolution"
bottom: "conv4_1"
top: "conv4_2"
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu4_2"
type: "ReLU"
bottom: "conv4_2"
top: "conv4_2"
}
layer {
name: "conv4_3"
type: "Convolution"
bottom: "conv4_2"
top: "conv4_3"
convolution_param {
num_output: 512
pad: 1
kernel_size: 3
}
}
layer {
name: "relu4_3"
type: "ReLU"
bottom: "conv4_3"
top: "conv4_3"
}
# pool4 uses stride 1 (not 2): spatial resolution is preserved from here on,
# keeping the overall network stride at 8 as stated in the header.
layer {
bottom: "conv4_3"
top: "pool4"
name: "pool4"
type: "Pooling"
pooling_param {
pool: MAX
kernel_size: 3
pad: 1
stride: 1
}
}
# Block 5: three 3x3 convs (512 channels) with dilation 2 / pad 2, compensating
# for pool4's stride-1 so the effective receptive field matches the original VGG.
layer {
name: "conv5_1"
type: "Convolution"
bottom: "pool4"
top: "conv5_1"
convolution_param {
num_output: 512
pad: 2
kernel_size: 3
dilation: 2
}
}
layer {
name: "relu5_1"
type: "ReLU"
bottom: "conv5_1"
top: "conv5_1"
}
layer {
name: "conv5_2"
type: "Convolution"
bottom: "conv5_1"
top: "conv5_2"
convolution_param {
num_output: 512
pad: 2
kernel_size: 3
dilation: 2
}
}
layer {
name: "relu5_2"
type: "ReLU"
bottom: "conv5_2"
top: "conv5_2"
}
layer {
name: "conv5_3"
type: "Convolution"
bottom: "conv5_2"
top: "conv5_3"
convolution_param {
num_output: 512
pad: 2
kernel_size: 3
dilation: 2
}
}
layer {
name: "relu5_3"
type: "ReLU"
bottom: "conv5_3"
top: "conv5_3"
}
# pool5 also keeps stride 1 — no further downsampling.
layer {
bottom: "conv5_3"
top: "pool5"
name: "pool5"
type: "Pooling"
pooling_param {
pool: MAX
kernel_size: 3
stride: 1
pad: 1
}
}
# ---- ASPP branch 1: hole (dilation) = 6 ----
# (was a bare "hole = 6" line, which is not valid prototxt syntax)
layer {
name: "fc6_1"
type: "Convolution"
bottom: "pool5"
top: "fc6_1"
convolution_param {
num_output: 1024
pad: 6
kernel_size: 3
dilation: 6
}
}
layer {
name: "relu6_1"
type: "ReLU"
bottom: "fc6_1"
top: "fc6_1"
}
# Dropout is a pass-through at test/deploy time.
layer {
name: "drop6_1"
type: "Dropout"
bottom: "fc6_1"
top: "fc6_1"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc7_1"
type: "Convolution"
bottom: "fc6_1"
top: "fc7_1"
convolution_param {
num_output: 1024
kernel_size: 1
}
}
layer {
name: "relu7_1"
type: "ReLU"
bottom: "fc7_1"
top: "fc7_1"
}
layer {
name: "drop7_1"
type: "Dropout"
bottom: "fc7_1"
top: "fc7_1"
dropout_param {
dropout_ratio: 0.5
}
}
# 21-class (VOC12) score layer. Renamed "fc8_0_1" -> "fc8_voc12_1" so Caffe's
# name-based weight copy picks up the released DeepLabv2 VGG fc8 weights; with
# a non-matching name this layer silently kept its random gaussian init, which
# is why predictions differed on every run. The top blob keeps its old name so
# the Eltwise layer below still connects unchanged.
layer {
name: "fc8_voc12_1"
type: "Convolution"
bottom: "fc7_1"
top: "fc8_0_1"
convolution_param {
num_output: 21
kernel_size: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# ---- ASPP branch 2: hole (dilation) = 12 ----
# (was a bare "hole = 12" line, which is not valid prototxt syntax)
layer {
name: "fc6_2"
type: "Convolution"
bottom: "pool5"
top: "fc6_2"
convolution_param {
num_output: 1024
pad: 12
kernel_size: 3
dilation: 12
}
}
layer {
name: "relu6_2"
type: "ReLU"
bottom: "fc6_2"
top: "fc6_2"
}
layer {
name: "drop6_2"
type: "Dropout"
bottom: "fc6_2"
top: "fc6_2"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc7_2"
type: "Convolution"
bottom: "fc6_2"
top: "fc7_2"
convolution_param {
num_output: 1024
kernel_size: 1
}
}
layer {
name: "relu7_2"
type: "ReLU"
bottom: "fc7_2"
top: "fc7_2"
}
layer {
name: "drop7_2"
type: "Dropout"
bottom: "fc7_2"
top: "fc7_2"
dropout_param {
dropout_ratio: 0.5
}
}
# Renamed "fc8_0_2" -> "fc8_voc12_2" to match the released caffemodel's layer
# name (name-based weight loading); top blob name unchanged for wiring.
layer {
name: "fc8_voc12_2"
type: "Convolution"
bottom: "fc7_2"
top: "fc8_0_2"
convolution_param {
num_output: 21
kernel_size: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# ---- ASPP branch 3: hole (dilation) = 18 ----
# (was a bare "hole = 18" line, which is not valid prototxt syntax)
layer {
name: "fc6_3"
type: "Convolution"
bottom: "pool5"
top: "fc6_3"
convolution_param {
num_output: 1024
pad: 18
kernel_size: 3
dilation: 18
}
}
layer {
name: "relu6_3"
type: "ReLU"
bottom: "fc6_3"
top: "fc6_3"
}
layer {
name: "drop6_3"
type: "Dropout"
bottom: "fc6_3"
top: "fc6_3"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc7_3"
type: "Convolution"
bottom: "fc6_3"
top: "fc7_3"
convolution_param {
num_output: 1024
kernel_size: 1
}
}
layer {
name: "relu7_3"
type: "ReLU"
bottom: "fc7_3"
top: "fc7_3"
}
layer {
name: "drop7_3"
type: "Dropout"
bottom: "fc7_3"
top: "fc7_3"
dropout_param {
dropout_ratio: 0.5
}
}
# Renamed "fc8_0_3" -> "fc8_voc12_3" to match the released caffemodel's layer
# name (name-based weight loading); top blob name unchanged for wiring.
layer {
name: "fc8_voc12_3"
type: "Convolution"
bottom: "fc7_3"
top: "fc8_0_3"
convolution_param {
num_output: 21
kernel_size: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# ---- ASPP branch 4: hole (dilation) = 24 ----
# (was a bare "hole = 24" line, which is not valid prototxt syntax)
layer {
name: "fc6_4"
type: "Convolution"
bottom: "pool5"
top: "fc6_4"
convolution_param {
num_output: 1024
pad: 24
kernel_size: 3
dilation: 24
}
}
layer {
name: "relu6_4"
type: "ReLU"
bottom: "fc6_4"
top: "fc6_4"
}
layer {
name: "drop6_4"
type: "Dropout"
bottom: "fc6_4"
top: "fc6_4"
dropout_param {
dropout_ratio: 0.5
}
}
layer {
name: "fc7_4"
type: "Convolution"
bottom: "fc6_4"
top: "fc7_4"
convolution_param {
num_output: 1024
kernel_size: 1
}
}
layer {
name: "relu7_4"
type: "ReLU"
bottom: "fc7_4"
top: "fc7_4"
}
layer {
name: "drop7_4"
type: "Dropout"
bottom: "fc7_4"
top: "fc7_4"
dropout_param {
dropout_ratio: 0.5
}
}
# Renamed "fc8_0_4" -> "fc8_voc12_4" to match the released caffemodel's layer
# name (name-based weight loading); top blob name unchanged for wiring.
layer {
name: "fc8_voc12_4"
type: "Convolution"
bottom: "fc7_4"
top: "fc8_0_4"
convolution_param {
num_output: 21
kernel_size: 1
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# Sum the four ASPP branch score maps into the fused 21-channel score.
# (the previous "SUM the four branches" line lacked a '#' and was invalid
# prototxt syntax)
layer {
bottom: "fc8_0_1"
bottom: "fc8_0_2"
bottom: "fc8_0_3"
bottom: "fc8_0_4"
top: "fc8_0"
name: "fc8_0"
type: "Eltwise"
eltwise_param {
operation: SUM
}
}
# Upsample the fused score map 8x back toward input resolution.
# NOTE(review): "Interp" is a DeepLab-specific layer, not in stock BVLC Caffe —
# this prototxt requires the DeepLab fork of Caffe to run.
layer {
bottom: "fc8_0"
top: "fc8_interp"
name: "fc8_interp"
type: "Interp"
interp_param {
zoom_factor: 8
}
}
# DenseCRF post-processing layer, disabled for this deploy net.
# NOTE(review): previously only the "layer {" and "}" lines carried a '#',
# leaving the layer body as stray top-level tokens that break prototxt
# parsing. The whole stanza is now commented out; remove the leading '#'
# on every line (and feed "crf_inf" to ArgMax) to re-enable it.
#layer {
#bottom: "fc8_interp"
#bottom: "data_dim"
#bottom: "data"
#top: "crf_inf"
#name: "crf"
#type: "DenseCRF"
#dense_crf_param {
#max_iter: 10
#pos_w: 3
#pos_xy_std: 3
#bi_w: 6
#bi_xy_std: 50
#bi_rgb_std: 4
#}
#}
# Final per-pixel label map: argmax over the 21 class channels (axis 1).
layer {
name: "argmax"
type: "ArgMax"
bottom: "fc8_interp"
top: "argmax"
argmax_param {
axis: 1
}
}