While writing a custom loss layer for Caffe, I've hit a problem: one host function of mine launches several __global__ kernels, but sometimes the first two kernels execute and sometimes they don't, and I can't figure out where the problem is. Here is the Forward_gpu implementation:
template <typename Dtype>
void PixelClustingLossLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const int num = bottom[0]->num();
  const int data_channels = bottom[0]->channels();
  const int mask_channels = bottom[1]->channels();
  const int height = bottom[0]->height();
  const int width = bottom[0]->width();
  const int spatial_dim = height * width;
  const int nc = num * data_channels;
  const int data_count = bottom[0]->count();
  const int mask_count = bottom[1]->count();
  Dtype* pos_num_data = pos_num_.mutable_cpu_data();
  Dtype* neg_num_data = neg_num_.mutable_cpu_data();
  // Zero every accumulator blob before this forward pass.
  caffe_gpu_set(mask_count, Dtype(0.), pixel_mask_.mutable_gpu_data());
  caffe_gpu_set(num, Dtype(0.), loss_mask_.mutable_gpu_data());
  caffe_gpu_set(nc, Dtype(0.), pos_ave_.mutable_gpu_data());
  caffe_gpu_set(nc, Dtype(0.), neg_ave_.mutable_gpu_data());
  caffe_gpu_set(num, Dtype(0.), pos_loss_.mutable_gpu_data());
  caffe_gpu_set(num, Dtype(0.), neg_loss_.mutable_gpu_data());
  caffe_gpu_set(num, Dtype(0.), center_loss_.mutable_gpu_data());
  // Per image: count positive mask pixels (asum returns its result to host
  // memory), then derive the negative count. The mask is assumed to be
  // single-channel here, so image n starts at offset n * spatial_dim.
  for (int n = 0; n < num; ++n) {
    caffe_gpu_asum(spatial_dim, bottom[1]->gpu_data() + n * spatial_dim,
        pos_num_data + n);
    neg_num_data[n] = spatial_dim - pos_num_data[n];
  }
  //LOG(INFO) << "There are " << pos_num_.cpu_data()[0] << " pos pixels and "
  //    << neg_num_.cpu_data()[0] << " neg pixels.";
  // Sum the features of positive and negative pixels per image and channel.
  GetTotalValue<Dtype><<<CAFFE_GET_BLOCKS(data_count), CAFFE_CUDA_NUM_THREADS>>>(
      data_count, bottom[0]->gpu_data(), bottom[1]->gpu_data(),
      pos_ave_.mutable_gpu_data(), neg_ave_.mutable_gpu_data(),
      data_channels, height, width);
  //LOG(INFO) << "There are 111 neg pixels.";
  // Normalize the sums by the pixel counts to get the per-class averages,
  // and accumulate the center loss.
  GetAveValue<Dtype><<<CAFFE_GET_BLOCKS(nc), CAFFE_CUDA_NUM_THREADS>>>(
      nc, pos_num_.gpu_data(), neg_num_.gpu_data(),
      pos_ave_.mutable_gpu_data(), neg_ave_.mutable_gpu_data(),
      center_loss_.mutable_gpu_data(), data_channels);
  //LOG(INFO) << "There are 222 neg pixels.";
  // Squared Euclidean distance of each pixel's feature to the class averages.
  PowerEuclideanDistance<Dtype><<<CAFFE_GET_BLOCKS(mask_count), CAFFE_CUDA_NUM_THREADS>>>(
      mask_count, bottom[0]->gpu_data(), bottom[1]->gpu_data(),
      pos_ave_.gpu_data(), neg_ave_.gpu_data(), euclidean_dis_.mutable_gpu_data(),
      mask_channels, data_channels, height, width);
  // Per-pixel positive/negative losses and the pixel mask.
  ComputePixelLoss<Dtype><<<CAFFE_GET_BLOCKS(mask_count), CAFFE_CUDA_NUM_THREADS>>>(
      mask_count, bottom[1]->gpu_data(), euclidean_dis_.gpu_data(),
      pos_loss_.mutable_gpu_data(), neg_loss_.mutable_gpu_data(),
      pos_num_.gpu_data(), neg_num_.gpu_data(), pixel_mask_.mutable_gpu_data(),
      mask_channels, height, width, alpha_);
  // Per-image class-level loss and loss mask.
  ComputeClassLoss<Dtype><<<CAFFE_GET_BLOCKS(num), CAFFE_CUDA_NUM_THREADS>>>(
      num, center_loss_.mutable_gpu_data(), loss_mask_.mutable_gpu_data(), beta_);
  // Total the three loss terms per image, then average over the batch.
  caffe_gpu_add(num, neg_loss_.gpu_data(), pos_loss_.gpu_data(),
      loss_.mutable_gpu_data());
  caffe_gpu_add(num, loss_.gpu_data(), center_loss_.gpu_data(),
      loss_.mutable_gpu_data());
  Dtype loss;
  caffe_gpu_asum(num, loss_.gpu_data(), &loss);
  LOG(INFO) << loss / Dtype(num);
  top[0]->mutable_cpu_data()[0] = loss / num;
}
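One thing I only noticed while writing this post (so this is a guess, not a diagnosis): none of these launches is error-checked, and kernel launches are asynchronous, so a launch that fails, for example with an invalid configuration, silently does nothing unless the error is queried. Caffe's own .cu layers call CUDA_POST_KERNEL_CHECK (a wrapper around cudaPeekAtLastError) after every launch, and my code never does. Below is a minimal standalone sketch of the checking pattern I intend to add after each of the five launches above; the Dummy kernel and the CHECK_KERNEL macro are just for illustration, not part of my layer:

#include <cstdio>
#include <cuda_runtime.h>

// Report both launch-time errors (bad grid/block configuration, etc.) and
// execution-time errors (illegal memory access, etc.) right after a launch.
#define CHECK_KERNEL(name)                                              \
  do {                                                                  \
    cudaError_t e = cudaGetLastError();                                 \
    if (e != cudaSuccess)                                               \
      printf("%s launch failed: %s\n", name, cudaGetErrorString(e));    \
    e = cudaDeviceSynchronize();                                        \
    if (e != cudaSuccess)                                               \
      printf("%s execution failed: %s\n", name, cudaGetErrorString(e)); \
  } while (0)

__global__ void Dummy(int n, float* out) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) out[i] = 1.0f;
}

int main() {
  const int n = 1 << 20;
  float* d_out = NULL;
  cudaMalloc(&d_out, n * sizeof(float));
  Dummy<<<(n + 511) / 512, 512>>>(n, d_out);
  CHECK_KERNEL("Dummy");  // prints the reason if the launch was skipped
  cudaFree(d_out);
  return 0;
}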
Still, it is mainly GetTotalValue() and GetAveValue() that sometimes execute and sometimes don't, and it has me completely stumped. Can anyone point me in the right direction?
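For completeness: I understand that intermittent kernel behavior like this is often caused by an out-of-bounds access inside one of the kernels rather than by the launch site itself, so I also plan to run the whole thing under cuda-memcheck, along the lines of the command below (my_net.prototxt stands in for my actual prototxt):

cuda-memcheck ./build/tools/caffe time -model my_net.prototxt -gpu 0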