xuexigua44 2019-12-30 14:39 采纳率: 0%
浏览 690

CUDA:在同一个函数里连续调用多个核函数(__global__ 函数)时出现问题。

在 Caffe 中编写 loss 层时,
我在一个函数(Forward_gpu)里依次启动了多个核函数,但前两个核函数有时不执行,有时又执行,不清楚问题出在哪里。

/**
 * @brief GPU forward pass of the pixel clustering loss layer.
 *
 * Steps, in order:
 *   1. zero the per-batch accumulation buffers;
 *   2. count positive / negative pixels of every sample on the host;
 *   3. launch GetTotalValue, GetAveValue, PowerEuclideanDistance,
 *      ComputePixelLoss and ComputeClassLoss;
 *   4. add pos_loss_, neg_loss_ and center_loss_ into loss_, sum it, and
 *      write the sum divided by the batch size to top[0].
 *
 * @param bottom bottom[0] is read as the data blob and bottom[1] as the mask
 *               blob (naming taken from the local variables below).
 * @param top    top[0] receives the scalar loss.
 */
template <typename Dtype>
void PixelClustingLossLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {

    const int num = bottom[0]->num();
    const int data_channels = bottom[0]->channels();
    const int mask_channels = bottom[1]->channels();
    const int height = bottom[0]->height();
    const int width = bottom[0]->width();
    const int spatial_dim = height * width;
    const int nc = num * data_channels;
    const int data_count = bottom[0]->count();
    const int mask_count = bottom[1]->count();

    // Host-side views of the per-sample pixel counters filled in below.
    Dtype* pos_num_data = pos_num_.mutable_cpu_data();
    Dtype* neg_num_data = neg_num_.mutable_cpu_data();

    // Reset every buffer that the kernels below write into.
    caffe_gpu_set(mask_count, Dtype(0.), pixel_mask_.mutable_gpu_data());
    caffe_gpu_set(num, Dtype(0.), loss_mask_.mutable_gpu_data());
    caffe_gpu_set(nc, Dtype(0.), pos_ave_.mutable_gpu_data());
    caffe_gpu_set(nc, Dtype(0.), neg_ave_.mutable_gpu_data());
    caffe_gpu_set(num, Dtype(0.), pos_loss_.mutable_gpu_data());
    caffe_gpu_set(num, Dtype(0.), neg_loss_.mutable_gpu_data());
    caffe_gpu_set(num, Dtype(0.), center_loss_.mutable_gpu_data());

    // Per sample: positives = absolute sum of the mask slice, negatives = the rest.
    // NOTE(review): the offset n * spatial_dim looks correct only when
    // bottom[1] has a single channel -- confirm against the layer's Reshape().
    for (int n = 0; n < num; ++n) {
        caffe_gpu_asum(spatial_dim, bottom[1]->gpu_data() + n * spatial_dim,
                       pos_num_data + n);
        neg_num_data[n] = spatial_dim - pos_num_data[n];
    }

    // A kernel launch does not report failure by itself: without a check, a
    // launch that fails is silently skipped and the following code keeps
    // running on stale buffers. CUDA_POST_KERNEL_CHECK (Caffe's convention
    // after every launch) turns such a failure into an immediate error.
    GetTotalValue<Dtype> <<<CAFFE_GET_BLOCKS(data_count), CAFFE_CUDA_NUM_THREADS>>>(
        data_count, bottom[0]->gpu_data(), bottom[1]->gpu_data(),
        pos_ave_.mutable_gpu_data(), neg_ave_.mutable_gpu_data(),
        data_channels, height, width);
    CUDA_POST_KERNEL_CHECK;

    GetAveValue<Dtype> <<<CAFFE_GET_BLOCKS(nc), CAFFE_CUDA_NUM_THREADS>>>(
        nc, pos_num_.gpu_data(), neg_num_.gpu_data(),
        pos_ave_.mutable_gpu_data(), neg_ave_.mutable_gpu_data(),
        center_loss_.mutable_gpu_data(), data_channels);
    CUDA_POST_KERNEL_CHECK;

    PowerEuclideanDistance<Dtype> <<<CAFFE_GET_BLOCKS(mask_count), CAFFE_CUDA_NUM_THREADS>>>(
        mask_count, bottom[0]->gpu_data(), bottom[1]->gpu_data(),
        pos_ave_.gpu_data(), neg_ave_.gpu_data(),
        euclidean_dis_.mutable_gpu_data(),
        mask_channels, data_channels, height, width);
    CUDA_POST_KERNEL_CHECK;

    ComputePixelLoss<Dtype> <<<CAFFE_GET_BLOCKS(mask_count), CAFFE_CUDA_NUM_THREADS>>>(
        mask_count, bottom[1]->gpu_data(), euclidean_dis_.gpu_data(),
        pos_loss_.mutable_gpu_data(), neg_loss_.mutable_gpu_data(),
        pos_num_.gpu_data(), neg_num_.gpu_data(),
        pixel_mask_.mutable_gpu_data(),
        mask_channels, height, width, alpha_);
    CUDA_POST_KERNEL_CHECK;

    ComputeClassLoss<Dtype> <<<CAFFE_GET_BLOCKS(num), CAFFE_CUDA_NUM_THREADS>>>(
        num, center_loss_.mutable_gpu_data(), loss_mask_.mutable_gpu_data(), beta_);
    CUDA_POST_KERNEL_CHECK;

    // loss_ = neg_loss_ + pos_loss_ + center_loss_ (element-wise, per sample).
    caffe_gpu_add(num, neg_loss_.gpu_data(), pos_loss_.gpu_data(), loss_.mutable_gpu_data());
    caffe_gpu_add(num, loss_.gpu_data(), center_loss_.gpu_data(), loss_.mutable_gpu_data());

    // Initialised so the value is defined even before caffe_gpu_asum fills it.
    Dtype loss = Dtype(0);
    caffe_gpu_asum(num, loss_.gpu_data(), &loss);
    LOG(INFO) << loss / Dtype(num);
    top[0]->mutable_cpu_data()[0] = loss / num;
}

主要是 GetTotalValue() 和 GetAveValue() 这两个核函数,有时执行,有时不执行,让我很困惑。
有没有大神能指点迷津?

  • 写回答

1条回答 默认 最新

  • CSDN-Ada助手 CSDN-AI 官方账号 2022-09-20 21:22
    关注
    不知道你这个问题是否已经解决, 如果还没有解决的话:

    如果你已经解决了该问题, 非常希望你能够分享一下解决方案, 写成博客, 将相关链接放在评论区, 以帮助更多的人 ^-^
    评论

报告相同问题?

悬赏问题

  • ¥15 交替优化波束形成和ris反射角使保密速率最大化
  • ¥15 树莓派与pix飞控通信
  • ¥15 自动转发微信群信息到另外一个微信群
  • ¥15 outlook无法配置成功
  • ¥30 这是哪个作者做的宝宝起名网站
  • ¥60 版本过低apk如何修改可以兼容新的安卓系统
  • ¥25 由IPR导致的DRIVER_POWER_STATE_FAILURE蓝屏
  • ¥50 有数据,怎么建立模型求影响全要素生产率的因素
  • ¥50 有数据,怎么用matlab求全要素生产率
  • ¥15 TI的insta-spin例程