liuxin95 2019-05-11 15:48 采纳率: 0%
浏览 1712
已结题

pycuda报错cuMemFree failed

报错内存释失败:Traceback (most recent call last):
File "filter_cuda.py", line 103, in
bilateral = Bilateral_filter(img,template_size,sigma[0],sigma[1])
File "filter_cuda.py", line 93, in Bilateral_filter
block=(template_size,template_size,3),grid=(rows,cols))
File "/usr/local/lib/python3.5/dist-packages/pycuda/driver.py", line 405, in function_call
Context.synchronize()
pycuda._driver.LogicError: cuCtxSynchronize failed: an illegal memory access was encountered
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: an illegal memory access was encountered
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: an illegal memory access was encountered
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: an illegal memory access was encountered
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: an illegal memory access was encountered
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuModuleUnload failed: an illegal memory access was encountered

import os,math,cv2,numpy
from PIL import Image
import numpy as np
import skimage
import pycuda.autoinit
import pycuda.driver as drv
from pycuda.compiler import SourceModule
from timeit import default_timer as timer

mod = SourceModule("""
#include<math.h>
__global__ void Gauss_cuda(int ***img,int ***im,float **disTemplate,int template_size)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    int z = threadIdx.z;
    int count = 0;
    for(int i=x;i<x+template_size;i++)
    {
        for(int j=y;j<y+template_size;j++)
        {
            count += im[i][j][z]*disTemplate[i][j]; 
        }
    }
    img[x][y][z] = count;  
}
__global__ void bilateral_cuda(float ***img,float ***orig,float **disTemplate,float *ourArg)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    int z = threadIdx.z;
    int xmin = max(int(x-int(ourArg[0])),0);
    int xmax = min(int(x+int(ourArg[0])+1),int(ourArg[1]));
    int ymin = max(int(y-int(ourArg[0])),0);
    int ymax = min(int(y+int(ourArg[0])+1),int(ourArg[2]));
    int w = 0;
    int v = 0;
    for(int i=xmin;i<xmax;i++)
    {
        for(int j=ymin;j<ymax;j++)
        {
            w += w+disTemplate[xmin-x+int(ourArg[0])+i][ymin-y+int(ourArg[0])+i]*exp((orig[i][j][z]-orig[x][y][z])*(orig[i][j][z]-orig[x][y][z])/(2*ourArg[3]*ourArg[3]));
            v += orig[x][y][z]*w;               
        }
    }
    img[x][y][z] = v/w;  
}

""")

Gauss_cuda = mod.get_function("Gauss_cuda")
bilateral_cuda = mod.get_function("bilateral_cuda")


def generateGaussianTemplate(template_size,sigma):
    template = np.zeros((template_size,template_size))
    mid = template_size/2
    sum = 0
    pi = 3.1415926
    for i in range(template_size):
        x = pow(i-mid,2)
        for j in range(template_size):
            y = pow(j-mid,2)
            g = math.exp((x+y)/(-2*sigma*sigma))/(2*pi*sigma)
            template[i][j] = g
            sum+=g
    #归一化
    template = template/sum
    return template

def Gauss_filter(img,template_size,sigma):
    [rows,cols,channel] = img.shape
    border = int(template_size/2)
    template = generateGaussianTemplate(template_size,sigma)
    im = cv2.copyMakeBorder(img,border,border,border,border,cv2.BORDER_REPLICATE)
    Gauss_cuda(
                drv.InOut(img), drv.In(im), drv.In(template),template_size,
                block=(template_size,template_size,3),grid=(rows,cols))
    return img

def Bilateral_filter(img,template_size,sigma1,sigma2):
    img = img/255
    border = int(template_size/2)
    [rows,cols,channel] = img.shape
    tmp = np.arange(-border,border+1)
    [x,y] = np.meshgrid(tmp,tmp)
    X = np.power(x,2)
    Y = np.power(y,2)
    d = np.exp(-(X+Y)/(2*sigma1*sigma1))
    orig_img = img
    bilateral_cuda(
                drv.InOut(img), drv.In(orig_img), drv.In(d),drv.In(np.array([border,rows,cols,sigma2])),
                block=(template_size,template_size,3),grid=(rows,cols))
    return img*255

if __name__=='__main__':
    img = cv2.imread('demo2.jpg')
    #noise = skimage.util.random_noise(img,'gaussian')*255
    #cv2.imwrite('noise.jpg',noise)
    template_size = 3
    sigma_g = 0.8
    sigma = [3,0.1]
    bilateral = Bilateral_filter(img,template_size,sigma[0],sigma[1])
    Gauss = Gauss_filter(img,template_size,sigma_g)
    cv2.imwrite('bilateral_result_{}_{}_{}.jpg'.format(template_size,sigma[0],sigma[1]),bilateral)
    cv2.imwrite('Gauss_result_{}_{}.jpg'.format(template_size,sigma_g),Gauss)

  • 写回答

3条回答 默认 最新

  • 叛逆&无情 2019-05-11 16:16
    关注

    import os,math,cv2,numpy
    from PIL import Image
    import numpy as np
    import skimage
    import pycuda.autoinit
    import pycuda.driver as drv
    from pycuda.compiler import SourceModule
    from timeit import default_timer as timer

    mod = SourceModule("""
    #include
    global void Gauss_cuda(int **img,int **im,float **disTemplate,int template_size)
    {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    int z = threadIdx.z;
    int count = 0;
    for(int i=x;i<x+template_size;i++)
    {
    for(int j=y;j<y+template_size;j++)
    {
    count += im[i][j][z]*disTemplate[i][j];
    }
    }
    img[x][y][z] = count;

    }
    global void bilateral_cuda(float **img,float **orig,float **disTemplate,float ourArg)
    {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    int z = threadIdx.z;
    int xmin = max(int(x-int(ourArg[0])),0);
    int xmax = min(int(x+int(ourArg[0])+1),int(ourArg[1]));
    int ymin = max(int(y-int(ourArg[0])),0);
    int ymax = min(int(y+int(ourArg[0])+1),int(ourArg[2]));
    int w = 0;
    int v = 0;
    for(int i=xmin;i<xmax;i++)
    {
    for(int j=ymin;j<ymax;j++)
    {
    w += w+disTemplate[xmin-x+int(ourArg[0])+i][ymin-y+int(ourArg[0])+i]*exp((orig[i][j][z]-orig[x][y][z])
    (orig[i][j][z]-orig[x][y][z])/(2*ourArg[3]*ourArg[3]));
    v += orig[x][y][z]*w;

    }
    }
    img[x][y][z] = v/w;

    }

    """)

    Gauss_cuda = mod.get_function("Gauss_cuda")
    bilateral_cuda = mod.get_function("bilateral_cuda")

    def generateGaussianTemplate(template_size,sigma):
    template = np.zeros((template_size,template_size))
    mid = template_size/2
    sum = 0
    pi = 3.1415926
    for i in range(template_size):
    x = pow(i-mid,2)
    for j in range(template_size):
    y = pow(j-mid,2)
    g = math.exp((x+y)/(-2*sigma*sigma))/(2*pi*sigma)
    template[i][j] = g
    sum+=g
    #归一化
    template = template/sum
    return template

    def Gauss_filter(img,template_size,sigma):
    [rows,cols,channel] = img.shape
    border = int(template_size/2)
    template = generateGaussianTemplate(template_size,sigma)
    im = cv2.copyMakeBorder(img,border,border,border,border,cv2.BORDER_REPLICATE)
    Gauss_cuda(
    drv.InOut(img), drv.In(im), drv.In(template),template_size,
    block=(template_size,template_size,3),grid=(rows,cols))
    return img

    def Bilateral_filter(img,template_size,sigma1,sigma2):
    img = img/255
    border = int(template_size/2)
    [rows,cols,channel] = img.shape
    tmp = np.arange(-border,border+1)
    [x,y] = np.meshgrid(tmp,tmp)
    X = np.power(x,2)
    Y = np.power(y,2)
    d = np.exp(-(X+Y)/(2*sigma1*sigma1))
    orig_img = img
    Gauss_cuda(
    drv.InOut(img), drv.In(orig_img), drv.In(d),drv.In(np.array([border,rows,cols,sigma2])),
    block=(template_size,template_size,3),grid=(rows,cols))
    return img*255

    if name=='__main__':
    img = cv2.imread('demo2.jpg')
    #noise = skimage.util.random_noise(img,'gaussian')*255
    #cv2.imwrite('noise.jpg',noise)
    template_size = 3
    sigma_g = 0.8
    sigma = [3,0.1]
    bilateral = Bilateral_filter(img,template_size,sigma[0],sigma[1])
    Gauss = Gauss_filter(img,template_size,sigma_g)
    cv2.imwrite('bilateral_result_{}_{}_{}.jpg'.format(template_size,sigma[0],sigma[1]),bilateral)
    cv2.imwrite('Gauss_result_{}_{}.jpg'.format(template_size,sigma_g),Gauss)

        试试这个!!!!!!!!!!!!!!!!!
    
    评论

报告相同问题?

悬赏问题

  • ¥15 求帮我调试一下freefem代码
  • ¥15 R语言Rstudio突然无法启动
  • ¥15 关于#matlab#的问题:提取2个图像的变量作为另外一个图像像元的移动量,计算新的位置创建新的图像并提取第二个图像的变量到新的图像
  • ¥15 改算法,照着压缩包里边,参考其他代码封装的格式 写到main函数里
  • ¥15 用windows做服务的同志有吗
  • ¥60 求一个简单的网页(标签-安全|关键词-上传)
  • ¥35 lstm时间序列共享单车预测,loss值优化,参数优化算法
  • ¥15 Python中的request,如何使用ssr节点,通过代理requests网页。本人在泰国,需要用大陆ip才能玩网页游戏,合法合规。
  • ¥100 为什么这个恒流源电路不能恒流?
  • ¥15 有偿求跨组件数据流路径图