报错内存释失败:Traceback (most recent call last):
File "filter_cuda.py", line 103, in
bilateral = Bilateral_filter(img,template_size,sigma[0],sigma[1])
File "filter_cuda.py", line 93, in Bilateral_filter
block=(template_size,template_size,3),grid=(rows,cols))
File "/usr/local/lib/python3.5/dist-packages/pycuda/driver.py", line 405, in function_call
Context.synchronize()
pycuda._driver.LogicError: cuCtxSynchronize failed: an illegal memory access was encountered
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: an illegal memory access was encountered
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: an illegal memory access was encountered
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: an illegal memory access was encountered
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuMemFree failed: an illegal memory access was encountered
PyCUDA WARNING: a clean-up operation failed (dead context maybe?)
cuModuleUnload failed: an illegal memory access was encountered
import os,math,cv2,numpy
from PIL import Image
import numpy as np
import skimage
import pycuda.autoinit
import pycuda.driver as drv
from pycuda.compiler import SourceModule
from timeit import default_timer as timer
mod = SourceModule("""
#include<math.h>
__global__ void Gauss_cuda(int ***img,int ***im,float **disTemplate,int template_size)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
int z = threadIdx.z;
int count = 0;
for(int i=x;i<x+template_size;i++)
{
for(int j=y;j<y+template_size;j++)
{
count += im[i][j][z]*disTemplate[i][j];
}
}
img[x][y][z] = count;
}
__global__ void bilateral_cuda(float ***img,float ***orig,float **disTemplate,float *ourArg)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
int z = threadIdx.z;
int xmin = max(int(x-int(ourArg[0])),0);
int xmax = min(int(x+int(ourArg[0])+1),int(ourArg[1]));
int ymin = max(int(y-int(ourArg[0])),0);
int ymax = min(int(y+int(ourArg[0])+1),int(ourArg[2]));
int w = 0;
int v = 0;
for(int i=xmin;i<xmax;i++)
{
for(int j=ymin;j<ymax;j++)
{
w += w+disTemplate[xmin-x+int(ourArg[0])+i][ymin-y+int(ourArg[0])+i]*exp((orig[i][j][z]-orig[x][y][z])*(orig[i][j][z]-orig[x][y][z])/(2*ourArg[3]*ourArg[3]));
v += orig[x][y][z]*w;
}
}
img[x][y][z] = v/w;
}
""")
Gauss_cuda = mod.get_function("Gauss_cuda")
bilateral_cuda = mod.get_function("bilateral_cuda")
def generateGaussianTemplate(template_size,sigma):
template = np.zeros((template_size,template_size))
mid = template_size/2
sum = 0
pi = 3.1415926
for i in range(template_size):
x = pow(i-mid,2)
for j in range(template_size):
y = pow(j-mid,2)
g = math.exp((x+y)/(-2*sigma*sigma))/(2*pi*sigma)
template[i][j] = g
sum+=g
#归一化
template = template/sum
return template
def Gauss_filter(img,template_size,sigma):
[rows,cols,channel] = img.shape
border = int(template_size/2)
template = generateGaussianTemplate(template_size,sigma)
im = cv2.copyMakeBorder(img,border,border,border,border,cv2.BORDER_REPLICATE)
Gauss_cuda(
drv.InOut(img), drv.In(im), drv.In(template),template_size,
block=(template_size,template_size,3),grid=(rows,cols))
return img
def Bilateral_filter(img,template_size,sigma1,sigma2):
img = img/255
border = int(template_size/2)
[rows,cols,channel] = img.shape
tmp = np.arange(-border,border+1)
[x,y] = np.meshgrid(tmp,tmp)
X = np.power(x,2)
Y = np.power(y,2)
d = np.exp(-(X+Y)/(2*sigma1*sigma1))
orig_img = img
bilateral_cuda(
drv.InOut(img), drv.In(orig_img), drv.In(d),drv.In(np.array([border,rows,cols,sigma2])),
block=(template_size,template_size,3),grid=(rows,cols))
return img*255
if __name__=='__main__':
img = cv2.imread('demo2.jpg')
#noise = skimage.util.random_noise(img,'gaussian')*255
#cv2.imwrite('noise.jpg',noise)
template_size = 3
sigma_g = 0.8
sigma = [3,0.1]
bilateral = Bilateral_filter(img,template_size,sigma[0],sigma[1])
Gauss = Gauss_filter(img,template_size,sigma_g)
cv2.imwrite('bilateral_result_{}_{}_{}.jpg'.format(template_size,sigma[0],sigma[1]),bilateral)
cv2.imwrite('Gauss_result_{}_{}.jpg'.format(template_size,sigma_g),Gauss)