#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include
#include
#include
typedef struct
{
BYTE r;
BYTE b;
BYTE g;
}RGB;
global void func(BYTE *gray, RGB img[][500], int width)
{
int i = threadIdx.x;
for (int j = 0; j < width; j++)
{
gray[(img[i][j].r * 38 + img[i][j].g * 75 + img[i][j].b * 15) >> 7]++;
}
}
int main()
{
BITMAPFILEHEADER fileHeader;
BITMAPINFOHEADER infoHeader;
FILE* pfin;
BYTE gray[256] = { 0 };
RGB img[256][256] = { 0 };
cudaError_t cudaStatus;
pfin = fopen("1.bmp", "rb");
//ReadtheBitmapfileheader;
fread(&fileHeader, sizeof(BITMAPFILEHEADER), 1, pfin);
//ReadtheBitmapinfoheader;
fread(&infoHeader, sizeof(BITMAPINFOHEADER), 1, pfin);
//the size of the picture
int size = infoHeader.biWidth*infoHeader.biHeight;
//read the file
fread(img, sizeof(RGB), size, pfin);
int width = infoHeader.biWidth;
BYTE *dev_gray;
cudaMalloc((void **)&dev_gray,width*sizeof(BYTE));
func<<<1,width>>>(dev_gray, img, width);
cudaMemcpy(gray, dev_gray, width*sizeof(BYTE),cudaMemcpyDeviceToHost);
cudaFree(dev_gray);
getchar();
return 0;
}
想要的结果是结果在gray数组中 但无结果 是内存的问题还是什么问题
应该如何修改呢