// Recorded output from a failing run of this program (kept for reference):
//   Error :hip2.cpp:36 , code : 1011 , reason : hipErrorInvalidValue
#include <hip/hip_runtime.h>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
// HIP_CHECK(command): evaluates a HIP runtime call exactly once. If the call
// does not return hipSuccess, prints the source location, the numeric error
// code and hipGetErrorString()'s description to stderr, then exits with -1.
//
// The body is wrapped in do { ... } while (0) so that `HIP_CHECK(x);` behaves
// as a single statement (safe inside un-braced if/else), and the temporary
// uses a distinctive name so it cannot shadow a caller variable that appears
// inside `command`.
#define HIP_CHECK(command)                                                    \
    do {                                                                      \
        hipError_t hip_check_status_ = (command);                             \
        if (hip_check_status_ != hipSuccess) {                                \
            fprintf(stderr, "Error :%s:%d , ", __FILE__, __LINE__);           \
            fprintf(stderr, "code : %d , reason : %s \n",                     \
                    (int)hip_check_status_,                                   \
                    hipGetErrorString(hip_check_status_));                    \
            exit(-1);                                                         \
        }                                                                     \
    } while (0)
// Kernel: element-wise square.
//
// Each thread reads one element of d_in, squares it, and writes the result to
// the same position in d_out.
//
// Launch layout: 1D grid of 1D blocks. The element index is the thread's
// global index (blockIdx.x * blockDim.x + threadIdx.x), so any split of the
// work into blocks/threads addresses distinct elements.
//
// Precondition: there is no bounds check, so the total number of launched
// threads (gridDim.x * blockDim.x) must not exceed the number of elements in
// d_in / d_out.
__global__ void square(int* d_out, int* d_in) {
    // Using threadIdx.x alone would make every block touch the same elements;
    // include the block offset so the index is unique across the whole grid.
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    // Read this thread's input element from global memory into a local int.
    int value = d_in[idx];
    d_out[idx] = value * value;
}
int main(int argc, char** argv) {
const int ARRAY_SIZE = 64;
size_t ARRAY_BYTES = ARRAY_SIZE * sizeof(int);
//h:cpu d:gpu
int h_in[ARRAY_SIZE];
for (int i = 0; i < ARRAY_SIZE; i++)
{
h_in[i] = int(i); //类型转换
}
int h_out[ARRAY_SIZE];
//在GPU声明指针
int *d_in;
int *d_out;
//cudaMalloc:分配GPU的数据
HIP_CHECK(hipMalloc(&d_in, ARRAY_BYTES));
HIP_CHECK(hipMalloc(&d_out, ARRAY_BYTES));
//从CPU拷贝到GPU
//目标地址,原地址,字节数,转移方向
//转移方向有三个
HIP_CHECK(hipMemcpy(d_in, h_in, ARRAY_BYTES, hipMemcpyHostToDevice));
//启动运算符
dim3 dimGrid(64);
dim3 dimBlock(1);
hipLaunchKernelGGL(square, dimGrid, dimBlock, 0, 0 , d_out, d_in);
///将结果数组从d_out复制回h_out(cpu里)
HIP_CHECK(hipMemcpy(h_out, d_out, ARRAY_BYTES, hipMemcpyDeviceToHost));
//打印出h_out数据
for (int i = 0; i < ARRAY_SIZE; i++) {
printf("%d", h_out[i]);
//每行打印4个,4个以后开始新行
printf(((i % 4) != 3) ? "\t" : "\n");
}
//释放GPU内存
HIP_CHECK(hipFree(d_in));
HIP_CHECK(hipFree(d_out));
d_in = NULL;
d_out = NULL;
return 0;
}