Ck_Zheng 2021-07-07 20:20 采纳率: 33.3%
浏览 149
已结题

刚学cusparse库,做spmv乘法时代码没报错没有得到自己想要的结果

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <cuda_runtime.h>
#include <cusparse.h>

// error check macros
#define CUSPARSE_CHECK(x) {cusparseStatus_t _c=x; if (_c != CUSPARSE_STATUS_SUCCESS) {printf("cusparse fail: %d, line: %d\n", (int)_c, LINE); exit(-1);}}

#define cudaCheckErrors(msg)
do {
cudaError_t __err = cudaGetLastError();
if (__err != cudaSuccess) {
fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n",
msg, cudaGetErrorString(__err),
FILE, LINE);
fprintf(stderr, "*** FAILED - ABORTING\n");
exit(1);
}
} while (0)

int main()
{
const int n = 3;
const int nnzA = 5;
const double h_valA[nnzA] = { 3.0, 2.0, 2.0, 2.0, 1.0 };
const int h_csrColIndA[nnzA] = { 0, 2, 1, 0, 2 };
const int h_csrRowPtrA[n+1] = { 0, 2, 3, 5 };
const double h_b[n] = { 3.5, 1.5, 2.0 };
int y[n] = { 0.0, 0.0, 0.0 };

//CSR format of matrix A and Vector b (device)
double* valA;
int* csrRowPtrA;
int* csrColIndA;
double* b;
double* Y;

cudaMalloc((void**)&valA, nnzA * sizeof(double));
cudaMalloc((void**)&csrRowPtrA, (n + 1) * sizeof(int));
cudaMalloc((void**)&csrColIndA, nnzA * sizeof(int));
cudaMalloc((void**)&b, n * sizeof(double));
cudaMalloc((void**)&Y, n * sizeof(double));
cudaCheckErrors("cudaMalloc fail");

cudaMemcpy(valA, h_valA, (size_t)(nnzA * sizeof(double)), cudaMemcpyHostToDevice);
cudaMemcpy(csrRowPtrA, h_csrRowPtrA, (size_t)((n + 1) * sizeof(int)), cudaMemcpyHostToDevice);
cudaMemcpy(csrColIndA, h_csrColIndA, (size_t)(nnzA * sizeof(int)), cudaMemcpyHostToDevice);
cudaMemcpy(b, h_b, (size_t)(n * sizeof(double)), cudaMemcpyHostToDevice);
cudaMemcpy(Y, y, (size_t)(n * sizeof(double)), cudaMemcpyHostToDevice);
cudaCheckErrors("cudaMemcpy fail");

float      alpha = 1.0f;
float      beta = 0.0f;

cusparseHandle_t handle = NULL;
cusparseSpMatDescr_t matA;
cusparseDnVecDescr_t vecX, vecY;
void* dBuffer = NULL;
size_t               bufferSize = 0;
CUSPARSE_CHECK(cusparseCreate(&handle));
CUSPARSE_CHECK(cusparseCreateCsr(&matA, n, n, nnzA, csrRowPtrA, csrColIndA, valA,
    CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F));

CUSPARSE_CHECK(cusparseCreateDnVec(&vecX, n, b, CUDA_R_32F));

CUSPARSE_CHECK(cusparseCreateDnVec(&vecY, n, Y, CUDA_R_32F));

CUSPARSE_CHECK(cusparseSpMV_bufferSize(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX, &beta, vecY, CUDA_R_32F, CUSPARSE_MV_ALG_DEFAULT, &bufferSize));

cudaMalloc(&dBuffer, bufferSize);
cudaCheckErrors("cudaMalloc fail");
CUSPARSE_CHECK(cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX, &beta, vecY, CUDA_R_32F, CUSPARSE_MV_ALG_DEFAULT, dBuffer));


CUSPARSE_CHECK(cusparseDestroySpMat(matA));
CUSPARSE_CHECK(cusparseDestroyDnVec(vecX));
CUSPARSE_CHECK(cusparseDestroyDnVec(vecY));
CUSPARSE_CHECK(cusparseDestroy(handle));

cudaMemcpy(y, Y, n * sizeof(double), cudaMemcpyDeviceToHost);
cudaCheckErrors("cudaMemcpy fail");
for (int i = 0; i < n; i++)
{
    printf("x[%i] = %f\n", i, y[i]);
}
cudaFree(dBuffer);
cudaFree(csrRowPtrA);
cudaFree(csrColIndA);
cudaFree(valA);
cudaFree(Y);
cudaFree(b);

return 0;

}

  • 写回答

2条回答 默认 最新

  • unipx2008 2022-11-07 09:26
    关注

    CUDA_R_32F数据类型与double不匹配

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论
查看更多回答(1条)

报告相同问题?

问题事件

  • 系统已结题 1月3日
  • 已采纳回答 12月26日

悬赏问题

  • ¥15 目详情-五一模拟赛详情页
  • ¥15 有了解d3和topogram.js库的吗?有偿请教
  • ¥100 任意维数的K均值聚类
  • ¥15 stamps做sbas-insar,时序沉降图怎么画
  • ¥15 买了个传感器,根据商家发的代码和步骤使用但是代码报错了不会改,有没有人可以看看
  • ¥15 关于#Java#的问题,如何解决?
  • ¥15 加热介质是液体,换热器壳侧导热系数和总的导热系数怎么算
  • ¥100 嵌入式系统基于PIC16F882和热敏电阻的数字温度计
  • ¥15 cmd cl 0x000007b
  • ¥20 BAPI_PR_CHANGE how to add account assignment information for service line