cuda 9.2 curand_init extremely slow

cuda 9.2 curand_init extremely slow

本文关键字:init extremely slow curand cuda      更新时间:2023-10-16

我有一个程序,使用 cuda 生成带有随机元素的数组。自从我从 cuda 9.1 升级到 cuda 9.2 以来,所需的时间已经从几分之一秒(约 0.1 秒)增加到近两分钟(没有更改任何代码)。问题似乎出在 curand_init() 函数上,因为其余部分的运行速度大致相同。是我遗漏了库中的某些更改,还是这是一个 bug,或者是我的代码有问题?下面是一个示例:

#include <iostream>
#include <curand.h>
#include <curand_kernel.h>
// Checks the cudaError_t produced by `ans` via gpuAssert, tagging any failure
// with the call site's file and line. The do { } while (0) wrapper makes the
// macro expand to exactly one statement, so `if (c) cudaErrchk(x); else ...`
// parses as intended; call sites must end with a semicolon.
#define cudaErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)
/**
 * Reports a failed CUDA runtime call on stderr and optionally terminates.
 *
 * Does nothing when `code` is cudaSuccess. Otherwise prints the CUDA error
 * string followed by the source location, and, if `abort` is true, exits the
 * process passing the error code as the exit status.
 *
 * @param code   result of a CUDA runtime call
 * @param file   source file of the call site (normally __FILE__)
 * @param line   source line of the call site (normally __LINE__)
 * @param abort  when true (the default), terminate the process on failure
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    // Fast path: nothing to report.
    if (code == cudaSuccess)
        return;

    std::cerr << "cudaAssert: " << cudaGetErrorString(code) << " " << file << ": " << line << std::endl;

    if (abort)
        exit(code);
}
/**
 * Kernel: initialises one cuRAND generator state per element.
 *
 * Each thread derives a flat 1D index from its block and thread indices.
 * Threads whose index is >= dim do nothing, so the grid may be larger than
 * dim. Thread i calls curand_init with the shared seed, subsequence number i
 * and offset 0, writing the result to state[i].
 *
 * @param state  device-accessible array with at least dim entries
 * @param seed   seed passed unchanged to every curand_init call
 * @param dim    number of states to initialise
 */
__global__
void setup_curand_state (curandState *state, int seed, int dim)
{
    const int gid = blockIdx.x * blockDim.x + threadIdx.x;

    // Tail threads of the last block have no state to initialise.
    if (gid >= dim)
        return;

    curand_init(seed, gid, 0, &state[gid]);
}
/**
 * Kernel: fills an array with samples drawn through curand_normal.
 *
 * Each thread derives a flat 1D index from its block and thread indices.
 * Threads whose index is >= dim do nothing. Thread i draws one value from
 * curand_state[i] and stores it in to[i].
 *
 * @param to            device-accessible output array with at least dim floats
 * @param curand_state  per-element generator states; entry i is used by
 *                      thread i (expected to have been initialised beforehand)
 * @param dim           number of elements to generate
 */
__global__
void set_random (float* to, curandState* curand_state, int dim)
{
    const int gid = blockIdx.x * blockDim.x + threadIdx.x;

    // Tail threads of the last block have no element to write.
    if (gid >= dim)
        return;

    to[gid] = curand_normal(&curand_state[gid]);
}
/**
 * Allocates a managed float array and a matching array of cuRAND states,
 * initialises the states on the device, fills the float array with random
 * samples, and releases both allocations.
 *
 * Every CUDA runtime call and both kernel launches are checked through
 * cudaErrchk, so any failure is reported with its source location.
 *
 * @return 0 on success; on a CUDA error the process exits inside gpuAssert.
 */
int main () {
    const int dim = 100000;

    // One launch geometry shared by both kernels: ceil(dim / threads_per_block) blocks.
    const int threads_per_block = 1024;
    const int blocks = (dim + threads_per_block - 1) / threads_per_block;

    float *data = NULL;
    cudaErrchk (cudaMallocManaged ((void**) &data, dim * sizeof (float)));
    curandState* curand_state = NULL;
    cudaErrchk (cudaMalloc (&curand_state, (dim * sizeof (curandState))));

    // The kernel takes an int seed, so the time_t value is narrowed explicitly.
    setup_curand_state <<<blocks, threads_per_block>>> (curand_state, static_cast<int> (time(NULL)), dim);
    // A launch does not return a status; configuration errors (for example a
    // block size the kernel cannot run with) are only visible via cudaGetLastError.
    cudaErrchk (cudaGetLastError());
    cudaErrchk (cudaDeviceSynchronize());

    set_random <<<blocks, threads_per_block>>> (data, curand_state, dim);
    cudaErrchk (cudaGetLastError());
    // Wait for the kernel so execution errors surface here, before teardown.
    cudaErrchk (cudaDeviceSynchronize());

    // Release both allocations; the state array was previously leaked.
    cudaErrchk (cudaFree (curand_state));
    cudaErrchk (cudaFree (data));
    return 0;
}

Mr. Bonobo 在上面的评论中回答:

显然,通过 apt 更新 cuda 悄无声息地破坏了安装。为 9.1 编译的代码仍然可以运行,但速度慢了大约 100/1000 倍。重新安装 nvidia-cuda-toolkit 解决了这个问题。