Debugging CUDA - CudaUnknownError
Debugging CUDA - CudaUnknownError
我正在尝试使用CUDA创建mandlebrot集的位图图像。我看了一些教程,已经在这里获得了一些关于将非托管CUDAdll与托管C#gui集成的过程的帮助。我现在遇到的问题是我的CUDA dll没有正确形成位图,当我在内核启动后在cudaDeviceSynchronize()上使用错误检查宏时,我会得到cudaUnknowError。
以下是相关代码:
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %dn", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
struct complex
{
float r, i;
__device__ complex(float _r, float _i) : r(_r), i(_i) {}
__device__ float magnitudeSquared(){ return (r*r + i*i) ; }
__device__ complex& operator*=(const complex& rhs)
{
r = (r * rhs.r - i * rhs.i);
i = (r * rhs.i + i * rhs.r);
return *this;
}
__device__ complex& operator+=(const complex& rhs)
{
r = (r + rhs.r);
i = (i + rhs.i);
return *this;
}
};
__device__ int mandlebrotDiverge(complex *z)
{
complex c(*z);
int i = 0;
for(i = 0; i < MAX_ITERATIONS; i++)
{
*z *= *z;
*z += c;
if(z->magnitudeSquared() >= 2)
{
return 1;
}
}
return 0;
}
__global__ void kernel(int *ptr, int width, int height)
{
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
float scale = 1.5f;
complex z(scale*(float)(width/2 - x)/(width/2), scale*(float)(height/2 - y)/(height/2));
if(offset < (1920*1080))
{
int mValue = mandlebrotDiverge(&z);
ptr[offset*3 + (uint8_t)0] = (uint8_t)(mValue*255);
ptr[offset*3 + (uint8_t)1] = (uint8_t)(mValue*255);
ptr[offset*3 + (uint8_t)2] = (uint8_t)(mValue*255);
}
}
extern "C" __declspec(dllexport) void __cdecl generateBitmap(void *bitmap)
{
int width = 1920;
int height = 1080;
int *dev_bmp;
dim3 blocks(width/16, height/16);
dim3 threads(16, 16);
gpuErrchk(cudaMalloc((void**)&dev_bmp, (3*width*height)));
kernel<<<blocks, threads>>>(dev_bmp, width, height);
gpuErrchk(cudaPeekAtLastError());
gpuErrchk(cudaDeviceSynchronize());
gpuErrchk(cudaMemcpy(bitmap, dev_bmp, (width*height*3), cudaMemcpyDeviceToHost));
cudaFree(dev_bmp);
}
当我逐步完成代码时,一切似乎都正常工作,直到我到达gpuErrchk(cudaDeviceSynchronize());-当我进入时,错误代码只是简单地说"cudaUnknowError"。我不知道我在这一点上做错了什么。如有任何关于如何改进此解决方案的帮助或建议,我们将不胜感激。
编辑:
好吧,看看CUDA_memcheck,我得到了这个错误(数百次):
========= CUDA-MEMCHECK
========= Invalid __global__ write of size 4
========= at 0x00000a10 in C:/.../kernel.cu:77:kernel(int*, int, int)
========= by thread (15,11,0) in block (1,17,0)
========= Address 0x05a37f74 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
所以我从*int改为*unsigned char,因为我试图分配单个字节的数组,而不是int。很多错误都被清除了,但现在我得到了这个:
========= CUDA-MEMCHECK
========= Program hit error 6 on CUDA API call to cudaDeviceSynchronize
========= Saved host backtrace up to driver entry point at error
========= Host Frame:C:Windowssystem32nvcuda.dll (cuD3D11CtxCreate + 0x102459) [0x11e4b9]
========= Host Frame:C:...cudart32_55.dll (cudaDeviceSynchronize + 0xdd) [0x1149d]
========= Host Frame:C:...FractalMaxUnmanaged.dll (generateBitmap + 0xf0) [0x97c0]
=========
========= ERROR SUMMARY: 1 error
好吧,我正在取得进展,但现在当我遍历c#应用程序时,字节缓冲区中每个字节的值都是255,这没有意义。这是c#代码:
public unsafe class NativeMethods
{
[DllImport(@"C:UsersBillDocumentsVisual Studio 2012ProjectsFractalMaxUnmanagedDebugFractalMaxUnmanaged.dll", CallingConvention=CallingConvention.Cdecl)]
public static extern void generateBitmap(void *bitmap);
public static Bitmap create()
{
byte[] buf = new byte[1920 * 1080 * 3];
fixed (void* pBuffer = buf)
{
generateBitmap(pBuffer);
}
IntPtr unmanagedPtr = Marshal.AllocHGlobal(buf.Length);
Marshal.Copy(buf, 0, unmanagedPtr, buf.Length);
Bitmap img = new Bitmap(1920, 1080, 1920*3, PixelFormat.Format24bppRgb, unmanagedPtr);
Marshal.FreeHGlobal(unmanagedPtr);
return img;
}
}
您的问题是您的内存分配和副本是错误的,您忘记了cudaMalloc/cudaMemcpy需要以字节为单位的大小。由于int
使用了4个字节,所以实际分配的内存少于内核所需的内存。使用这个(或者使用只需要1个字节的unsigned char
):
cudaMalloc((void**)&dev_bmp, (3*width*height)*sizeof(int));
cudaMemcpy(bitmap, dev_bmp, (3*width*height)*sizeof(int), cudaMemcpyDeviceToHost);
还要确保bitmap
已正确分配。正如@Eugene所说,使用cuda-memcheck
是找到此类错误来源的好方法。
相关文章:
- 没有找到相关文章