简单的 CUDA 测试总是失败并出现错误"an illegal memory access was encountered"
Simple CUDA Test always fails with "an illegal memory access was encountered" error
如果我运行这个程序,会得到错误"an illegal memory access was encountered in matrixMulti.cu at line 48"(在 matrixMulti.cu 的第48行遇到非法内存访问)。我搜索并尝试了很多方法,所以希望有人能帮助我。
第48行:HANDLE_ERROR ( cudaMemcpy(array, devarray, N*N*sizeof(int), cudaMemcpyDeviceToHost) );
这个项目只是为了入门CUDA。我试着实现矩阵乘法。
#include <iostream>
#include<cuda.h>
#include <stdio.h>
using namespace std;
#define HANDLE_ERROR( err ) ( HandleError( err, __FILE__, __LINE__ ) )
void printVec(int** a, int n);
// Aborts the program if a CUDA runtime call failed.
//
// err  - status returned by a CUDA runtime API call
// file - source file of the call site (HANDLE_ERROR passes __FILE__)
// line - source line of the call site (HANDLE_ERROR passes __LINE__)
//
// For any status other than cudaSuccess, prints the runtime's error string
// together with the call site and exits with EXIT_FAILURE. Returns normally
// when err == cudaSuccess.
static void HandleError( cudaError_t err, const char *file, int line )
{
    if (err != cudaSuccess)
    {
        // The format must end in "\n"; a bare 'n' would be printed literally
        // and leave the message without a line terminator.
        printf( "%s in %s at line %d\n", cudaGetErrorString( err ),
                file, line );
        exit( EXIT_FAILURE );
    }
}
// Checks the CUDA runtime's last-error state and aborts on failure.
//
// msg - caller-supplied label printed in front of the runtime's error string
//
// Calls cudaGetLastError(), which returns the most recent error and resets
// the error state. If it is not cudaSuccess, the message is written to
// stderr and the process exits with EXIT_FAILURE.
void checkCUDAError(const char *msg)
{
    cudaError_t err = cudaGetLastError();
    if( cudaSuccess != err)
    {
        // "\n" (not a literal 'n') so the diagnostic ends with a newline.
        fprintf(stderr, "Cuda error: %s: %s.\n", msg,
                cudaGetErrorString( err) );
        exit(EXIT_FAILURE);
    }
}
// Kernel from the question: stores 4 through b[0][0]; parameter `a` is unused.
//
// b[0][0] dereferences twice: the value loaded from b[0] is itself used as
// the address of the store, so b[0] must hold a pointer that is valid on the
// device.
// NOTE(review): the main() shown with this kernel fills `b` by copying bytes
// from a host-side table of host pointers, so b[0] would hold a host address
// here; this is the failing access the question is about.
__global__ void MatrixMulti(int** a, int** b) {
b[0][0]=4;
}
// Host driver from the question. This is the version reported to fail with
// "an illegal memory access was encountered"; the code is kept as posted.
int main() {
int N =10;
int** array, **devarray;
// Host matrix: a table of N row pointers, each row a separate new[] of N
// ints. The rows are never initialised and are not one contiguous block.
array = new int*[N];
for(int i = 0; i < N; i++) {
array[i] = new int[N];
}
// Device side: one buffer of N*N ints, although devarray is declared int**.
HANDLE_ERROR ( cudaMalloc((void**)&devarray, N*N*sizeof(int) ) );
// The copy source is the row-pointer table `array`, not the row data, so
// pointer values (not matrix elements) are what gets uploaded.
// NOTE(review): N*N*sizeof(int) also looks larger than the N-pointer table.
HANDLE_ERROR ( cudaMemcpy(devarray, array, N*N*sizeof(int), cudaMemcpyHostToDevice) );
// `array` (allocated with new on the host) is passed as kernel argument a,
// devarray as b. The launch is not followed by any error check.
MatrixMulti<<<1,1>>>(array,devarray);
// First checked call after the launch; the question reports the error here.
// The destination is again the row-pointer table rather than the rows.
HANDLE_ERROR ( cudaMemcpy(array, devarray, N*N*sizeof(int), cudaMemcpyDeviceToHost) );
HANDLE_ERROR ( cudaFree(devarray) );
printVec(array,N);
// The host allocations made with new[] above are not released before return.
return 0;
}
// Prints an n x n matrix, given as a table of row pointers, to standard output.
//
// a - table of n row pointers; each row must provide at least n ints
// n - number of rows and of columns to print
//
// Every element is followed by a single space; every row is terminated by
// one more space and std::endl (newline plus flush).
void printVec(int** a , int n) {
    int row = 0;
    while (row < n) {
        const int* cells = a[row];
        for (int col = 0; col < n; ++col) {
            std::cout << cells[col] << " ";
        }
        std::cout << " " << std::endl;
        ++row;
    }
}
通常,您这种分配和复制双下标C数组的方法是行不通的。cudaMemcpy 需要的是扁平的、连续分配的、单指针、单下标的数组。
由于这种混淆,传递给内核的指针(int** a, int** b)无法被正确(安全)地解引用两次:
b[0][0]=4;
当您在内核代码中执行上述操作时,会发生非法内存访问,因为您并没有在设备上正确地建立"指向指针的指针"这种形式的分配。
如果您使用 cuda-memcheck 运行代码,您将在内核代码中看到这一非法内存访问的另一处提示。
在这些情况下,通常的建议是将2D数组"展平"为一维,并使用适当的指针或索引运算来模拟2D访问。分配真正的2D数组(即双下标、双指针)也是可能的,但相当复杂(部分原因是需要"深拷贝")。如果您想了解更多信息,请在页面右上角搜索"CUDA 2D array"。
以下是您的代码的一个修改版本,它将设备端数组扁平化为一维:
$ cat t60.cu
#include <iostream>
#include <cuda.h>
#include <stdio.h>
using namespace std;
#define HANDLE_ERROR( err ) ( HandleError( err, __FILE__, __LINE__ ) )
void printVec(int** a, int n);
// Reports a failed CUDA runtime call and terminates the program.
//
// err  - status code produced by the wrapped CUDA runtime call
// file - file name of the call site (HANDLE_ERROR supplies __FILE__)
// line - line number of the call site (HANDLE_ERROR supplies __LINE__)
//
// Does nothing when err is cudaSuccess. Otherwise prints
// "<error string> in <file> at line <line>" and exits with EXIT_FAILURE.
static void HandleError( cudaError_t err, const char *file, int line )
{
    if (err == cudaSuccess)
    {
        return;
    }
    // Terminate the message with a real newline ("\n"); the previous
    // format ended in a plain 'n' character instead.
    printf( "%s in %s at line %d\n", cudaGetErrorString( err ),
            file, line );
    exit( EXIT_FAILURE );
}
// Aborts if the CUDA runtime has a pending error.
//
// msg - short description of the operation being checked; printed before
//       the runtime's own error string
//
// cudaGetLastError() both returns and clears the pending error, which makes
// this suitable right after a kernel launch (launches return no status of
// their own). On error the diagnostic goes to stderr and the process exits
// with EXIT_FAILURE.
void checkCUDAError(const char *msg)
{
    const cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess)
    {
        return;
    }
    // End the line with "\n"; the previous format printed a literal 'n'.
    fprintf(stderr, "Cuda error: %s: %s.\n", msg,
            cudaGetErrorString( err) );
    exit(EXIT_FAILURE);
}
// Fills a flattened, row-major n x n matrix so that element (row, col)
// holds the value col.
//
// b - device pointer to one contiguous buffer of at least n*n ints
// n - matrix dimension (rows == columns)
//
// Launch layout: any 1D grid of 1D blocks. Elements are distributed with a
// grid-stride loop, so the kernel covers the whole matrix for every launch
// size; with <<<1,1>>> the single thread visits all n*n elements in order.
// No shared memory is used. n == 0 is a no-op.
__global__ void MatrixMulti(int* b, unsigned n) {
    // size_t arithmetic keeps n*n and the flat index from wrapping for large n
    // and avoids comparing signed loop counters against the unsigned n.
    const size_t total  = (size_t)n * n;
    const size_t stride = (size_t)gridDim.x * blockDim.x;
    for (size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         idx < total; idx += stride) {
        const size_t row = idx / n;
        const size_t col = idx - row * n;
        b[(row*n)+col] = (int)col; // simulate 2D access in kernel code
    }
}
// Host driver: builds an N x N host matrix on top of one contiguous block,
// round-trips it through a flattened device buffer that MatrixMulti fills,
// and prints the result.
//
// Returns 0 on success; any CUDA failure terminates the process through
// HANDLE_ERROR / checkCUDAError.
int main() {
    const int N = 10;
    int** array;     // host: row-pointer table layered over one contiguous block
    int*  devarray;  // flatten device-side array

    array = new int*[N];
    // host allocation needs to be contiguous; value-initialise it so the
    // upload below does not read indeterminate memory
    array[0] = new int[N*N]();
    for (int i = 1; i < N; i++) array[i] = array[i-1]+N; //2D on top of contiguous allocation

    HANDLE_ERROR ( cudaMalloc((void**)&devarray, N*N*sizeof(int) ) );
    HANDLE_ERROR ( cudaMemcpy(devarray, array[0], N*N*sizeof(int), cudaMemcpyHostToDevice) );

    MatrixMulti<<<1,1>>>(devarray, N);
    // A launch returns no status: pick up launch-configuration errors now...
    checkCUDAError("MatrixMulti launch");
    // ...and wait for the kernel so execution faults are reported here rather
    // than being attributed to the cudaMemcpy that follows.
    HANDLE_ERROR ( cudaDeviceSynchronize() );

    HANDLE_ERROR ( cudaMemcpy(array[0], devarray, N*N*sizeof(int), cudaMemcpyDeviceToHost) );
    HANDLE_ERROR ( cudaFree(devarray) );
    printVec(array,N);

    // Release the host storage: the contiguous data block first, then the
    // row-pointer table that points into it.
    delete[] array[0];
    delete[] array;
    return 0;
}
// Writes the n x n matrix addressed through the row-pointer table `a` to
// standard output, one matrix row per output line.
//
// a - table of n row pointers; a[r][c] must be readable for 0 <= r, c < n
// n - matrix dimension
//
// Each value is followed by one space, and each line ends with an extra
// space, a newline and a flush of std::cout.
void printVec(int** a , int n) {
    for (int r = 0; r < n; ++r) {
        int c = 0;
        while (c < n) {
            std::cout << a[r][c] << " ";
            ++c;
        }
        // '\n' followed by std::flush is what std::endl does.
        std::cout << " " << '\n' << std::flush;
    }
}
$ nvcc -arch=sm_20 -o t60 t60.cu
$ ./t60
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
$
相关文章:
- Atom gpp编译器给出"'C:UsersadminUser' is not recognized as an internal or..."错误
- 尝试在类中编译内核,出现错误"__init__() got an unexpected keyword argument 'kernel'"
- C++:将运算符作为参数传递会导致错误"expected an identifier"
- 为什么日食总是发生错误"An internal error occurred during: "通知选择侦听器". java.lang.StackOverflowError"
- C++中的继承,"...is an ambiguous base of ..."错误
- 为什么我的某些数据成员"expression must be an lvalue or a function designator"收到此错误?
- 代码块在尝试编译或创建新文件时出现"An assertion failed!"错误
- 如何修复"static assertion failed: template argument not an integral type"错误?
- 获取此地址时出现"expression must be an l-value or function designator"错误
- 为什么在使用dynamic_cast和模板时出现错误"A is an inaccessible base of B"?
- 错误"<url> is not recognized as an internal or external command, operable program or batch file.
- "Allocating an object of abstract class type"错误,尽管所有函数都有实现
- gcc 未给出的 Clang 错误"attempted to construct a reference element in a tuple with an rvalue"
- MFC 错误仅在发布模式下"Failed to create an empty document"
- 使用指针时遇到错误"The NTVDM CPU has encountered an illegal instruction"
- Windows 7 下带有 VC++ 的 LPT 错误:"External component has thrown an exception."
- C++错误,显示创建链接列表调用"error LinkedList Interface is an inaccessable base of linkedlist"
- C++ 错误"expected an unqualified id before ')' token"(第 1 行)
- 在 C 枚举中使用单词 "SING" 错误以"expected an identifier"
- 推力变换引发错误:"bulk_kernel_by_value: an illegal memory access was encountered"