CUDA:访问违规读取位置

CUDA: Access violation reading location

本文关键字:读取 位置 访问 CUDA      更新时间:2023-10-16

我是 CUDA 的新手,当我遇到访问冲突读取位置运行时异常时,我正在尝试制作一个简单的程序来模糊.tga文件。因为我对 CUDA 很陌生,我不知道如何解决它,谷歌也不是很有帮助。代码如下:

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <math.h>
#include <time.h>
#define println(...) std::cout << __VA_ARGS__ << "n";
#define WIDTH 1920
#define HEIGHT 1080
#define BLUR 5
unsigned char in[HEIGHT][WIDTH][3];
unsigned char out[HEIGHT][WIDTH][3];
unsigned char header[18];
void read(char input[256]) {
    FILE* f;
    f = fopen(input, "rb");
    if (!f) {
        printf("File Reading Failedn");
    }
    fread(&header, 1, 18, f);
    fread(&in, 1, HEIGHT*WIDTH * 3, f);
    fclose(f);
}
void write(char output[256]) {
    FILE* fw;
    fw = fopen(output, "wb+");
    if (!fw) {
        printf("File Writing Failedn");
    }
    header[16] = 24;
    header[13] = WIDTH / 256;
    header[12] = WIDTH % 256;
    header[15] = HEIGHT / 256;
    header[14] = HEIGHT % 256;
    fwrite(&header, 1, 18, fw);
    fwrite(&out, 1, HEIGHT*WIDTH * 3, fw);
    fclose(fw);
}
__device__
int toIndex(int x, int y) {
    return x + y / WIDTH;
}
__device__
void doPixel(int x, int y, char *red, char *green, char *blue) {
    int avgRed = 0;
    int avgGreen = 0;
    int avgBlue = 0;
    for (int i = -BLUR; i <= BLUR; i++) {
        for (int j = -BLUR; j <= BLUR; j++) {
            avgRed += red[toIndex(i, j)];
            avgBlue += blue[toIndex(i, j)];
            avgGreen += green[toIndex(i, j)];
        }
    }
    red[toIndex(x, y)] = avgRed / (BLUR*BLUR);
    green[toIndex(x, y)] = avgGreen / (BLUR*BLUR);
    blue[toIndex(x, y)] = avgBlue / (BLUR*BLUR);
}
__global__
void setValue(char *red, char *green, char *blue) {
    int x;
    int y;
    for (int i = threadIdx.x; i < WIDTH * HEIGHT; i += 1024) {
        x = i % WIDTH;
        y = i / WIDTH;
        doPixel(x, y, red, green, blue);
    }
}
int main(void) {
    char *red, *green, *blue;
    double time;
    read("test.tga");
    cudaMallocManaged(&red, WIDTH * HEIGHT);
    cudaMallocManaged(&green, WIDTH * HEIGHT);
    cudaMallocManaged(&blue, WIDTH * HEIGHT);
    for (int x = 0; x < WIDTH; x++) {
        for (int y = 0; y < HEIGHT; y++) {
            red[x + y*WIDTH] = in[y][x][2];
            green[x + y*WIDTH] = in[y][x][1];
            blue[x + y*WIDTH] = in[y][x][0];
        }
    }
    time = clock();
    setValue<<<1, 1024>>>(red, green, blue);
    cudaDeviceSynchronize();
    println((clock() - time) / CLOCKS_PER_SEC);
    int x;
    int y;
    for (int i = 0; i < WIDTH * HEIGHT; i++) {
        x = i % WIDTH;
        y = i / WIDTH;
        out[y][x][0] = blue[i];      //Program gives error here
        out[y][x][1] = green[i];
        out[y][x][2] = red[i];
    }
    write("test.tga");
    cudaFree(red);
    cudaFree(green);
    cudaFree(blue);
    getchar();
}

我读到cudaDeviceSynchronize()是解决此问题的方法,但这似乎不起作用。 cudaThreadSynchronize() 也不能解决这个问题。

查找非法内存访问错误的最简单方法是使用 cuda-gdb 运行二进制文件。确保在编译时提供-g -G -O0标志

在您的情况下,您可能会在此代码段中发现一些错误

for (int i = -BLUR; i <= BLUR; i++) {
    for (int j = -BLUR; j <= BLUR; j++) {
        avgRed   += red[toIndex(i, j)];
        avgBlue  += blue[toIndex(i, j)];
        avgGreen += green[toIndex(i, j)];
    }
}