clequeuewritebuffer将错误的数据写入VRAM
clEnqueueWriteBuffer writes wrong data into VRAM
我有一个非常奇怪的问题与clEnqueueWriteBuffer。在我目前的项目中,我想复制~500张图像(1GB)到图形卡上,并平均一些像素。图像存储在一个大的双*数组(大小:宽度*高度*nImages)。如果我将300个图像复制到VRAM中并使用clEnqueueReadBuffer读取它,我就会得到我在RAM中存储的内容:
RAM: 14450、5006076793 14450、5006076793 14456、8079379383 14455、2294939826 14444、7361060619
VRAM: 14450,5006076793 14450,5006076793 14456,8079379383 14455,2294939826 14444,7361060619
但是,如果加载超过350张图像,cl_mem对象的内容就会损坏:
RAM:14450、5006076793 14450、5006076793 14456、8079379383 14455、2294939826 14444、7361060619
VRAM:- 6,27743856220419e +66 - 6,27743856220419e +66 - 6,27743856220419e +66 - 6,27743856220419e +66 - 6,27743856220419e +66 - 6,27743856220419e +66 - 6,27743856220419e +66
如果你能帮我的话,我会很高兴的。下面是我的代码:private: System::Void button7_Click(System::Object^ sender, System::EventArgs^ e) {
std::string text;
text = StringConvA(maskedTextBox1->Text);
textBox1->Text += "You want a bin size of " + atoi(text.c_str()) + ". You have "+ nforegroundImages+" images.rn";
binWidth = atoi(text.c_str());
nbins = (int)ceil((double)nforegroundImages / (double)binWidth);
textBox1->Text += "That is going to give you "+nbins+" binsrn";
//create context and cmd_queue
context = clCreateContext(NULL, nDevices, &deviceID[0], NULL, NULL, &err);
cmd_queue = clCreateCommandQueue(context, deviceID[0], NULL, &err);
//allocate result memory
//each result image will have width*height double entries. res_im is an array of pointer to double.
res_im = (double*)malloc(width*height*sizeof(double)*nbins);
cl_mem imageData_mem, result_mem, nWavenumber_mem, binSize_mem, imageSizeInPixels_mem, nbins_mem;
imageData_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, width * height * sizeof(double)*nforegroundImages, NULL, NULL);
result_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, width * height * sizeof(double)*nbins, NULL, NULL);
nWavenumber_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int), NULL, NULL);
binSize_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int), NULL, NULL);
imageSizeInPixels_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int), NULL, NULL);
nbins_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int), NULL, NULL);
clFinish(cmd_queue);
int imageSizeInPixels = width*height;
err = clEnqueueWriteBuffer(cmd_queue, imageData_mem, CL_TRUE, 0, width*height*sizeof(double)*nforegroundImages, (void*)images, 0, NULL, NULL); //this is where the images are copied into VRAM. If nforegroundImages>300, the data in VRAM is wrong, otherwise it is the same as in the images array
err = clEnqueueWriteBuffer(cmd_queue, nWavenumber_mem, CL_TRUE, 0, sizeof(int), (void*)&nforegroundImages, 0, NULL, NULL);
err = clEnqueueWriteBuffer(cmd_queue, binSize_mem, CL_TRUE, 0, sizeof(int), (void*)&binWidth, 0, NULL, NULL);
err = clEnqueueWriteBuffer(cmd_queue, imageSizeInPixels_mem, CL_TRUE, 0, sizeof(int), (void*)&imageSizeInPixels, 0, NULL, NULL);
err = clEnqueueWriteBuffer(cmd_queue, nbins_mem, CL_TRUE, 0, sizeof(int), (void*)&nbins, 0, NULL, NULL);
clFinish(cmd_queue);
//read the content of imageData_mem and store it in test array
double * test = (double*)malloc(width*height*sizeof(double)*nforegroundImages);
err = clEnqueueReadBuffer(cmd_queue, imageData_mem, CL_TRUE, 0, width*height*sizeof(double)*nforegroundImages,
test, 0, NULL, NULL);
clFinish(cmd_queue);
//compare original value from the images array to the value retrieved from the VRAM
textBox1->Text += images[1] + "t" + images[1] + "t" + images[10] + "t" + images[100] + "t" + images[1000] + "trn"; //original data
textBox1->Text += test[1] + "t" + test[1] + "t" + test[10] + "t" + test[100] + "t" + test[1000] + "trn"; //retrieved from imageData_mem
free(test);
//build the program from the source file and print the program build log
cl_program program[2];
cl_kernel kernel[2];
const char * filename = "addKernel.c";
char *program_source = load_program_source(filename);
program[0] = clCreateProgramWithSource(context, 1, (const char**)&program_source,
NULL, &err);
if (err == CL_OUT_OF_HOST_MEMORY){
textBox1->Text += "Error: out of Host Memory!rn";
}
else if (err == CL_INVALID_CONTEXT){
textBox1->Text += "Error: invalid Context!rn";
}
else if (err == CL_INVALID_VALUE){
textBox1->Text += "Error: invalid Value!rn";
}
err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
textBox1->Text += "Program build error: " + err + "rn";
cl_build_status status;
size_t logSize;
clGetProgramBuildInfo(program[0], deviceID[0], CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL);
clGetProgramBuildInfo(program[0], deviceID[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
char* programLog;
programLog = (char*)calloc(logSize + 1, sizeof(char));
clGetProgramBuildInfo(program[0], deviceID[0], CL_PROGRAM_BUILD_LOG, logSize + 1, programLog, NULL);
this->textBox1->Text += "Program build info: error=" + err + ", status=" + status + ", programLog:rn" + *programLog + "rn" + "In case of an error please make sure that openCL has been initializedrn";
kernel[0] = clCreateKernel(program[0], "filterSpectrum", &err);
//(__global double *imageData, __global double *result, __constant int *nWavenumbers, __constant int *binSize, __constant int *imageSizeInPixels,__constant int * nbins)
// Now setup the arguments to our kernel
err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), &imageData_mem);
err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), &result_mem);
err |= clSetKernelArg(kernel[0], 2, sizeof(cl_mem), &nWavenumber_mem);
err |= clSetKernelArg(kernel[0], 3, sizeof(cl_mem), &binSize_mem);
err |= clSetKernelArg(kernel[0], 4, sizeof(cl_mem), &imageSizeInPixels_mem);
err |= clSetKernelArg(kernel[0], 5, sizeof(cl_mem), &nbins_mem);
size_t local_work_size = 32;
// Run the calculation by enqueuing it and forcing the
// command queue to complete the task
size_t global_work_size = width*height;
err = clEnqueueNDRangeKernel(cmd_queue, kernel[0], 1, NULL,&global_work_size, &local_work_size, 0, NULL, NULL);
clFinish(cmd_queue);
// Once finished read back the results from the answer
// array into the results array
err = clEnqueueReadBuffer(cmd_queue, result_mem, CL_TRUE, 0, width*height*sizeof(double)*nbins,
res_im, 0, NULL, NULL);
clFinish(cmd_queue);
textBox1->Text += "result values " + res_im[1] + "t" + res_im[100] + "t" + res_im[1000] + "t" + res_im[10000] + "t" + res_im[100000] + "t" + res_im[1000000] + "rn";
hScrollBar2->Maximum = nbins+3;
clReleaseMemObject(imageSizeInPixels_mem);
clReleaseMemObject(imageData_mem);
clReleaseMemObject(result_mem);
clReleaseMemObject(nWavenumber_mem);
clReleaseMemObject(binSize_mem);
clReleaseMemObject(nbins_mem);
clReleaseCommandQueue(cmd_queue);
clReleaseContext(context);
}
您请求的内存很可能超过驱动程序在单个分配中允许的内存。看起来你没有检查OpenCL运行时函数返回的大多数错误代码;这样做使得更容易诊断OpenCL程序的问题。你真的应该为每个 API调用都这样做。
你可以用下面的代码片段找出你的设备支持的最大的单个内存分配是多少:
cl_ulong maxMemAlloc;
clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &maxMemAlloc, NULL);
textBox1->Text += "Maximum memory allocation size is " + maxMemAlloc + " bytesrn";
通常情况下,最大的内存分配远远小于GPU内存的总大小。OpenCL规范只要求它至少是最大大小的1/4,或者至少128 MB。
- 防止主数据类型C++的隐式转换
- 用于访问容器<T>数据成员的正确 API
- 嵌套在类中时无法设置成员数据
- 使用流处理接收到的数据
- 静态数据成员的问题-修复链接错误会导致编译器错误
- 处理小于cpu数据总线的数据类型.(c++转换为机器代码)
- 在cuda线程之间共享大量常量数据
- C++将文本文件中的数据读取到结构数组中
- 如何在C++中序列化结构数据
- 在C++中打印指向不同基元数据类型的指针的内存地址
- 通过套接字[TCP]传输数据 如何在C / C ++中打包多个整数并使用send() recv()传输数据
- 在c代码之间共享数据的最佳方式
- 链表,反向函数,数据结构
- 数据成员SFINAE的C++17测试:gcc vs clang
- C++浮点数据类型和字符串数据类型无法子到模板函数中
- 如何对点云数据进行排序
- 从矢量<无符号字符>转换为字符* 包括垃圾数据
- 尝试通过OCI例程从Oracle获取blob数据,但出现错误:ORA-01008:并非所有变量都绑定
- 读取VRAM数据
- clequeuewritebuffer将错误的数据写入VRAM