clequeuewritebuffer将错误的数据写入VRAM

clEnqueueWriteBuffer writes wrong data into VRAM

本文关键字:VRAM 数据 错误 clequeuewritebuffer      更新时间:2023-10-16

我有一个非常奇怪的问题与clEnqueueWriteBuffer。在我目前的项目中,我想复制~500张图像(1GB)到图形卡上,并平均一些像素。图像存储在一个大的双*数组(大小:宽度*高度*nImages)。如果我将300个图像复制到VRAM中并使用clEnqueueReadBuffer读取它,我就会得到我在RAM中存储的内容:

RAM: 14450、5006076793 14450、5006076793 14456、8079379383 14455、2294939826 14444、7361060619

VRAM: 14450,5006076793 14450,5006076793 14456,8079379383 14455,2294939826 14444,7361060619

但是,如果加载超过350张图像,cl_mem对象的内容就会损坏:

RAM:14450、5006076793 14450、5006076793 14456、8079379383 14455、2294939826 14444、7361060619

VRAM:- 6,27743856220419e +66 - 6,27743856220419e +66 - 6,27743856220419e +66 - 6,27743856220419e +66 - 6,27743856220419e +66 - 6,27743856220419e +66 - 6,27743856220419e +66

如果你能帮我的话,我会很高兴的。下面是我的代码:
private: System::Void button7_Click(System::Object^  sender, System::EventArgs^  e) {
         std::string text;
         text = StringConvA(maskedTextBox1->Text);
         textBox1->Text += "You want a bin size of " + atoi(text.c_str()) + ". You have "+ nforegroundImages+" images.rn";
         binWidth = atoi(text.c_str());
         nbins = (int)ceil((double)nforegroundImages / (double)binWidth);
         textBox1->Text += "That is going to give you "+nbins+" binsrn";
         //create context and cmd_queue
         context = clCreateContext(NULL, nDevices, &deviceID[0], NULL, NULL, &err);
         cmd_queue = clCreateCommandQueue(context, deviceID[0], NULL, &err);

         //allocate result memory
         //each result image will have width*height double entries. res_im is an array of pointer to double.

         res_im = (double*)malloc(width*height*sizeof(double)*nbins);

         cl_mem imageData_mem, result_mem, nWavenumber_mem, binSize_mem, imageSizeInPixels_mem, nbins_mem;
         imageData_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, width * height * sizeof(double)*nforegroundImages, NULL, NULL);
         result_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, width * height * sizeof(double)*nbins, NULL, NULL);
         nWavenumber_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int), NULL, NULL);
         binSize_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int), NULL, NULL);
         imageSizeInPixels_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int), NULL, NULL);
         nbins_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int), NULL, NULL);
         clFinish(cmd_queue);
         int imageSizeInPixels = width*height;
         err = clEnqueueWriteBuffer(cmd_queue, imageData_mem, CL_TRUE, 0, width*height*sizeof(double)*nforegroundImages, (void*)images, 0, NULL, NULL); //this is where the images are copied into VRAM. If nforegroundImages>300, the data in VRAM is wrong, otherwise it is the same as in the images array
         err = clEnqueueWriteBuffer(cmd_queue, nWavenumber_mem, CL_TRUE, 0, sizeof(int), (void*)&nforegroundImages, 0, NULL, NULL);
         err = clEnqueueWriteBuffer(cmd_queue, binSize_mem, CL_TRUE, 0, sizeof(int), (void*)&binWidth, 0, NULL, NULL);
         err = clEnqueueWriteBuffer(cmd_queue, imageSizeInPixels_mem, CL_TRUE, 0, sizeof(int), (void*)&imageSizeInPixels, 0, NULL, NULL);
         err = clEnqueueWriteBuffer(cmd_queue, nbins_mem, CL_TRUE, 0, sizeof(int), (void*)&nbins, 0, NULL, NULL);
         clFinish(cmd_queue);
         //read the content of imageData_mem and store it in test array
         double * test = (double*)malloc(width*height*sizeof(double)*nforegroundImages);
         err = clEnqueueReadBuffer(cmd_queue, imageData_mem, CL_TRUE, 0, width*height*sizeof(double)*nforegroundImages,
             test, 0, NULL, NULL);
         clFinish(cmd_queue);
         //compare original value from the images array to the value retrieved from the VRAM
         textBox1->Text += images[1] + "t" + images[1] + "t" + images[10] + "t" + images[100] + "t" + images[1000] + "trn"; //original data
         textBox1->Text += test[1] + "t" + test[1] + "t" + test[10] + "t" + test[100] + "t" + test[1000] + "trn"; //retrieved from imageData_mem
         free(test);
         //build the program from the source file and print the program build log
         cl_program program[2];
         cl_kernel kernel[2];
         const char * filename = "addKernel.c";
         char *program_source = load_program_source(filename);
         program[0] = clCreateProgramWithSource(context, 1, (const char**)&program_source,
             NULL, &err);
         if (err == CL_OUT_OF_HOST_MEMORY){
             textBox1->Text += "Error: out of Host Memory!rn";
         }
         else if (err == CL_INVALID_CONTEXT){
             textBox1->Text += "Error: invalid Context!rn";
         }
         else if (err == CL_INVALID_VALUE){
             textBox1->Text += "Error: invalid Value!rn";
         }

         err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
         textBox1->Text += "Program build error: " + err + "rn";
         cl_build_status status;
         size_t logSize;
         clGetProgramBuildInfo(program[0], deviceID[0], CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status), &status, NULL);
         clGetProgramBuildInfo(program[0], deviceID[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
         char* programLog;
         programLog = (char*)calloc(logSize + 1, sizeof(char));
         clGetProgramBuildInfo(program[0], deviceID[0], CL_PROGRAM_BUILD_LOG, logSize + 1, programLog, NULL);
         this->textBox1->Text += "Program build info: error=" + err + ", status=" + status + ", programLog:rn" + *programLog + "rn" + "In case of an error please make sure that openCL has been initializedrn";
         kernel[0] = clCreateKernel(program[0], "filterSpectrum", &err);
         //(__global double *imageData, __global double *result, __constant int *nWavenumbers, __constant int *binSize, __constant int *imageSizeInPixels,__constant int * nbins)
         // Now setup the arguments to our kernel
         err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), &imageData_mem);
         err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), &result_mem);
         err |= clSetKernelArg(kernel[0], 2, sizeof(cl_mem), &nWavenumber_mem);
         err |= clSetKernelArg(kernel[0], 3, sizeof(cl_mem), &binSize_mem);
         err |= clSetKernelArg(kernel[0], 4, sizeof(cl_mem), &imageSizeInPixels_mem);
         err |= clSetKernelArg(kernel[0], 5, sizeof(cl_mem), &nbins_mem);
         size_t local_work_size = 32;
         // Run the calculation by enqueuing it and forcing the 
         // command queue to complete the task
         size_t global_work_size = width*height;
         err = clEnqueueNDRangeKernel(cmd_queue, kernel[0], 1, NULL,&global_work_size, &local_work_size, 0, NULL, NULL);
         clFinish(cmd_queue);
         // Once finished read back the results from the answer 
         // array into the results array
         err = clEnqueueReadBuffer(cmd_queue, result_mem, CL_TRUE, 0, width*height*sizeof(double)*nbins,
             res_im, 0, NULL, NULL);

         clFinish(cmd_queue);
         textBox1->Text += "result values " + res_im[1] + "t" + res_im[100] + "t" + res_im[1000] + "t" + res_im[10000] + "t" + res_im[100000] + "t" + res_im[1000000] + "rn";
         hScrollBar2->Maximum = nbins+3;
         clReleaseMemObject(imageSizeInPixels_mem);
         clReleaseMemObject(imageData_mem);
         clReleaseMemObject(result_mem);
         clReleaseMemObject(nWavenumber_mem);
         clReleaseMemObject(binSize_mem);
         clReleaseMemObject(nbins_mem);
         clReleaseCommandQueue(cmd_queue);
         clReleaseContext(context);

}

您请求的内存很可能超过驱动程序在单个分配中允许的内存。看起来你没有检查OpenCL运行时函数返回的大多数错误代码;这样做使得更容易诊断OpenCL程序的问题。你真的应该为每个 API调用都这样做。

你可以用下面的代码片段找出你的设备支持的最大的单个内存分配是多少:

cl_ulong maxMemAlloc;
clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &maxMemAlloc, NULL);
textBox1->Text += "Maximum memory allocation size is " + maxMemAlloc + " bytesrn";

通常情况下,最大的内存分配远远小于GPU内存的总大小。OpenCL规范只要求它至少是最大大小的1/4,或者至少128 MB。