CudaMemCpy 在复制 vector<cv::P oint3f 时返回 cudaErrorInvalidValue>

CudaMemCpy returns cudaErrorInvalidValue on copying vector<cv::Point3f>

本文关键字:oint3f gt cudaErrorInvalidValue 返回 cv 复制 vector lt CudaMemCpy      更新时间:2023-10-16

CudaMemCpy在将矢量复制到设备上时返回cudaErrorInvalidValue。我试过给出"&input","&input[0]",…我总是得到同样的错误,但不明白为什么?

你可以使用cudaMemcpy复制一个向量,或者我需要在一个新的数组中复制该向量的内容吗?

void computeDepthChangeMap(unsigned char* depthChangeMap, size_t size, std::vector<cv::Point3f>* input, float dcf, int width, int height)                                           {
    unsigned char* dev_depthChangeMap = 0;
    float* dev_dcf = 0;
    int* dev_wdt = 0;
    int arraySize = size;
    cv::Point3f* dev_input = 0;
    cudaError_t cudaStatus;
    cudaStatus = cudaSetDevice(0);
    cudaStatus = cudaMalloc((void**)&dev_depthChangeMap, size);
    cudaStatus = cudaMalloc((void**)&dev_input, size);
    cudaStatus = cudaMalloc((void**)&dev_dcf, sizeof(float));
    cudaStatus = cudaMalloc((void**)&dev_wdt, sizeof(int));
    cudaStatus = cudaMemcpy(dev_depthChangeMap, depthChangeMap, size, cudaMemcpyHostToDevice);
    cudaStatus = cudaMemcpy(dev_wdt, &width, sizeof(int), cudaMemcpyHostToDevice);
    cudaStatus = cudaMemcpy(dev_dcf, &dcf, sizeof(float), cudaMemcpyHostToDevice);
    cudaStatus = cudaMemcpy(dev_input, &input[0], sizeof(cv::Point3f)*size, cudaMemcpyHostToDevice);
    //cuaStatus returns cudaErrorInvalidValue >> PROBLEM HERE << 
    dim3 threadsPerBlock(8, 8); //init x, y
    dim3 numBlocks(width / threadsPerBlock.x, height / threadsPerBlock.y);
    addKernel <<<numBlocks, threadsPerBlock >>>(dev_depthChangeMap, dev_dcf, dev_input, dev_wdt);

    cudaStatus = cudaGetLastError();   
    cudaStatus = cudaDeviceSynchronize();
    cudaStatus = cudaMemcpy(depthChangeMap, dev_depthChangeMap, size, cudaMemcpyDeviceToHost);
}
__global__ void addKernel(unsigned char* dev_depthChangeMap, float* dcf, cv::Point3f* inp, int* wdt)
{
    register int row_idx = (blockIdx.x * blockDim.x) + threadIdx.x;
    register int col_idx = (blockIdx.y * blockDim.y) + threadIdx.y;
    register int idx = row_idx * (*wdt) + col_idx;
    register float depth = inp[idx].z;
    register float depthR = inp[idx + 1].z;
    register float depthD = inp[idx + *wdt].z;
    //and so on
}

是的,你可以从std::vector复制到cudaMemcpy

你没有正确设置字体大小:

void computeDepthChangeMap(unsigned char* depthChangeMap, size_t size, std::vector<cv::Point3f>* input, float dcf, int width, int height)                                           {
...
cudaStatus = cudaMalloc((void**)&dev_input, size);
                                            ^^^^
cudaStatus = cudaMemcpy(dev_input, &input[0], sizeof(cv::Point3f)*size, cudaMemcpyHostToDevice);
                                                     ^^^^^^^^^^^^^^^^^

这些大小参数都应该在字节。不能将sizeof(cv::Point3f)*size字节的数据复制到size字节的分配中。

此外,您的函数参数似乎是指向向量的指针:

std::vector<cv::Point3f>* input,

根据您所展示的代码,这可能不是您想要的。您可能想要通过值传递向量:

std::vector<cv::Point3f> input,

或者更可能的是,通过引用:

std::vector<cv::Point3f> &input,