从模板中提取原始数据以供CUDA使用

Extracting raw data from template for use in CUDA

本文关键字：CUDA、使用、原始数据、提取　　更新时间：2023-10-16

以下代码是来自 PCL（Point Cloud Library，点云库）的代码片段，用于计算图像的积分和（积分图）。

template <class DataType, unsigned Dimension> class IntegralImage2D
{
    public:
        static const unsigned dim_fst = Dimension;
        typedef cv::Vec<typename TypeTraits<DataType>::IntegralType, dim_fst> FirstType;
        std::vector<FirstType>  img_fst;
        //.... lots of methods missing here that actually calculate the integral sum
        /** \brief Compute the first order sum within a given rectangle
          * \param[in] start_x x position of rectangle
          * \param[in] start_y y position of rectangle
          * \param[in] width width of rectangle
          * \param[in] height height of rectangle
          */
        inline FirstType getFirstOrderSum(unsigned start_x, unsigned start_y, unsigned width, unsigned height) const
        {
            // Rows of img_fst are addressed with a stride of (wdt + 1) entries.
            // Locate the rectangle's two left-hand corners first ...
            const unsigned top_left     = start_y * (wdt + 1) + start_x;
            const unsigned bottom_left  = (start_y + height) * (wdt + 1) + start_x;

            // ... then step `width` entries along each row to reach the right-hand corners.
            const unsigned top_right    = top_left + width;
            const unsigned bottom_right = bottom_left + width;

            // Combine the four corner entries: the two diagonal corners are added,
            // the two off-diagonal corners are subtracted.
            return(img_fst[bottom_right] + img_fst[top_left] - img_fst[top_right] - img_fst[bottom_left]);
        }

目前使用以下代码获得结果:

// Integral image instantiated for float data with 3 channels.
IntegralImage2D<float,3> iim_xyz;

// Query the first- and second-order sums over the same rectangle: it starts at
// (pos_x - rec_wdt_2, pos_y - rec_hgt_2) and spans rec_wdt x rec_hgt entries.
IntegralImage2D<float, 3>::FirstType  fo_elements =
    iim_xyz.getFirstOrderSum(pos_x - rec_wdt_2, pos_y - rec_hgt_2, rec_wdt, rec_hgt);
IntegralImage2D<float, 3>::SecondType so_elements =
    iim_xyz.getSecondOrderSum(pos_x - rec_wdt_2, pos_y - rec_hgt_2, rec_wdt, rec_hgt);

然而，我正在尝试将这段代码并行化（把 getFirstOrderSum 写成 CUDA 设备函数）。由于 CUDA 无法识别 FirstType、SecondType 这类对象（以及任何 OpenCV 对象），而我又是 C++ 新手，所以不知道该如何从模板中提取原始数据。

如果可能的话，我想把 img_fst 对象转换成某种向量或数组，以便在 CUDA 设备上为其分配内存并在内核中使用。

img_fst 的类型似乎是 std::vector<cv::Matx<double,3,1>>。

事实证明，可以像使用普通的 std::vector 一样直接传递其中的原始数据。

/**
 * Copies the first-order integral image of `iim_xyz` into device memory.
 *
 * Allocates a device buffer holding one cv::Vec<double, 3> per element of
 * `iim_xyz.img_fst`, uploads the host data into it, and releases the buffer
 * before returning.
 *
 * NOTE(review): the buffer is sized with sizeof(cv::Vec<double, 3>); this
 * assumes the element type of `img_fst` has exactly that size - confirm
 * against the IntegralImage2D definition.
 *
 * @param iim_xyz integral image whose `img_fst` data is uploaded (taken by
 *                value, as in the original interface).
 */
void computation(ps::IntegralImage2D<float, 3> iim_xyz){
    typedef cv::Vec<double, 3> Element;

    // Compute the transfer size once so the allocation and the copy cannot disagree.
    const size_t num_bytes = sizeof(Element) * iim_xyz.img_fst.size();

    // Indexing element 0 of an empty vector is undefined behaviour, so only
    // take its address when there is at least one element.
    const Element* h_img_fst = iim_xyz.img_fst.empty() ? 0 : &iim_xyz.img_fst[0];

    Element* d_img_fst = 0;
    cudaErrorCheck(cudaMalloc((void**)&d_img_fst, num_bytes));
    cudaErrorCheck(cudaMemcpy(d_img_fst, h_img_fst, num_bytes, cudaMemcpyHostToDevice));
//..
    // Release the device buffer once the work above is finished; without this
    // every call leaks the allocation.
    cudaErrorCheck(cudaFree(d_img_fst));
}
/**
 * Device-side first-order sum over a rectangle of the integral image.
 *
 * Reads the four corner entries of the rectangle from `img_fst` (rows are
 * addressed with a stride of `wdt + 1` entries) and combines them per channel:
 * lower-right + upper-left - upper-right - lower-left.
 *
 * @param start_x x position of the rectangle
 * @param start_y y position of the rectangle
 * @param width   width of the rectangle
 * @param height  height of the rectangle
 * @param wdt     value used to form the row stride `wdt + 1`
 * @param img_fst integral image entries; must be dereferenceable from device code
 * @return pointer to 3 doubles allocated with device-side `new[]`, or a null
 *         pointer if that allocation fails. The caller owns the array and must
 *         release it with `delete[]` in device code, otherwise it leaks.
 */
__device__ double* getFirstOrderSum(unsigned start_x, unsigned start_y, unsigned width, unsigned height, int wdt, cv::Vec<double, 3>* img_fst)
{
    const unsigned upper_left_idx = start_y * (wdt + 1) + start_x;
    const unsigned upper_right_idx = upper_left_idx + width;
    const unsigned lower_left_idx = (start_y + height) * (wdt + 1) + start_x;
    const unsigned lower_right_idx = lower_left_idx + width;

    double* result = new double[3];
    // Device heap allocation can fail (the heap is limited in size); writing
    // through a null pointer would fault the whole kernel, so report failure
    // to the caller instead.
    if (result == 0)
    {
        return 0;
    }

    // Same corner combination for each of the 3 channels.
    for (int channel = 0; channel < 3; ++channel)
    {
        result[channel] = img_fst[lower_right_idx].val[channel] + img_fst[upper_left_idx].val[channel]
                        - img_fst[upper_right_idx].val[channel] - img_fst[lower_left_idx].val[channel];
    }
    return result; // caller must delete[] this pointer, otherwise it leaks
}