如何在设备端运行 thrust::count_if?(CUDA)
How is it possible to run thrust::count_if on device? ( Cuda )
我想实现RANSAC。我生成了 60k 个点和 500 个平面,想统计每个平面附近有多少个点,然后选出点数最多的那个平面。
在我生成向量(d_vec
)和平面(d_pl
)并将它们传输到GPU后,我使用thrust::transform
和内部的thrust::count_if
来计算接近点的数量。
不幸的是,我收到此错误:
1>D:ProjectscudaCudaTestCudaTest>"C:Program FilesNVIDIA GPU Computing ToolkitCUDAv9.0binnvcc.exe" -gencode=arch=compute_30,code="sm_30,compute_30" --use-local-env --cl-version 2015 -ccbin "C:Program Files (x86)Microsoft Visual Studio 14.0VCbinx86_amd64" -x cu -I"C:Program FilesNVIDIA GPU Computing ToolkitCUDAv9.0include" -I"C:Program FilesNVIDIA GPU Computing ToolkitCUDAv9.0include" --keep-dir x64Release -maxrregcount=0 --machine 64 --compile -cudart static -DWIN32 -DWIN64 -DNDEBUG -D_CONSOLE -D_MBCS -Xcompiler "/EHsc /W3 /nologo /O2 /FS /Zi /MD " -o x64Releasekernel.cu.obj "D:ProjectscudaCudaTestCudaTestkernel.cu"
1>C:Program FilesNVIDIA GPU Computing ToolkitCUDAv9.0includethrust/detail/type_traits/pointer_traits.h(201): error : calling a __host__ function("thrust::detail::vector_base< ::Vec3, ::thrust::device_malloc_allocator< ::Vec3> > ::begin") from a __device__ function("thrust::cuda_cub::__transform::unary_transform_f< ::thrust::detail::normal_iterator< ::thrust::device_ptr< ::Plane> > , ::thrust::detail::normal_iterator< ::thrust::device_ptr<int> > , ::thrust::cuda_cub::__transform::no_stencil_tag, ::plane_functor, ::thrust::cuda_cub::__transform::always_true_predicate> ::operator ()<long long> ") is not allowed
1>C:Program FilesNVIDIA GPU Computing ToolkitCUDAv9.0includethrust/detail/type_traits/pointer_traits.h(201): error : identifier "thrust::detail::vector_base< ::Vec3, ::thrust::device_malloc_allocator< ::Vec3> > ::begin" is undefined in device code
1>D:/Projects/cuda/CudaTest/CudaTest/kernel.cu(84): error : calling a __host__ function("thrust::detail::vector_base< ::Vec3, ::thrust::device_malloc_allocator< ::Vec3> > ::end") from a __device__ function("thrust::cuda_cub::__transform::unary_transform_f< ::thrust::detail::normal_iterator< ::thrust::device_ptr< ::Plane> > , ::thrust::detail::normal_iterator< ::thrust::device_ptr<int> > , ::thrust::cuda_cub::__transform::no_stencil_tag, ::plane_functor, ::thrust::cuda_cub::__transform::always_true_predicate> ::operator ()<long long> ") is not allowed
1>D:/Projects/cuda/CudaTest/CudaTest/kernel.cu(84): error : identifier "thrust::detail::vector_base< ::Vec3, ::thrust::device_malloc_allocator< ::Vec3> > ::end" is undefined in device code
如何从设备代码调用 thrust::count_if?我做错了什么? 这是完整的代码:
#include <time.h>

#include <algorithm>
#include <cstdlib>
#include <iostream>

#include <thrust/copy.h>
#include <thrust/count.h>
#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/extrema.h>
#include <thrust/generate.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include <thrust/transform.h>
// Three-component float vector; used in this file as a point in 3-D space.
// Kept as a plain aggregate so it can be brace-initialised and copied freely.
struct Vec3 {
    float x;  // first coordinate
    float y;  // second coordinate
    float z;  // third coordinate

    // Stream inserter for printing; declared here, defined outside the struct.
    friend std::ostream& operator<<(std::ostream& os, const Vec3& dt);
};
// Writes the vector as "x, y, z" and returns the stream so calls can be chained.
std::ostream& operator<<(std::ostream& os, const Vec3& dt)
{
    return os << dt.x << ", " << dt.y << ", " << dt.z;
}
// Plane stored as the coefficients of a*x + b*y + c*z + d = 0.
struct Plane {
    float a;
    float b;
    float c;
    float d;

    // Builds the plane passing through the three points A, B and C.
    // (a, b, c) is the cross product of the edges AB and AC; d is chosen so
    // that A satisfies the plane equation.
    // Formula reference: https://keisan.casio.com/exec/system/1223596129
    static Plane FromPoints(Vec3 A, Vec3 B, Vec3 C) {
        // Edge AB.
        const float abx = B.x - A.x;
        const float aby = B.y - A.y;
        const float abz = B.z - A.z;
        // Edge AC.
        const float acx = C.x - A.x;
        const float acy = C.y - A.y;
        const float acz = C.z - A.z;

        // Normal = AB x AC.
        const float nx = aby * acz - acy * abz;
        const float ny = abz * acx - acz * abx;
        const float nz = abx * acy - acx * aby;

        // Offset that puts A on the plane.
        const float offset = -(nx * A.x + ny * A.y + nz * A.z);

        return Plane{nx, ny, nz, offset};
    }
};
// Produces a pseudo-random point; each coordinate is rand() scaled into
// the range [0, 1000]. Coordinates are drawn in x, y, z order.
Vec3 generator() {
    const auto random_coord = []() -> float {
        return float(rand()) / float(RAND_MAX) * 1000.f;
    };
    const float x = random_coord();
    const float y = random_coord();
    const float z = random_coord();
    return {x, y, z};
}
// Returns a pseudo-random index in [0, 65536).
//
// main() uses the result to subscript its 65536-element point vector, so the
// modulus must not exceed that size. A larger modulus (the old value was
// 69632) yields out-of-range indices on any platform where RAND_MAX >= 65536.
int index_generator() {
    constexpr int kPointCount = 65536;  // must match the point vector size in main()
    return rand() % kPointCount;
}
// Unary predicate for thrust::count_if: true when a point lies closer than
// 0.128 units to the stored plane (i.e. the point is an inlier of that plane).
//
// The point-to-plane distance is |a*x + b*y + c*z + d| / |(a, b, c)|.
// For a degenerate plane with a zero normal the quotient is NaN or +inf,
// both of which compare false against the threshold, so such a plane
// collects no inliers.
struct plane_distance {
    const Plane pl;  // plane the points are tested against (held by value)

    __device__ plane_distance(const Plane pl) : pl(pl) {}

    // Returns true when vv is within the inlier threshold of the plane.
    __device__ bool operator()(const Vec3& vv) const {
        const float residual = fabsf(pl.a*vv.x + pl.b*vv.y + pl.c*vv.z + pl.d);
        const float normal_length = sqrtf(pl.a*pl.a + pl.b*pl.b + pl.c*pl.c);
        // "<" selects points NEAR the plane; ">" would count the outliers instead.
        return residual / normal_length < 0.128f;
    }
};
struct plane_functor
{
thrust::device_vector<Vec3>& d_vec;
plane_functor(thrust::device_vector<Vec3>& d_vec) : d_vec(d_vec) {}
__device__ int operator()(const Plane& pl) const {
return thrust::count_if(thrust::device, d_vec.begin(), d_vec.end(), plane_distance(pl));
}
};
int main(void)
{
// Generate random points for testing
std::cout << "Generating..." << std::endl;
// generate random vectors serially
thrust::host_vector<Vec3> h_vec(65536);
std::generate(h_vec.begin(), h_vec.end(), generator);
// Generate random planes
thrust::host_vector<Plane> h_pl(512);
std::generate(h_pl.begin(), h_pl.end(), [&h_vec]() {
return Plane::FromPoints(
h_vec[index_generator()],
h_vec[index_generator()],
h_vec[index_generator()]
);
});
std::cout << "Transfer" << std::endl;
// transfer data to the device
thrust::device_vector<Vec3> d_vec = h_vec;
thrust::device_vector<Plane> d_pl = h_pl;
thrust::device_vector<int> counts(512);
std::cout << "Searching" << std::endl;
thrust::transform(thrust::device, d_pl.begin(), d_pl.end(), counts.begin(), plane_functor(d_vec));
auto result = thrust::max_element(thrust::device, counts.begin(), counts.end());
std::cout << "Press any key to exit" << std::endl;
std::cin.get();
return 0;
}
如注释中所述,在设备代码中访问device_vector
是非法的。尽管名字中带有 device,但在撰写本文时所有可用的 Thrust 版本中,它们都是主机端抽象。您收到错误是因为您的函子在设备代码中调用了device_vector的拷贝构造函数,这需要构造新的容器,进而触发内存分配,因此无法编译。
您应该能够使用原始设备指针来使其工作,因此如下所示:
// Functor for thrust::transform: maps a plane to the number of points in
// [d_vec0, d_vec1) that plane_distance accepts for that plane.
// Holds plain pointers only, so it can be copied into device code.
struct plane_functor
{
    Vec3* d_vec0;  // first point of the range
    Vec3* d_vec1;  // one past the last point of the range

    __host__ __device__ plane_functor(Vec3* d_vec0, Vec3* d_vec1)
        : d_vec0(d_vec0),
          d_vec1(d_vec1)
    {}

    // Counts the points in the stored range for which plane_distance(pl) holds.
    __device__ int operator()(const Plane& pl) const
    {
        const plane_distance predicate(pl);
        return thrust::count_if(thrust::device, d_vec0, d_vec1, predicate);
    }
};
// ....
Vec3* d_vec0 = thrust::raw_pointer_cast(d_vec.data());
Vec3* d_vec1 = d_vec0 + (d_vec.end() - d_vec.begin());
thrust::transform(d_pl.begin(), d_pl.end(), counts.begin(), plane_functor( d_vec0, d_vec1 ) );
请注意,虽然这段代码在我这里可以编译,但我无法运行您的代码,因为运行时主机端的初始化 lambda 会崩溃。另外请特别注意不要混用基于标签和基于策略的执行方式。由于device_vector
迭代器和thrust::device
的组合,即使使用有效的函子,thrust::transform
调用也会失败。
相关文章:
- 我的简单if-else语句是如何无法访问的代码
- 如何将enable-if与模板参数和参数包一起使用
- 无论条件是否为true,if总是在c++中执行
- Arduino:for/while/if在void setup()或void loop()之前?——错误:之前需要不合格
- Insert函数不适用于2 if语句C++
- If语句未被求值C++
- C++嵌套if语句,基本货币交换
- 多个If语句与使用逻辑运算符计算条件的单个语句的比较
- 是否可以使用if constexpr删除控制流语句
- 要与"if constexpr"一起使用的编译时消息(在预处理器之后)
- 如何删除peer if else分支中的冗长句子
- 我似乎对if/else的基本语句有问题:/
- if数组上的随机数
- 将按位if条件转换为普通if条件
- If语句在c++中被忽略
- 比较if语句中的数组值和int值
- 使用if-else将数字转换为单词
- 为什么简单的算术减法在"if"条件下不起作用?
- 以在Qt中的IF语句中设置时间延迟
- 访问可能不存在的const vector成员- try/catch或if (count != 0)