在transform_reduce中抛出异常bulk_kernel_by_value
thrust exception bulk_kernel_by_value in transform_reduce
我正在研究一个优化问题,其中包含各种类似于类似形式的数学函数,所以我在FunctionObj
template <typename T>
struct FunctionObj
{
T a;
FunctionObj(): a(1)
{
}
};
并定义一个FuncEval
来求
的值template <typename T>
__host__ __device__ inline T FuncEval(const FunctionObj<T> &f_obj, T x)
{
return f_obj.a+x;
}
我真正想做的是sum {func(x)}
,所以我定义了一个FuncEvalF
函子来利用thrust::tranform_reduce
template <typename T>
struct FuncEvalF
{
const FunctionObj<T>& f_obj;
__host__ __device__ inline FuncEvalF(const FunctionObj<T>& in_f_obj) :f_obj(in_f_obj)
{
}
__host__ __device__ inline T operator()(T x)
{
return FuncEval(f_obj, x);
}
};
template <typename T>
__host__ __device__ inline T BatchFuncEval(const FunctionObj<T> &f_obj, int size, const T *x_in);
template<>
inline float BatchFuncEval< float>(const FunctionObj<float> &f_obj, int size, const float *x_in)
{
return thrust::transform_reduce(thrust::device, thrust::device_pointer_cast(x_in), thrust::device_pointer_cast(x_in + size), FuncEvalF<float>(f_obj), static_cast<float>(0), thrust::plus<float>());
}
最后在main.cu
中我调用transform_reduce
auto func = FuncEvalF<float>(FunctionObj<float>());
float result = 0;
try
{
result = thrust::transform_reduce(thrust::device, thrust::device_pointer_cast(dev_a), thrust::device_pointer_cast(dev_a + 10000), func, static_cast<float>(0), thrust::plus<float>());
}
catch (std::exception e)
{
printf("%s in thurst n ", e.what());
}
这里出现了异常:bulk_kernel_by_value
,即使我将10000改为10。当我将FuncEval
的定义更改为
return x;
程序将输出正确但无意义的答案。我忍不住问我的代码出了什么问题?谢谢您的关注。完整的代码如下,cuda 7.0 sm_20
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/inner_product.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/reduce.h>
#include <thrust/execution_policy.h>
#include <thrust/transform_reduce.h>
#include <thrust/transform.h>
#include <stdio.h>
template <typename T>
struct FunctionObj
{
T a;
FunctionObj(): a(1)
{
}
};
template <typename T>
__host__ __device__ inline T FuncEval(const FunctionObj<T> &f_obj, T x)
{
return f_obj.a+x;
}
template <typename T>
struct FuncEvalF
{
const FunctionObj<T>& f_obj;
__host__ __device__ inline FuncEvalF(const FunctionObj<T>& in_f_obj) :f_obj(in_f_obj)
{
}
__host__ __device__ inline T operator()(T x)
{
return FuncEval(f_obj, x);
}
};
template <typename T>
__host__ __device__ inline T BatchFuncEval(const FunctionObj<T> &f_obj, int size, const T *x_in);
template<>
inline float BatchFuncEval< float>(const FunctionObj<float> &f_obj, int size, const float *x_in)
{
return thrust::transform_reduce(thrust::device, thrust::device_pointer_cast(x_in), thrust::device_pointer_cast(x_in + size), FuncEvalF<float>(f_obj), static_cast<float>(0), thrust::plus<float>());
}
int main()
{
cudaError_t cudaE;
float a[10000] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
float* dev_a;
cudaE = cudaMalloc((void**)(&dev_a), sizeof(float) * 10000);
cudaE = cudaMemcpy(dev_a, a, sizeof(float) * 10000, cudaMemcpyHostToDevice);
auto func = FuncEvalF<float>(FunctionObj<float>());
float result = 0;
try
{
result = thrust::transform_reduce(thrust::device, thrust::device_pointer_cast(dev_a), thrust::device_pointer_cast(dev_a + 10000), func, static_cast<float>(0), thrust::plus<float>());
}
catch (std::exception e)
{
printf("%s in thurst n ", e.what());
}
printf("the gpu float result is %fn", result);
cudaFree(dev_a);
}
问题是struct FuncEvalF
中的f_obj
是const FunctionObj<T>&
。
它在主机FunctionObj<float>()
上作为临时实例化,但是对它的引用以后将不再有效。
template <typename T>
struct FuncEvalF
{
FunctionObj<T> f_obj;
....
}
相关文章:
- 使用模板进行堆栈实现; "name followed by :: must be a class or namespace"
- 将 std::thread by 值推送到列表中
- MATLAB to C++: csvread() not supported by MATLAB Coder
- Makefile by ocaml 和 cpp 扩展名
- C++ OpenCV Randu 函数抛出'Integer division by zero'
- 尝试在类中编译内核,出现错误"__init__() got an unexpected keyword argument 'kernel'"
- 如何使用 Eigen::Tensor::convolve with Multiple Kernel?
- 从函数 BY VALUE 返回数组,返回结构时会发生什么?
- 如何在不受其他文件影响的情况下"by itself" Visual Studio 项目中运行C++文件?
- 内核.cpp在制作 kernel.o 时显示错误和 Makefile 错误
- cyttsp4 (linux kernel) Makefile 中的错误
- MacPorts:"Error: clang-4.0 has been replaced by clang-8.0; please install that instead",但我已经安装了clang
- 为什么 c++ 向量没有"delete by index"?
- 这个给定的代码应该将给定的数字转换为尽可能滞后的数字,no.by 用 9.It 替换合适的数字是行不通的
- 传递类 by-value 时,调用方或被调用方是否调用析构函数
- By-ref 参数:这是 std::thread 和 std::bind 之间的不一致吗?
- 转到特定页面后,如何将滑动视图的当前索引设置为选项卡栏"by reference"的当前索引?
- base64 decode with openssl BIO block by block
- 如何在 C++ 中修复"/usr/bin/ld: warning: liblber-2.4.so.2, needed by //usr/lib/x86_64-linux-gnu/libcurl-gnu
- 由mpglib输出的"hip: Can't rewind stream by 74 bits"到底意味着什么?