使用 Thrust 的 ODE 求解器的 CUDA 编程

CUDA programming for ODE solver using thrust

本文关键字:CUDA 编程 ODE      更新时间:2023-10-16

我正在尝试在 CUDA 中使用 Thrust 求解一个包含 6 个变量的 ODE。我的程序如下。

#include <cmath>
#include <cstdlib>
#include <iostream>
#include <utility>
#include <vector>

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/for_each.h>
#include <thrust/functional.h>
#include <thrust/host_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/reduce.h>

#include <boost/numeric/odeint.hpp>
#include <boost/numeric/odeint/external/thrust/thrust_algebra.hpp>
#include <boost/numeric/odeint/external/thrust/thrust_operations.hpp>
#include <boost/numeric/odeint/external/thrust/thrust_resize.hpp>

// Bring std and odeint names into scope so the code below can use them unqualified.
using namespace std;
using namespace boost::numeric::odeint;

// Scalar type used for state values, parameters and time.
typedef double value_type;
// State container: a thrust::device_vector of value_type.
typedef thrust::device_vector< value_type > state_type;
// Exponent used in the pow() terms of the first equation in Goodwin_functor.
const value_type n1 = 10.0;

struct Goodwin_system
{
struct Goodwin_functor
{
template< class T >
__host__ __device__
void operator()( T t ) const
{
value_type x1 = thrust::get< 0 >( t );
value_type x2 = thrust::get< 1 >( t );
value_type x3 = thrust::get< 2 >( t );
value_type x4 = thrust::get< 3 >( t );
value_type x5 = thrust::get< 4 >( t );
value_type x6 = thrust::get< 5 >( t );
value_type a  = thrust::get< 6 >( t );// For differnt values of a we will get different ODE
thrust::get< 7 >( t ) = a * (77.3*(pow(0.001,n1)/(pow(0.001,n1) + pow(x3,n1))) - x1);
thrust::get< 8 >( t ) = a * (x1-x2);
thrust::get< 9 >( t ) = a * (x2-x3);
thrust::get< 10 >( t ) = a * (x3-x4);
thrust::get< 11 >( t ) = a * (x4-x5);
thrust::get< 12 >( t ) = a * (x5-x6);
}
};
Goodwin_system( size_t N , const state_type &aa ) // aa is for different values of the parameter a
: m_N( N ) , m_aa( aa ) { }
template< class State , class Deriv >
void operator()(  const State &x , Deriv &dxdt , value_type t ) const
{
thrust::for_each(
thrust::make_zip_iterator( thrust::make_tuple(
boost::begin( x ) ,
boost::begin( x ) + m_N ,
boost::begin( x ) + 2 * m_N ,
boost::begin( x ) + 3 * m_N ,
boost::begin( x ) + 4 * m_N ,
boost::begin( x ) + 5 * m_N ,
m_aa.begin() ,
boost::begin( dxdt ) ,
boost::begin( dxdt ) + m_N ,
boost::begin( dxdt ) + 2 * m_N,
boost::begin( dxdt ) + 3 * m_N,
boost::begin( dxdt ) + 4 * m_N,
boost::begin( dxdt ) + 5 * m_N ) ) ,
thrust::make_zip_iterator( thrust::make_tuple(
boost::begin( x ) + m_N ,
boost::begin( x ) + 2 * m_N ,
boost::begin( x ) + 3 * m_N ,
boost::begin( x ) + 4 * m_N ,
boost::begin( x ) + 5 * m_N ,
boost::begin( x ) + 6 * m_N ,
m_aa.end() ,
boost::begin( dxdt ) + m_N ,
boost::begin( dxdt ) + 2 * m_N ,
boost::begin( dxdt ) + 3 * m_N,
boost::begin( dxdt ) + 4 * m_N,
boost::begin( dxdt ) + 5 * m_N,
boost::begin( dxdt ) + 6 * m_N)) ,
Goodwin_functor() );
}
size_t m_N;
const state_type &m_aa;
};

size_t N;
void write_ans( const state_type &x , const double t )// For writing the    results
{
cout<<t<<"t";
for( size_t i=0 ; i<6*N ; ++i ) 
{
cout<<x[i]<<"t";
}
cout<<endl;
}
const value_type dt = 0.1;
const value_type t_max = 1000.0;
int main( int argc , char* argv[] )
{

N = argc > 1 ? atoi(argv[1]) : 1000;// for 1000 oscillator
vector< value_type > aa_host( N );
const value_type aa_min = value_type(0.01);
for( size_t i=0 ; i<N ; ++i )
aa_host[i] =(i+1)*aa_min;// Generate differnt a values for each iteration
state_type aa = aa_host;
//[ thrust_Goodwin_parameters_integration
state_type x( 6 * N );
// initialize x,y,z
thrust::fill( x.begin() , x.end() , value_type(0.2) );

typedef runge_kutta4< state_type , value_type , state_type , value_type ,
thrust_algebra , thrust_operations > stepper_type;

Goodwin_system Goodwin(N , aa);

integrate_const( stepper_type() , Goodwin , x , value_type(0.1) , t_max , dt, write_ans);
return 0;
}

当我尝试编译它时,出现以下错误:

"错误:没有与参数列表匹配的重载函数 "thrust::make_tuple" 的实例"

对于只有 4 个变量的 ODE,我可以顺利求解,没有任何错误。thrust::tuple 是否最多只支持 10 个元素?有什么办法可以解决这个问题?

正如文档中明确说明的那样,thrust::tuple 是静态模板化的,最多只能容纳 10 个元素。除非自己重新实现一个支持更多元素的版本,否则这是该类无法绕开的硬性限制。