如何通过Boost.MPI发送以C++ STL向量存储的矩阵的列

How to send columns of a matrix via Boost MPI using C++ STL vectors?

本文关键字：Boost 何通过 MPI 使用 STL c++ 更新时间：2023-10-16

我想发送一个以STL向量形式存储的矩阵的多列:

    // Outer vector of 10 inner vectors holding 10 doubles each. Every inner
    // std::vector owns its own heap buffer, so the matrix as a whole is not
    // guaranteed to occupy one contiguous memory region.
    vector < vector < double > > A ( 10, vector <double> (10));

而不将内容复制到某个缓冲区(因为这里的计算时间至关重要)。

我发现了如何用MPI来做到这一点。以下示例代码演示了如何将10 × 10矩阵的第4、第5和第6列从一个进程(rank==0)发送到另一个进程(rank==1)。(不过我不明白为什么必须在MPI_Type_vector的第三个参数中加上"2"。有人知道原因吗?)

    int rank, size;
    MPI_Init (&argc, &argv);        /* starts MPI */
    MPI_Comm_rank (MPI_COMM_WORLD, &rank);  /* get current process id */
    MPI_Comm_size (MPI_COMM_WORLD, &size);  /* get number of processes */
    // fill matrices: A carries the payload (A[i][j] = 10*i + j),
    // A_copy is the zero-initialized receive target
    vector< vector <float> >A(10, vector <float> (10));
    vector< vector <float> >A_copy(10, vector <float> (10));
    for (int i=0; i!=10; i++)
    {
            for (int j=0; j!=10; j++)
            {
                    A[i][j]=j+ i*10;
                    A_copy[i][j]=0.0;
            }
    }
    int dest=1;
    int tag=1;
    // define new type = three adjacent columns, one block per matrix row
    //
    // NOTE(review): MPI_Type_vector measures the stride in MPI_FLOAT elements
    // relative to the single buffer address handed to MPI_Send/MPI_Recv. A
    // vector< vector<float> > allocates every row separately, so consecutive
    // rows are NOT guaranteed to be a fixed distance apart. The "+2" presumably
    // matches the allocator's per-block overhead/padding on the author's
    // platform (40 payload bytes placed 48 bytes apart) -- this is not portable.
    // A single contiguous buffer (e.g. vector<float>(10*10)) with stride 10
    // would be the well-defined layout.
    MPI_Datatype    newtype;
    MPI_Type_vector(10,     /* count: one block per row */
    3,                      /* blocklength: 3 consecutive floats (3 columns) */
    10+2,                   /* stride between block starts, in floats (see NOTE) */
    MPI_FLOAT,              /* elements are float */
    &newtype);              /* MPI derived datatype */
    MPI_Type_commit(&newtype);
    if (rank==0)
    {
            // blocks start at column index 4 of the first row
            MPI_Send(&A[0][4], 1, newtype, dest, tag, MPI_COMM_WORLD);
    }
    if (rank==1)
    {
            MPI_Status status;
            MPI_Recv(&A_copy[0][4], 1, newtype, 0, tag, MPI_COMM_WORLD, &status);
    }
    // release the derived datatype before shutting MPI down
    MPI_Type_free(&newtype);
    MPI_Finalize();

在Boost网页上，他们声称MPI_Type_vector"在Boost.MPI中会被自动使用"(http://www.boost.org/doc/libs/1_47_0/doc/html/mpi/tutorial.html#mpi.c_mapping)。

但是我找不到一个详细的例子。只知道如何用Boost发送整个矩阵或每个元素。

提前谢谢你

Tobias

我通过编写自己的类'columns'并序列化它来解决这个问题。下面是一个示例代码:

#include<iostream>
#include<vector>
#include <boost/mpi/environment.hpp>
#include <boost/mpi/communicator.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/complex.hpp>
using namespace std;   
namespace mpi=boost::mpi;
/**
 * Non-owning description of a range of columns of a matrix stored as a
 * vector of row vectors.
 *
 * The object only keeps a pointer to the matrix; the matrix must outlive it.
 * serialize() walks the columns j = J0 .. J1-1 of every row.
 */
class columns
{
public:
    int Nr;   // number of rows of the referenced matrix
    int Nc;   // number of columns, taken from the first row (0 if there are no rows)
    int J0;   // first column of the range
    int J1;   // one past the last column of the range
    std::vector < std::vector <double> >* matrix;   // referenced matrix, not owned

    /// Describes columns j0 .. j1-1 of A.
    columns(std::vector < std::vector <double> > & A, int j0, int j1)
        : Nr(static_cast<int>(A.size())),
          Nc(A.empty() ? 0 : static_cast<int>(A[0].size())),
          J0(j0),
          J1(j1),
          matrix(&A)
    {
    }

    /// Describes every column of A (J0 = 0, J1 = Nc).
    columns(std::vector < std::vector <double> > & A)
        : Nr(static_cast<int>(A.size())),
          Nc(A.empty() ? 0 : static_cast<int>(A[0].size())),
          J0(0),
          J1(Nc),   // Nc is declared before J1, so it is already initialized here
          matrix(&A)
    {
    }

    /// Empty placeholder: no matrix attached, empty column range.
    columns()
        : Nr(0), Nc(0), J0(0), J1(0), matrix(0)
    {
    }
};
namespace boost {
namespace serialization {
    /**
     * Non-intrusive serialization of a `columns` object.
     *
     * The four header fields (Nr, Nc, J0, J1) pass through the archive first,
     * followed by the matrix entries of columns J0 .. J1-1 for every row, row
     * by row. When the archive is loading, the header fields of `g` are
     * overwritten by the archived values before the element loop runs, so the
     * loop bounds then come from the archive while the storage is the local
     * matrix behind g.matrix.
     *
     * Element access is bounds-checked: std::vector::at throws
     * std::out_of_range instead of touching memory outside the local matrix
     * when the bounds do not fit it.
     *
     * @param ar  archive to write to / read from
     * @param g   column range to transfer; g.matrix must point to a live matrix
     */
    template<class Archive>
    void serialize(Archive & ar, columns & g, const unsigned int /*version*/)
    {
        ar & g.Nr;
        ar & g.Nc;
        ar & g.J0;
        ar & g.J1;
        for (int i=0; i<g.Nr; ++i)
        {
            std::vector <double> & row = g.matrix->at(i);
            // '<' rather than '!=' so that a reversed range (J0 > J1) is simply empty
            for (int j=g.J0; j<g.J1; ++j)
            {
                ar & row.at(j);
            }
        }
    }
}
}

int main(int argc, char * argv[])
{
mpi::environment env(argc, argv);
mpi::communicator world;
int myid=world.rank();
int NN=world.size();
int Nl=3;
int Ng=5;
int myStart=myid*Ng/NN;
int myEnd=(myid+1)*Ng/NN;
int myN=myEnd-myStart;
if (myid==0)
{
    vector < vector <double> > input (Nl, vector <double> (Ng));
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            input[n][j]=n+j;
        }
    }
    cout << "##### process " << myid << " ############" << endl;
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            cout << input[n][j] << "t";
        }
        cout << endl;
    }
    cout << "############################" << endl;
    // divide grid for parallization
    vector<int> starts(NN);
    vector<int> ends(NN);
    vector<int> Nwork(NN);
    for (int p=0; p!=NN; p++)
    {
        starts[p]=p*Ng/NN;
        ends[p]=(p+1)*Ng/NN;
        Nwork[p]=ends[p]-starts[p];
    }

    vector<columns> input_columns(NN);
    for (int p=1; p!=NN; p++)
    {
        input_columns[p]=columns(input, starts[p], ends[p]);
    }

    for (int p=1; p!=NN; p++)
    {
        world.send(p, 1, input_columns[p]);
    }
}
if (myid!=0)
{
    vector < vector <double> > input (Nl, vector <double> (Ng));
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            input[n][j]=0.0;
        }
    }
    columns input_columns  = columns(input, myStart, myEnd);
    world.recv(0, 1, input_columns); 

    cout << "##### process " << myid << " ############" << endl;
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            cout << input[n][j] << "t";
        }
        cout << endl;
    }
    cout << "############################" << endl;
}
}

说明:'columns'类包含一个指向矩阵的指针和两个表示列开始和结束位置的数字。

/**
 * Non-owning description of a range of columns of a matrix stored as a
 * vector of row vectors.
 *
 * The object only keeps a pointer to the matrix; the matrix must outlive it.
 * serialize() walks the columns j = J0 .. J1-1 of every row.
 */
class columns
{
    public:
    int Nr;              // number of rows in the matrix
    int Nc;              // number of columns in the matrix (0 if there are no rows)
    int J0;              // column start index
    int J1;              // column end index (one past the last column)
    std::vector < std::vector <double> >* matrix;   // referenced matrix, not owned

    // Describes columns j0 .. j1-1 of A.
    columns(std::vector < std::vector <double> > & A, int j0, int j1)
        : Nr(static_cast<int>(A.size())),
          Nc(A.empty() ? 0 : static_cast<int>(A[0].size())),
          J0(j0),
          J1(j1),
          matrix(&A)
    {
    }

    // Describes every column of A (J0 = 0, J1 = Nc).
    columns(std::vector < std::vector <double> > & A)
        : Nr(static_cast<int>(A.size())),
          Nc(A.empty() ? 0 : static_cast<int>(A[0].size())),
          J0(0),
          J1(Nc),        // Nc is declared before J1, so it is already initialized here
          matrix(&A)
    {
    }

    // Empty placeholder: no matrix attached, empty column range.
    columns()
        : Nr(0), Nc(0), J0(0), J1(0), matrix(0)
    {
    }
};

用下面的代码告诉boost-serialization如何序列化这个'columns'类:

namespace boost {
    namespace serialization {
            // Non-intrusive serialization of a 'columns' object: the header
            // fields (Nr, Nc, J0, J1) go through the archive first, then the
            // entries of columns J0 .. J1-1 of every row, row by row.
            // When loading, the header fields are overwritten from the archive
            // before the element loop runs, so access is bounds-checked with
            // std::vector::at (throws std::out_of_range) instead of indexing
            // the local matrix unchecked.
            template<class Archive>
            void serialize(Archive & ar, columns & g, const unsigned int /*version*/)
            {
                    ar & g.Nr;
                    ar & g.Nc;
                    ar & g.J0;
                    ar & g.J1;
                    for (int i=0; i<g.Nr; ++i)
                    {
                            std::vector <double> & row = g.matrix->at(i);
                            // '<' rather than '!=' so that a reversed range (J0 > J1) is simply empty
                            for (int j=g.J0; j<g.J1; ++j)
                            {
                                    ar & row.at(j);
                            }
                    }
            }
    }
}

然后填充矩阵'input'

vector < vector <double> > input (Nl, vector <double>(Ng));
            for (int n=0; n!=Nl; n++)
            {
                    for (int j=0; j!=Ng; j++)
                    {
                            input[n][j]=n+j;
                    }
            }

并初始化一个'columns'类对象(它现在包含一个指向矩阵'input'的指针):

vector<columns> input_columns(NN)

并通过以下调用发送给另一个(子)进程:

// Send rank p its own 'columns' object (destination p, message tag 1); the
// receiver reads a single 'columns', so the element is sent, not the whole vector.
world.send(p, 1, input_columns[p]);

最后通过以下调用接收:

// Receive from rank 0 with message tag 1; here input_columns is the single
// 'columns' object built on the receiving rank around its local matrix.
world.recv(0, 1, input_columns);

如果你要在A上做很多列操作，也许你应该存储A的转置而不是A。这将把列放在连续的内存位置。这意味着您可以使用MPI_Send发送列，而无需执行任何复制操作。此外，列操作将更快。