
MPI_Bcast Matrix Multiplication Setup

本文关键字:设置 Bcast MPI      更新时间:2023-10-16



#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <iostream>
#include <vector>

#include <mpi.h>
using namespace std;

int main(int argc, char *argv[])
    int myid, nproc;
     int  Ibuffer[200];         // Integer buffer, use proper size and type
    double Dbuffer[2000];      // Double buffer, use proper size and type
    char Sbuffer[200];         // String Buffer
     int msg_len;
     int i, j, k;
    // initialize the MPI Environment and get the needed Data
    MPI_Init(&argc, &argv);
     MPI_Comm_size(MPI_COMM_WORLD, &nproc);
     MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    // Get the name of processor
    MPI_Get_processor_name(Sbuffer, &msg_len);
    int RowA = 5,
    ColA = 2,
    RowB = ColA,
    ColB = 3,
    RowC = RowA,
    ColC = ColB;
    // Start clock
    double start_time = MPI_Wtime();
    // Initialize matrices
    double **matA = new double*[RowA];
    for (int i = 0; i < RowA; ++i)
        matA[i] = new double[ColA];
    double **matB = new double*[RowB];
    for (int i = 0; i < RowB; ++i)
        matB[i] = new double[ColB];
    double **matC = new double*[RowC];
    for (int i = 0; i < RowC; ++i)
        matC[i] = new double[ColC];

    for (int i = 0; i < RowA; i++)  // MatA
        for (int j = 0; j < ColA; j++)
            matA[i][j] = 2;
    for (int i = 0; i < RowB; i++)  // MatB
        for (int j = 0; j < ColB; j++)
            matB[i][j] = 2;
    for (int i = 0; i < RowC; i++)  // MatC
        for (int j = 0; j < ColC; j++)
            matC[i][j] = 0;

    // All procs compute the chunk size, no need to send separate
    int chunk = RowA / nproc;
    int rest  = RowA % nproc;
    int my_start_row = myid * chunk;        // find my start row
    int my_end_row   = (myid + 1) * chunk;      // find my end row
    // assign rest ot last worker
    if (myid == nproc-1) my_end_row += rest;
    int Dcount = ColA * chunk;    // Data count for A to send to worker
    MPI_Status status;        // Status variable neede for the receive
    if (myid == 0)
        // Send the rows needed for workers (Don't know if I need this or not)
            //MPI_Bcast(matA, Dcount, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        // Then work on your own part
        for (int i= my_start_row; i < my_end_row; i++)
            for(int j=0; j < ColB; j++)
                for(int k=0; k < RowB; k++)
                    matC[i][j] = matC[i][j] + (matA[i][k] * matB[k][j]);
        for (int n=1; n<nproc; n++)
            MPI_Bcast(matC, Dcount, MPI_DOUBLE, n, MPI_COMM_WORLD);
            printf("n ==++ Master Receive Result by Worker[%d], n", n); 
        // This is worker, receive the needed info and start working
        //MPI_Bcast(matA, Dcount, MPI_DOUBLE, 0, MPI_COMM_WORLD);
        //printf("n +++ Worker[%d], recived %d rows from Master n", myid, myid*chunk);
        cout << "n === Master sent rows " << myid * chunk << " through " << (myid+1) * chunk << " to process #" << myid << endl;
        // Do the work first 
        for (int i= my_start_row; i < my_end_row; i++)
            for(int j=0; j < ColB; j++)
                for(int k=0; k < RowB; k++)
                    matC[i][j] = matC[i][j] + (matA[i][k] * matB[k][j]);
        // Send the result to the Master
        MPI_Bcast(matC, Dcount, MPI_DOUBLE, myid, MPI_COMM_WORLD);
        printf("n --- Worker[%d], Sent Result to Master n", myid);
    // End clock
    double end_time = MPI_Wtime();
    if (myid == 0) {
        cout << "nParallel Exec time: " << end_time - start_time << endl;


    // Clean up and release the storage
    for (int i=0; i< RowA; i++) 
        delete [] matA[i];
        matA[i] = NULL;
    delete [] matA;
    matA = NULL;
    for (int i=0; i< RowA; i++) 
        delete [] matC[i];
        matC[i] = NULL;
    delete [] matC;
    matC = NULL;



如果我没看错的话，这段代码一开始在每个处理器上都生成三个相同的矩阵 A、B 和 C，然后计算 A 与 B 的乘积，但只针对其中一部分行索引。这样，编号为 rank 的处理器在它负责的那些行上得到的乘法结果是

C(rank) = A(begin;end) * B


而在它不负责的其余行上：C(rank) = 0




  • 使用一个做加法归约的函数来收集数据。可以参考 MPI_Reduce 和 MPI_Allreduce(但实际执行的运算是 x+(nbprocs-1)*0,因此调用这样的函数并不真正有用)
  • 将A和C拆分为子大小矩阵,然后使用MPI_Gather将结果重新合并。
