OpenMPI for C++:rank 在函数内部似乎无缘无故地发生了变化

openmpi for c++: rank changes inside function apparently without reason.

本文关键字:变化 无缘无故 C++ 函数 内部 OpenMPI for      更新时间:2023-10-16

以下代码产生的输出粘贴在代码之后

#include "mpi.h"
#include <stdio.h>
#define NUM 5
#define TRANSACTIONS 1
/*
 * Two-process blocking send/receive demo.
 *
 * Rank 0 sends two NUM-character slices of outmsg to rank 1: the first from
 * offset 0 with tag `tag` (1), the second from offset 5 with tag 10.
 * Rank 1 receives them into inmsg at offsets 0 and 5, recording the receive
 * status in Stat[0] and Stat[1] respectively.
 * Afterwards every rank prints what is stored in both Stat entries.
 */
int main(int argc, char *argv[])  {
int numtasks, rank, dest, source, rc, count, tag=1;  
char outmsg[] = "Hello World, today is a not so fantastic day for programmers";
char inmsg[20] = "-------------------";
/* Two entries: one status per receive performed by rank 1. */
MPI_Status Stat[2];
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
char* baseaddr;
if (rank == 0) {
  dest = 1;
  source = 1;
  printf("a) I am: %d\n", rank);
  /* First slice: outmsg[0 .. NUM-1], tagged with `tag`. */
  rc = MPI_Send(outmsg, NUM, MPI_CHAR, dest, tag, MPI_COMM_WORLD);
  printf("OUTBUFFER OF WORKER %d: %s\n", rank, outmsg);
  printf("b) I am: %d\n", rank);
  /* Second slice starts 5 characters into outmsg and is tagged 10. */
  baseaddr=outmsg+5;
  //rc = MPI_Send(outmsg+5, NUM, MPI_CHAR, dest, 10, MPI_COMM_WORLD);
  rc = MPI_Send(baseaddr, NUM, MPI_CHAR, dest, 10, MPI_COMM_WORLD);
  printf("OUTBUFFER OF WORKER %d: %s\n", rank, outmsg);
  //rc = MPI_Recv(inmsg, NUM, MPI_CHAR, source, tag, MPI_COMM_WORLD, &Stat);
  printf("c) I am: %d\n", rank);
  } 
else if (rank == 1) {
  dest = 0;
  source = 0;
  printf("d) I am: %d\n", rank); 
  /* The second receive lands 5 characters into inmsg. */
  baseaddr = inmsg+5;
  //rc = MPI_Recv(inmsg+5, NUM, MPI_CHAR, source, tag, MPI_COMM_WORLD, &Stat);
  rc = MPI_Recv(inmsg, NUM, MPI_CHAR, source, tag, MPI_COMM_WORLD, &Stat[0]);
  printf("e) I am: %d\n", rank);
  printf("INBUFFER OF WORKER %d: %s\n", rank, inmsg);
  rc = MPI_Recv(baseaddr, NUM, MPI_CHAR, source, 10, MPI_COMM_WORLD, &Stat[1]);
  //rc = MPI_Send(outmsg, NUM, MPI_CHAR, dest, tag, MPI_COMM_WORLD);
  printf("f) I am: %d\n", rank);
  printf("INBUFFER OF WORKER %d: %s\n", rank, inmsg);
  }

/*
 * Runs for i = 0 and i = 1 (note the <=), i.e. over both Stat entries.
 * NOTE(review): only rank 1 fills Stat; on any other rank the entries are
 * never written, so the values printed there are indeterminate.
 */
for(int i=0; i<=TRANSACTIONS; ++i){
    rc = MPI_Get_count(&Stat[i], MPI_CHAR, &count);
    printf("Task %d: Received %d char(s) from task %d with tag %d \n",
           rank, count, Stat[i].MPI_SOURCE, Stat[i].MPI_TAG);
}
MPI_Finalize();
}

这里的输出:

d) I am: 1
e) I am: 1
INBUFFER OF WORKER 1: Hello--------------
f) I am: 1
a) I am: 0
OUTBUFFER OF WORKER 0: Hello World, today is a not so fantastic day for programmers
b) I am: 0
OUTBUFFER OF WORKER 0: Hello World, today is a not so fantastic day for programmers
c) I am: 0
Task 0: Received 0 char(s) from task 4227856 with tag 0 
Task 0: Received 6365232 char(s) from task 0 with tag 4227856 
INBUFFER OF WORKER 1: Hello Worl---------
Task 1: Received 5 char(s) from task 0 with tag 1 
Task 1: Received 5 char(s) from task 0 with tag 10 

此输出似乎是正确的。现在下面的代码应该产生相同的输出(据我有限的理解),但情况并非如此,如最后所示:

#include "mpi.h"
#include <stdio.h>
#define NUM 10
#define TRANSACTIONS 1
/*
 * Second variant of the two-process demo (the listing the question is about).
 *
 * Rank 0 sends the first NUM characters of outmsg to rank 1 with tag 99.
 * Rank 1 receives them into inmsg starting at offset NUM and stores the
 * receive status in Stat[1].
 * Afterwards every rank prints Stat[0] and Stat[1].
 *
 * NOTE(review): Stat is declared with TRANSACTIONS elements. With
 * TRANSACTIONS defined as 1, Stat[1] does not exist, so both the MPI_Recv
 * below and the final loop access memory past the end of the array
 * (undefined behavior; neighbouring locals may be overwritten). The defect is
 * intentionally left in place because it is the subject of this listing;
 * declaring the array as Stat[TRANSACTIONS + 1] would remove it.
 */
int main(int argc, char *argv[])  {
int numtasks, rank, dest, source, rc, count, tag=1;  
char outmsg[] = "Hello world, today is a beautiful day.\n";
char inmsg[30] = "-----------------------------";
MPI_Request req;
MPI_Status Stat[TRANSACTIONS];
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
char* baseaddr;
if (rank == 0) {
  dest = 1;
  source = 1;
  rc = MPI_Send(outmsg, NUM, MPI_CHAR, dest, 99, MPI_COMM_WORLD);
  printf("OUTBUFFER OF WORKER %d: %s\n", rank, outmsg);
  printf("a) I am: %d\n", rank);
  /* Only used by the commented-out second send below. */
  baseaddr = outmsg + NUM; 
  //rc = MPI_Send(baseaddr, NUM, MPI_CHAR, dest, 999, MPI_COMM_WORLD);
  printf("OUTBUFFER OF WORKER %d: %s\n", rank, outmsg);
  printf("b) I am: %d\n", rank);
  //rc = MPI_Irecv(&inmsg, NUM, MPI_CHAR, source, 2, MPI_COMM_WORLD, &req);
  } 
else if (rank == 1) {
  dest = 0;
  source = 0;
  //rc = MPI_Recv(inmsg, NUM, MPI_CHAR, source, 99, MPI_COMM_WORLD, &Stat[0]);
  printf("INBUFFER OF WORKER %d: %s\n", rank, inmsg);
  printf("c) I am: %d\n", rank);
  /* The message is written NUM characters into inmsg. */
  baseaddr = inmsg + NUM; 
  /* Out of bounds: &Stat[1] is one past the only element of Stat. */
  rc = MPI_Recv(baseaddr, NUM, MPI_CHAR, source, 99, MPI_COMM_WORLD, &Stat[1]);
  printf("INBUFFER OF WORKER %d: %s\n", rank, inmsg);
  printf("d) I am: %d\n", rank);
  //rc = MPI_Isend(&outmsg, NUM, MPI_CHAR, dest, 2, MPI_COMM_WORLD, &req);
  }
/* Runs for i = 0 and i = 1 (note the <=); i = 1 reads past the end of Stat. */
for(int i=0; i<=TRANSACTIONS; ++i){
    rc = MPI_Get_count(&Stat[i], MPI_CHAR, &count);
    printf("Task %d: Received %d char(s) from task %d with tag %d \n",
           rank, count, Stat[i].MPI_SOURCE, Stat[i].MPI_TAG);
}
MPI_Finalize();
}
这里的输出与上面的输出并不等同:

OUTBUFFER OF WORKER 0: Hello world, today is a beautiful day.
a) I am: 0
OUTBUFFER OF WORKER 0: Hello world, today is a beautiful day.
b) I am: 0
Task 0: Received 0 char(s) from task 6362440 with tag 0 
Task 0: Received 2 char(s) from task 0 with tag 1969314218 
INBUFFER OF WORKER 1: Hello worl-------------------
c) I am: 1
INBUFFER OF WORKER 0: Hello world, today i---------
d) I am: 0
Task 0: Received 10 char(s) from task 0 with tag 99 
Task 0: Received 10 char(s) from task 0 with tag 999 

可以观察到,在 c) 和 d) 之间,rank 似乎从 1 变成了 0:

c) I am: 1
d) I am: 0

另请注意,第二条 INBUFFER 输出显示的是 WORKER 0(而不是 WORKER 1)。

我遗漏了什么?谢谢。

你忽略的是:你把 Stat 声明成了只有一个元素的数组:

#define TRANSACTIONS 1
MPI_Status Stat[TRANSACTIONS];

但是你引用数组的(越界)第二个元素Stat[1]

rc = MPI_Recv(baseaddr, NUM, MPI_CHAR, source, 99, MPI_COMM_WORLD, &Stat[1]);
//                                                                  ^^^^^^^

这会导致一部分栈内存被覆盖,其中就包括保存 rank 的那块栈内存。