如何使用 Open MPI 使我的程序更快?我的 Open MPI 程序目前比原来慢,我有什么不明白的?

How can I make my program faster with Open MPI? My Open MPI program is currently slower than the original; what am I not understanding?

本文关键字:我的 MPI 程序 Open 什么 明白 目前 何使用 原来      更新时间:2023-10-16

我已经为一个使用字典攻击破解密码的任务编写了一个程序,并试图使用 Open MPI 加速它,但我的 Open MPI 版本较慢,我不确定为什么或我不理解什么。加密的密码是使用盐和传递到 unix 函数"crypt"中的字符串生成的。

根据我从课堂讲义中学到的内容,下面是我写出的代码。

main.cc:

//****************************************************
// File: main.cc
// Author: Jordan Ward
// Purpose: Crack passwords in the form word+number
// or number+word where number can be at most
// three digits long using
// Open MPI to make it more efficient.
//*****************************************************
#include <unistd.h>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <mpi.h>
using namespace std;
// Fills encPass from the file named by argv[1], words from the file named by
// argv[2], and salts from a file called "salts", one entry per line.
// Terminates the process when fewer than two file arguments are given.
void file_IO(int argc, char *argv[], vector<string> &encPass, vector<string> &words,
vector<string> &salts);
// Appends every candidate password to guesses: each word combined with a
// one-, two- or three-digit number placed after it or before it.
void build_guesses(vector<string> &guesses, vector<string> &words);
// Hashes every guess with every salt through crypt() and prints a guess
// whose hash equals salt + pass, i.e. one that matches the encrypted password.
void crack(string pass, vector<string> &salts, vector<string> &guesses);
// Broadcasts the four vectors from process 0 with MPI_Bcast.
void broadcast_receive(vector<string> &encPass, vector<string> &words,
vector<string> &salts, vector<string> &guesses);
// Flattens a vector of strings into a single vector of chars by
// concatenating their characters, with no separator between strings.
vector<char> convert(vector<string> &strings);
// Entry point.
// Rank 0 reads the input files and builds the guess list, the data is then
// shared with every rank, and each rank cracks its own share of the
// encrypted passwords.
// Returns 0 on normal completion.
int main(int argc, char *argv[]) {
    vector<string> encPass;
    vector<string> words;
    vector<string> salts;
    vector<string> guesses;
    int numProcesses = 1;
    int procNum = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numProcesses);   // Get the number of processes
    MPI_Comm_rank(MPI_COMM_WORLD, &procNum);        // Get the process number

    if (procNum == 0) {
        file_IO(argc, argv, encPass, words, salts);
        build_guesses(guesses, words);
    }
    broadcast_receive(encPass, words, salts, guesses);

    // Round-robin partition: rank r handles passwords r, r + N, r + 2N, ...
    // Every password is handled by exactly one rank, and rank 0 takes part
    // too, so a single-process run still does all of the work.
    const size_t stride = static_cast<size_t>(numProcesses);
    for (size_t i = static_cast<size_t>(procNum); i < encPass.size(); i += stride) {
        const size_t del = encPass[i].rfind('$');   // Position of the last '$'
        if (del == string::npos) {
            // Without a '$' there is no hash part to compare against;
            // substr(npos) would throw, so report the entry and move on.
            cerr << "Skipping malformed password entry " << i << endl;
            continue;
        }
        // The hash part is the substring that starts at the last '$'.
        crack(encPass[i].substr(del), salts, guesses);
    }

    MPI_Finalize();
    return 0;
}

// Appends every line of the file at `path` to `lines`.
// Returns false when the file cannot be opened.
static bool read_lines(const char *path, vector<string> &lines) {
    ifstream input(path);
    if (!input) {
        return false;
    }
    string line;
    while (getline(input, line)) {
        lines.push_back(line);
    }
    return true;
}

// Loads the three input lists, one entry per line:
//   encPass <- file named by argv[1] (encrypted passwords)
//   words   <- file named by argv[2] (dictionary)
//   salts   <- file "salts" in the current working directory
// Prints a message and exits with status 1 when a file argument is missing
// or when any of the three files cannot be opened, instead of silently
// continuing with an empty list.
void file_IO(int argc, char *argv[], vector<string> &encPass, vector<string> &words,
             vector<string> &salts) {
    if (argc < 3) {
        cout << "One or more files were not specified." << endl;
        cout << "Correct format is 'mpiexec a.out file1 file2'" << endl;
        exit(1);
    }
    if (!read_lines(argv[1], encPass)) {
        cerr << "Could not open password file '" << argv[1] << "'" << endl;
        exit(1);
    }
    if (!read_lines(argv[2], words)) {
        cerr << "Could not open dictionary file '" << argv[2] << "'" << endl;
        exit(1);
    }
    if (!read_lines("salts", salts)) {
        cerr << "Could not open salt file 'salts'" << endl;
        exit(1);
    }
}
// Renders `value` as a decimal string of exactly `width` characters,
// left-padded with '0' (e.g. value 7, width 3 -> "007").
static string zero_padded(size_t value, size_t width) {
    string digits(width, '0');
    for (size_t pos = width; pos-- > 0; value /= 10) {
        digits[pos] = static_cast<char>('0' + value % 10);
    }
    return digits;
}

// Appends every candidate password to `guesses`.
// For each digit count from 1 to 3, first all "word + number" combinations
// are added (words in the outer loop), then all "number + word" combinations
// (numbers in the outer loop). Numbers keep their leading zeros, so the
// two-digit group runs from "00" to "99" and the three-digit group from
// "000" to "999". Existing contents of `guesses` are preserved.
void build_guesses(vector<string> &guesses, vector<string> &words) {
    const size_t maxDigits = 3;
    // 10 + 100 + 1000 numbers, each used once as a suffix and once as a prefix.
    const size_t guessesPerWord = 2 * (10 + 100 + 1000);
    guesses.reserve(guesses.size() + words.size() * guessesPerWord);

    size_t numberCount = 1;
    for (size_t width = 1; width <= maxDigits; ++width) {
        numberCount *= 10;

        // Format each number once per width rather than once per word.
        vector<string> numbers;
        numbers.reserve(numberCount);
        for (size_t n = 0; n < numberCount; ++n) {
            numbers.push_back(zero_padded(n, width));
        }

        // word followed by number
        for (const string &word : words) {
            for (const string &number : numbers) {
                guesses.push_back(word + number);
            }
        }
        // number followed by word
        for (const string &number : numbers) {
            for (const string &word : words) {
                guesses.push_back(number + word);
            }
        }
    }
}
// Dictionary attack on one encrypted password.
//   pass    - hash part of the encrypted password (from its last '$' onward)
//   salts   - candidate salts to hash with
//   guesses - candidate plaintext passwords
// Hashes each guess with each salt through crypt() and compares the result
// with salt + pass. Prints "Password: <guess>" for the first match and stops
// searching; prints "Password not found" only when no combination matched.
void crack(string pass, vector<string> &salts, vector<string> &guesses) {
    for (const string &salt : salts) {
        // The expected hash depends only on the salt, so build it once per
        // salt instead of once per guess.
        const string target = salt + pass;
        for (const string &guess : guesses) {
            const char *hashed = crypt(guess.c_str(), salt.c_str());
            // crypt() returns a null pointer on failure (e.g. a salt it does
            // not accept); building a std::string from it would be undefined.
            if (hashed != nullptr && target == hashed) {
                cout << "Password: " + guess << endl;
                return;
            }
        }
    }
    cout << "Password not found" << endl;
}
void broadcast_receive(vector<string> &encPass, vector<string> &words,
vector<string> &salts, vector<string> &guesses) {
int buffer[5];
buffer[0] = encPass.size();
buffer[1] = words.size();
buffer[2] = salts.size();
buffer[3] = guesses.size();
MPI_Bcast(buffer, 4, MPI_INT, 0, MPI_COMM_WORLD);
encPass.resize(buffer[0]);
words.resize(buffer[1]);
salts.resize(buffer[2]);
guesses.resize(buffer[3]);
vector<char> ep = convert(encPass);
vector<char> w = convert(words);
vector<char> s = convert(salts);
vector<char> g = convert(guesses);
MPI_Bcast(ep.data(), ep.size(), MPI_CHAR, 0, MPI_COMM_WORLD);
MPI_Bcast(w.data(), w.size(), MPI_CHAR, 0, MPI_COMM_WORLD);
MPI_Bcast(s.data(), s.size(), MPI_CHAR, 0, MPI_COMM_WORLD);
MPI_Bcast(g.data(), g.size(), MPI_CHAR, 0, MPI_COMM_WORLD);
}
// Flattens `strings` into one vector of chars by concatenating the
// characters of each string in order, with no separator between strings.
// Each string contributes its C-string contents, i.e. the characters up to
// its first NUL.
// Returns the flattened characters; an empty input yields an empty vector.
vector<char> convert(vector<string> &strings) {
    // Reserve for the total character count so the buffer is allocated once.
    size_t totalChars = 0;
    for (const string &entry : strings) {
        totalChars += entry.size();
    }

    vector<char> cstrings;
    cstrings.reserve(totalChars);
    for (const string &entry : strings) {
        // Measure the C string once and copy it as a range, rather than
        // re-measuring it on every loop test.
        const char *text = entry.c_str();
        cstrings.insert(cstrings.end(), text, text + string::traits_type::length(text));
    }
    return cstrings;
}

我的思考过程是:

如果进程号为 0,则读入文件并使用文件中的字符串构建向量,然后构建猜测列表。

否则,接收所有列表并浏览每个加密密码,看看是否有任何盐与任何猜测相结合,与加密密码匹配。

我哪里做得不对或理解有误,才使它比没有 Open MPI 代码的原始版本更慢?原始代码就是同一个文件,只是没有 broadcast_receive 和 convert 函数,主函数中当然也没有 MPI 调用。

我使用 mpic++ -std=c++11 -Wall main.cc -lcrypt 进行编译,然后使用 mpiexec a.out enc_passwords words 运行,其中 enc_passwords 是一个小文件,包含一些由 crypt 函数生成的加密密码,words 是用于构建猜测的小型单词列表。

关于你的第一个问题(为什么 MPI 没有"更快"?),你需要问两个问题:

问:工作实际上是否并行分区到多个处理器?

问:消息传递的开销是否超过了您尝试并行化的实际工作?

这应该有助于解决这两个问题:

OpenMPI 常见问题解答:性能工具

关于您的后续评论"……但出于某种原因,它抛出了大量错误":……或者干脆恢复到"可以工作"的代码。