使用OpenMP进行并行求和——当我无法使用reduction(归约)子句时该怎么办?
Parallel summation with OpenMP - what to do when I can't use the reduction clause?
我在一个类中有一个函数,它在我的代码中会被调用数百万次。这个函数里有一些计算量很大的循环,可以并行化。我的问题是,这些循环执行的求和结果存储在非标量(non-scalar)变量中。代码如下。
// Accumulates pair interactions over all particle pairs (i < j) into the
// scalar U and the per-particle array F, both of which are defined outside
// this block (presumably data members of Forces - confirm against the class).
//
// For every pair the separation is wrapped with
//     r_k = d_k - L*round(d_k/L)          (d_k = Posicoes[i][k]-Posicoes[j][k])
// and, when r^2 <= rc^2, the pair contributes
//     U       += 4*epsilon*(sigma^12/r^12 - sigma^6/r^6)
//     F[i][k] += t,  F[j][k] -= t
//     t = (24*epsilon/r^2)*(2*sigma^12/r^12 - sigma^6/r^6)*r_k
//
// Parameters:
//   Posicoes - one row per particle; only columns 0..2 are read.
//   epsilon, sigma - coefficients of the pair expression above.
//   rc - cutoff distance; pairs with r^2 > rc^2 are skipped.
//   L  - period used to wrap each separation component.
//
// Neither U nor F is reset here; contributions are added to their current
// values.
void Forces::ForRes(vector<vector<double> > & Posicoes,double epsilon,double sigma,double rc,double L)
{
    const size_t n = Posicoes.size();

    // With fewer than two particles there is no pair to visit. The guard also
    // avoids evaluating `size() - 1` on an empty vector, which wraps around to
    // the largest unsigned value and keeps the outer loop from terminating.
    if (n < 2)
        return;

    const double sigma6 = pow(sigma,6);
    const double sigma12 = pow(sigma6,2);
    const double rc2 = rc*rc;   // loop-invariant, hoisted out of the pair loop

    for (size_t i = 0; i + 1 < n; i++)
    {
        for (size_t j = i + 1; j < n; j++)
        {
            // Wrapped separation components, computed once and reused for
            // both the distance and the force.
            double rij[3];
            for (int k = 0; k < 3; k++)
                rij[k] = (Posicoes[i][k]-Posicoes[j][k]) - L*round((Posicoes[i][k]-Posicoes[j][k])/L);

            const double rij_2 = rij[0]*rij[0] + rij[1]*rij[1] + rij[2]*rij[2];

            if (rij_2 <= rc2)
            {
                // The powers are only needed for pairs inside the cutoff.
                const double rij_6 = pow(rij_2,3);
                const double rij_12 = pow(rij_6,2);

                U += 4*epsilon*((sigma12)/(rij_12)- (sigma6)/(rij_6));

                // Factor of t that does not depend on k.
                const double coef = ((24*epsilon)/(rij_2))*(2*(sigma12)/(rij_12)- (sigma6)/(rij_6));
                for (int k = 0; k < 3; k++)
                {
                    const double t = coef*rij[k];
                    F[i][k] += t;   // equal and opposite contributions
                    F[j][k] -= t;
                }
            }
        }
    }
}
下面是我在代码的另一部分中做的一个例子:
// For every i, sums the squares of Momentos.M[i][0..2] into pi_2 and adds
// pi_2/2 to K; K is combined across threads by the reduction(+:K) clause.
// NOTE(review): pi_2 is listed in private(), so each thread gets its own copy
// whose initial value is indeterminate. The first `pi_2 +=` executed by a
// thread therefore reads an uninitialized value; the reset at the bottom of
// the loop body only protects that thread's later iterations.
#pragma omp parallel for default(shared) reduction(+:K) private(pi_2)
for (int i = 0 ; i < Nparticulas;i++)
{
for (int k = 0 ; k < 3 ; k++)
{
pi_2 += Momentos.M[i][k]*Momentos.M[i][k];
}
K += pi_2/2;
// Clears the accumulator for the next iteration handled by this thread.
pi_2 = 0;
}
提前谢谢。
@phadjido建议后的代码:
// OpenMP version of the pair loop: accumulates the pair energy into U and the
// pair forces into F using a hand-written reduction (per-thread partial
// results merged once per thread), so no reduction clause is needed.
// U and F are defined outside this block (presumably data members of Forces -
// confirm against the class).
//
// For every pair i < j the separation is wrapped with
//     r_k = d_k - L*round(d_k/L)          (d_k = Posicoes[i][k]-Posicoes[j][k])
// and, when r^2 <= rc^2, the pair contributes
//     U       += 4*epsilon*(sigma^12/r^12 - sigma^6/r^6)
//     F[i][k] += t,  F[j][k] -= t
//     t = (24*epsilon/r^2)*(2*sigma^12/r^12 - sigma^6/r^6)*r_k
//
// Parameters:
//   Posicoes - one row per particle; only columns 0..2 are read.
//   epsilon, sigma - coefficients of the pair expression above.
//   rc - cutoff distance; pairs with r^2 > rc^2 are skipped.
//   L  - period used to wrap each separation component.
//
// U is reset to zero on entry; F is not reset, contributions are added to its
// current contents. Because the order in which threads merge their partial
// sums is not fixed, results may differ in the last bits from run to run.
// Without OpenMP enabled the pragmas are ignored and the code runs serially.
void Forces::ForRes(vector<vector<double> > & Posicoes,double epsilon,double sigma,double rc,double L)
{
    U = 0;

    // Signed loop bounds: avoids the unsigned wrap-around of `size() - 1` on
    // an empty vector and is accepted by every OpenMP version as a loop index.
    const int n = static_cast<int>(Posicoes.size());
    if (n < 2)
        return;

    const double sigma6 = pow(sigma,6);
    const double sigma12 = sigma6*sigma6;
    const double rc2 = rc*rc;

    // One parallel region for the whole pair loop instead of one per outer
    // iteration, so threads and buffers are set up once per call.
    #pragma omp parallel
    {
        // Thread-local partial results. Everything declared inside the region
        // is private to the thread, so no private() clause is needed and none
        // of the temporaries can be shared by accident.
        double Up = 0;
        vector<double> Fp(3*static_cast<size_t>(n), 0.0);   // flat n x 3, row-major

        // Row i holds n-1-i pairs, so the work per iteration is uneven;
        // dynamic scheduling balances it. nowait: each thread can start
        // merging as soon as it runs out of rows.
        #pragma omp for schedule(dynamic) nowait
        for (int i = 0; i < n - 1; i++)
        {
            for (int j = i + 1; j < n; j++)
            {
                double rij[3];
                for (int k = 0; k < 3; k++)
                {
                    const double d = Posicoes[i][k] - Posicoes[j][k];
                    rij[k] = d - L*round(d/L);
                }
                const double rij_2 = rij[0]*rij[0] + rij[1]*rij[1] + rij[2]*rij[2];

                if (rij_2 <= rc2)
                {
                    // Plain multiplications instead of pow() for the small
                    // integer powers in the hot loop.
                    const double rij_6 = rij_2*rij_2*rij_2;
                    const double rij_12 = rij_6*rij_6;

                    Up += 4*epsilon*(sigma12/rij_12 - sigma6/rij_6);

                    const double coef = ((24*epsilon)/rij_2)*(2*sigma12/rij_12 - sigma6/rij_6);
                    for (int k = 0; k < 3; k++)
                    {
                        const double t = coef*rij[k];
                        Fp[3*i + k] += t;   // equal and opposite contributions
                        Fp[3*j + k] -= t;
                    }
                }
            }
        }

        // Merge step: Fp already carries the correct sign for both partners
        // of every pair, so each entry is simply added exactly once. A single
        // critical section per thread replaces one atomic per array element.
        #pragma omp critical(ForRes_merge)
        {
            U += Up;
            for (int i = 0; i < n; i++)
                for (int k = 0; k < 3; k++)
                    F[i][k] += Fp[3*i + k];
        }
    }
}
如果编译器不支持用户自定义归约,您可以自己手动实现归约操作。下面的代码展示了如何针对您的第二个示例做到这一点。请注意,pi_2 在每次循环迭代开始时被初始化为零。在您的示例中,pi_2 是私有(private)变量,其初值未定义,不一定为零;若要保留原来的写法,则需要改用 firstprivate,并在并行区域之前对 pi_2 进行适当的初始化。
K = 0;
#pragma omp parallel private(pi_2)
{
    // Per-thread partial sum; every thread starts from zero.
    double thread_K = 0;

    #pragma omp for
    for (int i = 0; i < Nparticulas; ++i)
    {
        // pi_2 is private to the thread, so it has to be zeroed before each
        // accumulation rather than relying on its initial value.
        pi_2 = 0;
        for (int k = 0; k < 3; ++k)
            pi_2 += Momentos.M[i][k] * Momentos.M[i][k];

        thread_K += pi_2 / 2;
    }

    // Fold this thread's partial sum into the shared total.
    #pragma omp atomic
    K += thread_K;
}
相关文章:
- OpenMP阵列性能较差
- OpenMP卸载说'fatal error: could not find accel/nvptx-none/mkoffload'
- 使用 GCC 卸载的 OpenMP 卸载失败,并出现"Ptx assembly aborted due to errors"
- OpenMP:并行更新数组是否总是需要数组归约(reduction)
- 如何使用OpenMP并行这两个循环
- 从Python调用OpenMP共享库时,出现未定义的OpenMP函数
- 如何使用OpenMP并行化此矩阵时间矢量运算
- 如何使用OpenMP使这个循环并行
- 如何通过替换顺序代码的while循环来添加OpenMP for循环
- 查找最近点对时的OpenMP竞态条件(race condition)
- 使用输入打破 OpenMP 中的循环
- 为什么 openmp 的并行不适用于矢量化色彩空间转换?
- 在 openmp 中,omp_get_thread_num是否绑定到物理线程?
- 在C++中使用并行化的预期速度是多少(不是 OpenMp,而是 <thread>)
- OpenMP 对 unordered_map<string, double> 的归约(reduction)
- OpenMP 与有序和关键指令并行
- 我使用 OpenMP 的线程越多,执行时间就越长,这是怎么回事?
- OpenMP for 循环并行性问题
- 两个连续的 OpenMP 并行区域会相互减慢速度
- 读取文件时无法使用 OpenMP 获得加速