From c0148c928ff827ee00b6266a4f41649a621698cf Mon Sep 17 00:00:00 2001
From: Adrian Kummerlaender
Date: Thu, 28 Feb 2019 13:05:03 +0100
Subject: Add OpenMP pragmas to independently processable coupling loops

The rudimentary and highly inefficient OpenMPI "support" of the
current grid refinement implementation is not something you want to
actually use. Making this performant requires at least:

* Refinement-aware load balancing
* Load-balancing aware coupler communication (i.e. not dumbly
  communicating coupling information to processors that process
  neither the relevant coarse nor the fine grid)

Until this issue is solved, OpenMP delivers acceptable results on
shared-memory platforms, e.g. processing 13500 refined grid points
in `apps/adrian/cylinder2d/optimized_grid` takes about 1.3 times
as long as processing the same cell count in a uniform grid.
---
 src/refinement/coupler2D.hh | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'src')

diff --git a/src/refinement/coupler2D.hh b/src/refinement/coupler2D.hh
index 373a6de..c2b27db 100644
--- a/src/refinement/coupler2D.hh
+++ b/src/refinement/coupler2D.hh
@@ -109,6 +109,7 @@ void FineCoupler2D<T,DESCRIPTOR>::store()
 {
   auto& coarseLattice = this->_coarse.getSuperLattice();
 
+  #pragma omp parallel for
   for (int y=0; y < this->_coarseSize; ++y) {
     const auto pos = this->getCoarseLatticeR(y);
     T rho{};
@@ -160,6 +161,7 @@ void FineCoupler2D<T,DESCRIPTOR>::interpolate()
 {
   auto& coarseLattice = this->_coarse.getSuperLattice();
 
+  #pragma omp parallel for
   for (int y=0; y < this->_coarseSize; ++y) {
     Cell<T,DESCRIPTOR> coarseCell;
     coarseLattice.get(this->getCoarseLatticeR(y), coarseCell);
@@ -184,6 +186,7 @@ void FineCoupler2D<T,DESCRIPTOR>::couple()
   const auto& coarseLattice = this->_coarse.getSuperLattice();
   auto& fineLattice   = this->_fine.getSuperLattice();
 
+  #pragma omp parallel for
   for (int y=0; y < this->_coarseSize; ++y) {
     const auto& coarsePos = this->getCoarseLatticeR(y);
     const auto& finePos   = this->getFineLatticeR(2*y);
@@ -201,6 +204,7 @@ void FineCoupler2D<T,DESCRIPTOR>::couple()
     fineLattice.set(finePos, cell);
   }
 
+  #pragma omp parallel for
   for (int y=1; y < this->_coarseSize-2; ++y) {
     const auto rho  = order4interpolation(_c2f_rho,  y);
     const auto u    = order4interpolation(_c2f_u,    y);
@@ -305,6 +309,7 @@ void CoarseCoupler2D<T,DESCRIPTOR>::couple()
   const auto& fineLattice = this->_fine.getSuperLattice();
   auto& coarseLattice = this->_coarse.getSuperLattice();
 
+  #pragma omp parallel for
   for (int y=0; y < this->_coarseSize; ++y) {
     const auto& finePos   = this->getFineLatticeR(2*y);
     const auto& coarsePos = this->getCoarseLatticeR(y);
-- 
cgit v1.2.3