diff options
Add OpenMP pragmas to independently processable coupling loops
The rudimentary and highly inefficient OpenMPI "support" of the
current grid refinement implementation is not something you want to
actually use. Making this performant requires at least:
* Refinement-aware load balancing
* Load-balancing aware coupler communication (i.e. not dumbly
communicating coupling information to processors that process
neither the relevant coarse nor the fine grid)
Until this issue is solved, OpenMP delivers acceptable results on
shared-memory platforms, e.g. processing 13500 refined grid points
in `apps/adrian/cylinder2d/optimized_grid` takes about 1.3 times
as long as processing the same cell count in a uniform grid.
-rw-r--r-- | src/refinement/coupler2D.hh | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/src/refinement/coupler2D.hh b/src/refinement/coupler2D.hh index 373a6de..c2b27db 100644 --- a/src/refinement/coupler2D.hh +++ b/src/refinement/coupler2D.hh @@ -109,6 +109,7 @@ void FineCoupler2D<T,DESCRIPTOR>::store() { auto& coarseLattice = this->_coarse.getSuperLattice(); + #pragma omp parallel for for (int y=0; y < this->_coarseSize; ++y) { const auto pos = this->getCoarseLatticeR(y); T rho{}; @@ -160,6 +161,7 @@ void FineCoupler2D<T,DESCRIPTOR>::interpolate() { auto& coarseLattice = this->_coarse.getSuperLattice(); + #pragma omp parallel for for (int y=0; y < this->_coarseSize; ++y) { Cell<T,DESCRIPTOR> coarseCell; coarseLattice.get(this->getCoarseLatticeR(y), coarseCell); @@ -184,6 +186,7 @@ void FineCoupler2D<T,DESCRIPTOR>::couple() const auto& coarseLattice = this->_coarse.getSuperLattice(); auto& fineLattice = this->_fine.getSuperLattice(); + #pragma omp parallel for for (int y=0; y < this->_coarseSize; ++y) { const auto& coarsePos = this->getCoarseLatticeR(y); const auto& finePos = this->getFineLatticeR(2*y); @@ -201,6 +204,7 @@ void FineCoupler2D<T,DESCRIPTOR>::couple() fineLattice.set(finePos, cell); } + #pragma omp parallel for for (int y=1; y < this->_coarseSize-2; ++y) { const auto rho = order4interpolation(_c2f_rho, y); const auto u = order4interpolation(_c2f_u, y); @@ -305,6 +309,7 @@ void CoarseCoupler2D<T,DESCRIPTOR>::couple() const auto& fineLattice = this->_fine.getSuperLattice(); auto& coarseLattice = this->_coarse.getSuperLattice(); + #pragma omp parallel for for (int y=0; y < this->_coarseSize; ++y) { const auto& finePos = this->getFineLatticeR(2*y); const auto& coarsePos = this->getCoarseLatticeR(y); |