From c0148c928ff827ee00b6266a4f41649a621698cf Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Thu, 28 Feb 2019 13:05:03 +0100 Subject: Add OpenMP pragmas to independently processable coupling loops The rudimentary and highly inefficient OpenMPI "support" of the current grid refinement implementation is not something you want to actually use. Making this performant requires at least: * Refinement-aware load balancing * Load-balancing aware coupler communication (i.e. not dumbly communicating coupling information to processors that process neither the relevant coarse nor the fine grid) Until this issue is solved, OpenMP delivers acceptable results on shared-memory platforms. e.g. processing 13500 refined grid points in `apps/adrian/cylinder2d/optimized_grid` takes about 1.3 times as long as processing the same cell count in a uniform grid. --- src/refinement/coupler2D.hh | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/refinement') diff --git a/src/refinement/coupler2D.hh b/src/refinement/coupler2D.hh index 373a6de..c2b27db 100644 --- a/src/refinement/coupler2D.hh +++ b/src/refinement/coupler2D.hh @@ -109,6 +109,7 @@ void FineCoupler2D::store() { auto& coarseLattice = this->_coarse.getSuperLattice(); + #pragma omp parallel for for (int y=0; y < this->_coarseSize; ++y) { const auto pos = this->getCoarseLatticeR(y); T rho{}; @@ -160,6 +161,7 @@ void FineCoupler2D::interpolate() { auto& coarseLattice = this->_coarse.getSuperLattice(); + #pragma omp parallel for for (int y=0; y < this->_coarseSize; ++y) { Cell coarseCell; coarseLattice.get(this->getCoarseLatticeR(y), coarseCell); @@ -184,6 +186,7 @@ void FineCoupler2D::couple() const auto& coarseLattice = this->_coarse.getSuperLattice(); auto& fineLattice = this->_fine.getSuperLattice(); + #pragma omp parallel for for (int y=0; y < this->_coarseSize; ++y) { const auto& coarsePos = this->getCoarseLatticeR(y); const auto& finePos = this->getFineLatticeR(2*y); @@ -201,6 +204,7 @@ void 
FineCoupler2D::couple() fineLattice.set(finePos, cell); } + #pragma omp parallel for for (int y=1; y < this->_coarseSize-2; ++y) { const auto rho = order4interpolation(_c2f_rho, y); const auto u = order4interpolation(_c2f_u, y); @@ -305,6 +309,7 @@ void CoarseCoupler2D::couple() const auto& fineLattice = this->_fine.getSuperLattice(); auto& coarseLattice = this->_coarse.getSuperLattice(); + #pragma omp parallel for for (int y=0; y < this->_coarseSize; ++y) { const auto& finePos = this->getFineLatticeR(2*y); const auto& coarsePos = this->getCoarseLatticeR(y); -- cgit v1.2.3