summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Kummerlaender2019-02-28 13:05:03 +0100
committerAdrian Kummerlaender2019-06-24 15:18:01 +0200
commitc0148c928ff827ee00b6266a4f41649a621698cf (patch)
treec61435651a9ac77527d1275f59f6d76e389d136b
parent5ce5bd765828734c8b4240f29533dc51f5c65dc5 (diff)
downloadgrid_refinement_openlb-c0148c928ff827ee00b6266a4f41649a621698cf.tar
grid_refinement_openlb-c0148c928ff827ee00b6266a4f41649a621698cf.tar.gz
grid_refinement_openlb-c0148c928ff827ee00b6266a4f41649a621698cf.tar.bz2
grid_refinement_openlb-c0148c928ff827ee00b6266a4f41649a621698cf.tar.lz
grid_refinement_openlb-c0148c928ff827ee00b6266a4f41649a621698cf.tar.xz
grid_refinement_openlb-c0148c928ff827ee00b6266a4f41649a621698cf.tar.zst
grid_refinement_openlb-c0148c928ff827ee00b6266a4f41649a621698cf.zip
Add OpenMP pragmas to independently processable coupling loops
The rudimentary and highly inefficient OpenMPI "support" of the current grid refinement implementation is not something you want to actually use. Making this performant requires at least: * Refinement-aware load balancing * Load-balancing aware coupler communication (i.e. not dumbly communicating coupling information to processors that process neither the relevant coarse nor the fine grid) Until this issue is solved, OpenMP delivers acceptable results on shared-memory platforms, e.g. processing 13500 refined grid points in `apps/adrian/cylinder2d/optimized_grid` takes about 1.3 times as long as processing the same cell count in a uniform grid.
-rw-r--r--src/refinement/coupler2D.hh5
1 files changed, 5 insertions, 0 deletions
diff --git a/src/refinement/coupler2D.hh b/src/refinement/coupler2D.hh
index 373a6de..c2b27db 100644
--- a/src/refinement/coupler2D.hh
+++ b/src/refinement/coupler2D.hh
@@ -109,6 +109,7 @@ void FineCoupler2D<T,DESCRIPTOR>::store()
{
auto& coarseLattice = this->_coarse.getSuperLattice();
+ #pragma omp parallel for
for (int y=0; y < this->_coarseSize; ++y) {
const auto pos = this->getCoarseLatticeR(y);
T rho{};
@@ -160,6 +161,7 @@ void FineCoupler2D<T,DESCRIPTOR>::interpolate()
{
auto& coarseLattice = this->_coarse.getSuperLattice();
+ #pragma omp parallel for
for (int y=0; y < this->_coarseSize; ++y) {
Cell<T,DESCRIPTOR> coarseCell;
coarseLattice.get(this->getCoarseLatticeR(y), coarseCell);
@@ -184,6 +186,7 @@ void FineCoupler2D<T,DESCRIPTOR>::couple()
const auto& coarseLattice = this->_coarse.getSuperLattice();
auto& fineLattice = this->_fine.getSuperLattice();
+ #pragma omp parallel for
for (int y=0; y < this->_coarseSize; ++y) {
const auto& coarsePos = this->getCoarseLatticeR(y);
const auto& finePos = this->getFineLatticeR(2*y);
@@ -201,6 +204,7 @@ void FineCoupler2D<T,DESCRIPTOR>::couple()
fineLattice.set(finePos, cell);
}
+ #pragma omp parallel for
for (int y=1; y < this->_coarseSize-2; ++y) {
const auto rho = order4interpolation(_c2f_rho, y);
const auto u = order4interpolation(_c2f_u, y);
@@ -305,6 +309,7 @@ void CoarseCoupler2D<T,DESCRIPTOR>::couple()
const auto& fineLattice = this->_fine.getSuperLattice();
auto& coarseLattice = this->_coarse.getSuperLattice();
+ #pragma omp parallel for
for (int y=0; y < this->_coarseSize; ++y) {
const auto& finePos = this->getFineLatticeR(2*y);
const auto& coarsePos = this->getCoarseLatticeR(y);