From 7fa72d8718d96727bcfd60cc3bcb1609526d3c9b Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Sat, 26 Oct 2019 23:00:50 +0200 Subject: Add extra toggle for OpenMP in C++ test function Yields ~160 MLUPs on a Xeon E3-1241 for the D2Q9 double-precision lid-driven cavity. This is obviously nowhere near what is possible on GPUs, but it is respectable for a CPU implementation, especially considering how simple it is. --- boltzgen/kernel/template/basic.cpp.mako | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/boltzgen/kernel/template/basic.cpp.mako b/boltzgen/kernel/template/basic.cpp.mako index 8e06a56..118ef8c 100644 --- a/boltzgen/kernel/template/basic.cpp.mako +++ b/boltzgen/kernel/template/basic.cpp.mako @@ -240,16 +240,25 @@ void test_ldc(std::size_t nStep) f_prev = f_a.get(); } - for (std::size_t iCell : bulk) { - collide_and_stream(f_next, f_prev, iCell); +% if 'omp_parallel_for' in extras: +#pragma omp parallel for +% endif + for (std::size_t i = 0; i < bulk.size(); ++i) { + collide_and_stream(f_next, f_prev, bulk[i]); } ${float_type} u[${descriptor.d}] { 0. }; - for (std::size_t iCell : box_bc) { - velocity_momenta_boundary(f_next, f_prev, iCell, u); +% if 'omp_parallel_for' in extras: +#pragma omp parallel for +% endif + for (std::size_t i = 0; i < box_bc.size(); ++i) { + velocity_momenta_boundary(f_next, f_prev, box_bc[i], u); } - u[0] = 0.1; - for (std::size_t iCell : lid_bc) { - velocity_momenta_boundary(f_next, f_prev, iCell, u); + u[0] = 0.05; +% if 'omp_parallel_for' in extras: +#pragma omp parallel for +% endif + for (std::size_t i = 0; i < lid_bc.size(); ++i) { + velocity_momenta_boundary(f_next, f_prev, lid_bc[i], u); } } -- cgit v1.2.3