diff options
author | Adrian Kummerlaender | 2019-10-26 23:00:50 +0200 |
---|---|---|
committer | Adrian Kummerlaender | 2019-10-26 23:00:50 +0200 |
commit | 7fa72d8718d96727bcfd60cc3bcb1609526d3c9b (patch) | |
tree | 826f7b1380c1c735cc17d52c81f508ca3d90e31a | |
parent | 2a976c2c60565ea3f904feaf4ea573b2769e3084 (diff) | |
download | boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.gz boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.bz2 boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.lz boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.xz boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.zst boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.zip |
Add extra toggle for OpenMP in C++ test function
Yields ~160 MLUPs on a Xeon E3-1241 for a D2Q9 double-precision lid-driven cavity.
Obviously nowhere near what is possible on GPUs, but respectable for a CPU implementation,
especially considering how simple it is.
-rw-r--r-- | boltzgen/kernel/template/basic.cpp.mako | 23 |
1 file changed, 16 insertions, 7 deletions
```diff
diff --git a/boltzgen/kernel/template/basic.cpp.mako b/boltzgen/kernel/template/basic.cpp.mako
index 8e06a56..118ef8c 100644
--- a/boltzgen/kernel/template/basic.cpp.mako
+++ b/boltzgen/kernel/template/basic.cpp.mako
@@ -240,16 +240,25 @@ void test_ldc(std::size_t nStep)
             f_prev = f_a.get();
         }
-        for (std::size_t iCell : bulk) {
-            collide_and_stream(f_next, f_prev, iCell);
+% if 'omp_parallel_for' in extras:
+#pragma omp parallel for
+% endif
+        for (std::size_t i = 0; i < bulk.size(); ++i) {
+            collide_and_stream(f_next, f_prev, bulk[i]);
         }
         ${float_type} u[${descriptor.d}] { 0. };
-        for (std::size_t iCell : box_bc) {
-            velocity_momenta_boundary(f_next, f_prev, iCell, u);
+% if 'omp_parallel_for' in extras:
+#pragma omp parallel for
+% endif
+        for (std::size_t i = 0; i < box_bc.size(); ++i) {
+            velocity_momenta_boundary(f_next, f_prev, box_bc[i], u);
         }
-        u[0] = 0.1;
-        for (std::size_t iCell : lid_bc) {
-            velocity_momenta_boundary(f_next, f_prev, iCell, u);
+        u[0] = 0.05;
+% if 'omp_parallel_for' in extras:
+#pragma omp parallel for
+% endif
+        for (std::size_t i = 0; i < lid_bc.size(); ++i) {
+            velocity_momenta_boundary(f_next, f_prev, lid_bc[i], u);
         }
     }
```