diff options
| author | Adrian Kummerlaender | 2019-10-26 23:00:50 +0200 | 
|---|---|---|
| committer | Adrian Kummerlaender | 2019-10-26 23:00:50 +0200 | 
| commit | 7fa72d8718d96727bcfd60cc3bcb1609526d3c9b (patch) | |
| tree | 826f7b1380c1c735cc17d52c81f508ca3d90e31a /boltzgen/kernel/template | |
| parent | 2a976c2c60565ea3f904feaf4ea573b2769e3084 (diff) | |
| download | boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.gz boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.bz2 boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.lz boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.xz boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.zst boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.zip  | |
Add extra toggle for OpenMP in C++ test function
Yields ~160 MLUPs on a Xeon E3-1241 for a D2Q9 double-precision lid-driven cavity.
This is obviously nowhere near what is possible on GPUs, but it is respectable for a CPU implementation,
especially considering how simple it is.
Diffstat (limited to 'boltzgen/kernel/template')
| -rw-r--r-- | boltzgen/kernel/template/basic.cpp.mako | 23 | 
1 file changed, 16 insertions, 7 deletions
diff --git a/boltzgen/kernel/template/basic.cpp.mako b/boltzgen/kernel/template/basic.cpp.mako index 8e06a56..118ef8c 100644 --- a/boltzgen/kernel/template/basic.cpp.mako +++ b/boltzgen/kernel/template/basic.cpp.mako @@ -240,16 +240,25 @@ void test_ldc(std::size_t nStep)              f_prev = f_a.get();          } -        for (std::size_t iCell : bulk) { -            collide_and_stream(f_next, f_prev, iCell); +% if 'omp_parallel_for' in extras: +#pragma omp parallel for +% endif +        for (std::size_t i = 0; i < bulk.size(); ++i) { +            collide_and_stream(f_next, f_prev, bulk[i]);          }          ${float_type} u[${descriptor.d}] { 0. }; -        for (std::size_t iCell : box_bc) { -            velocity_momenta_boundary(f_next, f_prev, iCell, u); +% if 'omp_parallel_for' in extras: +#pragma omp parallel for +% endif +        for (std::size_t i = 0; i < box_bc.size(); ++i) { +            velocity_momenta_boundary(f_next, f_prev, box_bc[i], u);          } -        u[0] = 0.1; -        for (std::size_t iCell : lid_bc) { -            velocity_momenta_boundary(f_next, f_prev, iCell, u); +        u[0] = 0.05; +% if 'omp_parallel_for' in extras: +#pragma omp parallel for +% endif +        for (std::size_t i = 0; i < lid_bc.size(); ++i) { +            velocity_momenta_boundary(f_next, f_prev, lid_bc[i], u);          }      }  | 
