aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Adrian Kummerlaender 2019-10-26 23:00:50 +0200
committer Adrian Kummerlaender 2019-10-26 23:00:50 +0200
commit 7fa72d8718d96727bcfd60cc3bcb1609526d3c9b (patch)
tree 826f7b1380c1c735cc17d52c81f508ca3d90e31a
parent 2a976c2c60565ea3f904feaf4ea573b2769e3084 (diff)
download boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar
boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.gz
boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.bz2
boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.lz
boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.xz
boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.tar.zst
boltzgen-7fa72d8718d96727bcfd60cc3bcb1609526d3c9b.zip
Add extra toggle for OpenMP in C++ test function
Yields ~160 MLUPs on a Xeon E3-1241 for a D2Q9 double-precision lid-driven cavity. Obviously not anywhere near what is possible on GPUs, but respectable for a CPU implementation, especially considering how simple it is.
-rw-r--r-- boltzgen/kernel/template/basic.cpp.mako 23
1 files changed, 16 insertions, 7 deletions
diff --git a/boltzgen/kernel/template/basic.cpp.mako b/boltzgen/kernel/template/basic.cpp.mako
index 8e06a56..118ef8c 100644
--- a/boltzgen/kernel/template/basic.cpp.mako
+++ b/boltzgen/kernel/template/basic.cpp.mako
@@ -240,16 +240,25 @@ void test_ldc(std::size_t nStep)
f_prev = f_a.get();
}
- for (std::size_t iCell : bulk) {
- collide_and_stream(f_next, f_prev, iCell);
+% if 'omp_parallel_for' in extras:
+#pragma omp parallel for
+% endif
+ for (std::size_t i = 0; i < bulk.size(); ++i) {
+ collide_and_stream(f_next, f_prev, bulk[i]);
}
${float_type} u[${descriptor.d}] { 0. };
- for (std::size_t iCell : box_bc) {
- velocity_momenta_boundary(f_next, f_prev, iCell, u);
+% if 'omp_parallel_for' in extras:
+#pragma omp parallel for
+% endif
+ for (std::size_t i = 0; i < box_bc.size(); ++i) {
+ velocity_momenta_boundary(f_next, f_prev, box_bc[i], u);
}
- u[0] = 0.1;
- for (std::size_t iCell : lid_bc) {
- velocity_momenta_boundary(f_next, f_prev, iCell, u);
+ u[0] = 0.05;
+% if 'omp_parallel_for' in extras:
+#pragma omp parallel for
+% endif
+ for (std::size_t i = 0; i < lid_bc.size(); ++i) {
+ velocity_momenta_boundary(f_next, f_prev, lid_bc[i], u);
}
}