From 27ce855378a80dff680c2989800af1f4e69975fe Mon Sep 17 00:00:00 2001
From: Adrian Kummerlaender
Date: Sat, 9 Nov 2019 20:21:27 +0100
Subject: Implement basic version of the SSS pattern for C++ target

An interesting extension of the AA pattern. The main advantage of this is
that updating pointers in a control structure is much more elegant than
duplicating all function implementations as is required by the normal AA
pattern.

For more details see [1]. Only works for the SOA layout. On a pure memory
access level this pattern is equivalent to the AA pattern. The difference is
how the memory locations are calculated (by pointer swap & shift or by
different indexing functions for odd and even time steps).

[1]: "An auto-vectorization friendly parallel lattice Boltzmann streaming
scheme for direct addressing" by Mohrhard et al. (2019)
---
Reviewer notes (commentary only; not part of the commit message or the diff):

* Rendering the template raises an Exception unless `streaming` is exactly
  the string 'SSS'.
* The generated `update_sss_control_structure(f)` takes an array of
  `${float_type}` pointers and rewrites it in place in three passes over
  `descriptor.c`:
    1. copy every `f[i]` into a local `f_old_i`, so the later assignments
       read the pointers as they were on entry, not partially updated ones;
    2. set `f[i]` to `f_old_j`, where `j = descriptor.c.index(-c_i)` is the
       position of the negated entry `-c_i` within `descriptor.c`;
    3. advance `f[i]` by `layout.neighbor_offset(-c_i)`.
* The `% for` lines are Mako control lines evaluated at render time, so the
  emitted C++ contains no loops, only one statement per entry per pass.
* NOTE(review): presumably `descriptor.c` is the list of lattice velocity
  vectors and `layout.neighbor_offset` returns the linear memory offset of
  the neighbouring cell in the given direction; neither definition is
  visible in this patch, so confirm against the descriptor and layout
  modules.
* NOTE(review): pass 2 assumes `-c_i` is itself an entry of `descriptor.c`;
  if `descriptor.c` is a plain Python list, `.index` raises ValueError
  otherwise. Confirm that every supported descriptor is symmetric.
* NOTE(review): the generated function performs no checks on `f` (null,
  length); callers presumably pass one pointer per entry of `descriptor.c`.

 .../kernel/template/update_sss_control_structure.cpp.mako | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 boltzgen/kernel/template/update_sss_control_structure.cpp.mako

(limited to 'boltzgen/kernel/template/update_sss_control_structure.cpp.mako')

diff --git a/boltzgen/kernel/template/update_sss_control_structure.cpp.mako b/boltzgen/kernel/template/update_sss_control_structure.cpp.mako
new file mode 100644
index 0000000..9197022
--- /dev/null
+++ b/boltzgen/kernel/template/update_sss_control_structure.cpp.mako
@@ -0,0 +1,15 @@
+<%
+if streaming != 'SSS':
+    raise Exception('"update_sss_control_structure" function only makes sense for the SSS pattern')
+%>
+void update_sss_control_structure(${float_type}** f) {
+% for i, c_i in enumerate(descriptor.c):
+    ${float_type}* f_old_${i} = f[${i}];
+% endfor
+% for i, c_i in enumerate(descriptor.c):
+    f[${i}] = f_old_${descriptor.c.index(-c_i)};
+% endfor
+% for i, c_i in enumerate(descriptor.c):
+    f[${i}] += ${layout.neighbor_offset(-c_i)};
+% endfor
+}
-- 
cgit v1.2.3