From 27ce855378a80dff680c2989800af1f4e69975fe Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Sat, 9 Nov 2019 20:21:27 +0100 Subject: Implement basic version of the SSS pattern for C++ target An interesting extension of the AA pattern. The main advantage of this is that updating pointers in a control structure is much more elegant than duplicating all function implementations as is required by the normal AA pattern. For more details see [1]. Only works for the SOA layout. On a pure memory access level this pattern is equivalent to the AA pattern. The difference is how the memory locations are calculated (by pointer swap & shift or by different indexing functions for odd and even time steps). [1]: "An auto-vectorization friendly parallel lattice Boltzmann streaming scheme for direct addressing" by Mohrhard et al. (2019) --- README.md | 2 +- boltzgen.py | 2 +- boltzgen/kernel/template/pattern/SSS.cpp.mako | 55 ++++++++++++++++++++++ .../template/update_sss_control_structure.cpp.mako | 15 ++++++ 4 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 boltzgen/kernel/template/pattern/SSS.cpp.mako create mode 100644 boltzgen/kernel/template/update_sss_control_structure.cpp.mako diff --git a/README.md b/README.md index fa8c1c6..13f4d0c 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ At the moment this is a more structured and cleaned up version of the OpenCL ker * array-of-structures and structure-of-arrays memory layouts * configurable cell indexing sequence * static resolution of memory offsets -* AB and AA streaming patterns +* AB, AA and SSS streaming patterns * C++ and OpenCL targets * simple CLI frontend diff --git a/boltzgen.py b/boltzgen.py index 216234f..c2eb297 100755 --- a/boltzgen.py +++ b/boltzgen.py @@ -13,7 +13,7 @@ argparser.add_argument('--model', required = False, help = 'LBM model (curre argparser.add_argument('--precision', required = True, help = 'Floating precision ("single" or "double")') 
argparser.add_argument('--layout', required = True, help = 'Memory layout ("AOS" or "SOA")') argparser.add_argument('--index', required = False, help = 'Cell indexing ("XYZ" or "ZYX")') -argparser.add_argument('--streaming', required = True, help = 'Streaming pattern ("AB" or "AA")') +argparser.add_argument('--streaming', required = True, help = 'Streaming pattern ("AB", "AA" or "SSS")') argparser.add_argument('--geometry', required = True, help = 'Size of the block geometry ("x:y(:z)")') argparser.add_argument('--tau', required = True, help = 'BGK relaxation time') diff --git a/boltzgen/kernel/template/pattern/SSS.cpp.mako b/boltzgen/kernel/template/pattern/SSS.cpp.mako new file mode 100644 index 0000000..a60b03a --- /dev/null +++ b/boltzgen/kernel/template/pattern/SSS.cpp.mako @@ -0,0 +1,55 @@ +<%def name="operator(name, params = None)"> +<% +if layout.__class__.__name__ != 'SOA': + raise Exception('SSS pattern only works for the SOA memory layout') +%> +/* SSS operator: loads f_curr_i through f[i] + cell_preshift(gid), runs the caller body, then stores f_next at the index of -c_i back through the same pointers. */ void ${name}( + ${float_type}** f + , std::size_t gid +% if params is not None: +% for param_type, param_name in params: + , ${param_type} ${param_name} +% endfor +% endif +) { +% for i, c_i in enumerate(descriptor.c): + ${float_type}* preshifted_f_${i} = f[${i}] + ${layout.cell_preshift('gid')}; +% endfor + +% for i, c_i in enumerate(descriptor.c): + const ${float_type} f_curr_${i} = *preshifted_f_${i}; +% endfor + + ${caller.body()} + +% for i, c_i in enumerate(descriptor.c): + *preshifted_f_${i} = f_next_${descriptor.c.index(-c_i)}; +% endfor +} +</%def> + +<%def name="functor(name, params = None)"> +<% +if layout.__class__.__name__ != 'SOA': + raise Exception('SSS pattern only works for the SOA memory layout') +%> +/* SSS functor: the preshifted pointers are const, so the template itself performs no stores; f_curr_i is loaded through the pointer at the index of -c_i. */ void ${name}( + ${float_type}** f + , std::size_t gid +% if params is not None: +% for param_type, param_name in params: + , ${param_type} ${param_name} +% endfor +% endif +) { +% for i, c_i in enumerate(descriptor.c): + const ${float_type}* preshifted_f_${i} = f[${i}] + ${layout.cell_preshift('gid')}; +% 
endfor + +% for i, c_i in enumerate(descriptor.c): + const ${float_type} f_curr_${i} = *preshifted_f_${descriptor.c.index(-c_i)}; +% endfor + + /* caller-supplied body; the f_curr_* values loaded above are in scope here */ ${caller.body()} +} +</%def> diff --git a/boltzgen/kernel/template/update_sss_control_structure.cpp.mako b/boltzgen/kernel/template/update_sss_control_structure.cpp.mako new file mode 100644 index 0000000..9197022 --- /dev/null +++ b/boltzgen/kernel/template/update_sss_control_structure.cpp.mako @@ -0,0 +1,15 @@ +<% +if streaming != 'SSS': + raise Exception('"update_sss_control_structure" function only makes sense for the SSS pattern') +%> +/* Updates the SSS control structure in three passes: snapshot every f[i], reassign f[i] to the snapshot at the index of -c_i, then shift f[i] by neighbor_offset(-c_i). */ void update_sss_control_structure(${float_type}** f) { +% for i, c_i in enumerate(descriptor.c): + ${float_type}* f_old_${i} = f[${i}]; +% endfor +% for i, c_i in enumerate(descriptor.c): + f[${i}] = f_old_${descriptor.c.index(-c_i)}; +% endfor +% for i, c_i in enumerate(descriptor.c): + f[${i}] += ${layout.neighbor_offset(-c_i)}; +% endfor +} -- cgit v1.2.3