aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Kummerlaender2019-11-09 20:21:27 +0100
committerAdrian Kummerlaender2019-11-09 20:40:33 +0100
commit27ce855378a80dff680c2989800af1f4e69975fe (patch)
tree887c58dfaf209721bd3e8240d52a042f54028d24
parent286e243a171c8bcdfc91b5b6dcdd937ac95b0b7b (diff)
downloadboltzgen-27ce855378a80dff680c2989800af1f4e69975fe.tar
boltzgen-27ce855378a80dff680c2989800af1f4e69975fe.tar.gz
boltzgen-27ce855378a80dff680c2989800af1f4e69975fe.tar.bz2
boltzgen-27ce855378a80dff680c2989800af1f4e69975fe.tar.lz
boltzgen-27ce855378a80dff680c2989800af1f4e69975fe.tar.xz
boltzgen-27ce855378a80dff680c2989800af1f4e69975fe.tar.zst
boltzgen-27ce855378a80dff680c2989800af1f4e69975fe.zip
Implement basic version of the SSS pattern for C++ target
An interesting extension of the AA pattern. The main advantage of this is that updating pointers in a control structure is much more elegant than duplicating all function implementations as is required by the normal AA pattern. For more details see [1]. Only works for the SOA layout. On a pure memory access level this pattern is equivalent to the AA pattern. The difference is how the memory locations are calculated (by pointer swap & shift or by different indexing functions for odd and even time steps). [1]: "An auto-vectorization friendly parallel lattice Boltzmann streaming scheme for direct addressing" by Mohrhard et al. (2019)
-rw-r--r--README.md2
-rwxr-xr-xboltzgen.py2
-rw-r--r--boltzgen/kernel/template/pattern/SSS.cpp.mako55
-rw-r--r--boltzgen/kernel/template/update_sss_control_structure.cpp.mako15
4 files changed, 72 insertions, 2 deletions
diff --git a/README.md b/README.md
index fa8c1c6..13f4d0c 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ At the moment this is a more structured and cleaned up version of the OpenCL ker
* array-of-structures and structure-of-arrays memory layouts
* configurable cell indexing sequence
* static resolution of memory offsets
-* AB and AA streaming patterns
+* AB, AA and SSS streaming patterns
* C++ and OpenCL targets
* simple CLI frontend
diff --git a/boltzgen.py b/boltzgen.py
index 216234f..c2eb297 100755
--- a/boltzgen.py
+++ b/boltzgen.py
@@ -13,7 +13,7 @@ argparser.add_argument('--model', required = False, help = 'LBM model (curre
argparser.add_argument('--precision', required = True, help = 'Floating precision ("single" or "double")')
argparser.add_argument('--layout', required = True, help = 'Memory layout ("AOS" or "SOA")')
argparser.add_argument('--index', required = False, help = 'Cell indexing ("XYZ" or "ZYX")')
-argparser.add_argument('--streaming', required = True, help = 'Streaming pattern ("AB" or "AA")')
+argparser.add_argument('--streaming', required = True, help = 'Streaming pattern ("AB", "AA" or "SSS")')
argparser.add_argument('--geometry', required = True, help = 'Size of the block geometry ("x:y(:z)")')
argparser.add_argument('--tau', required = True, help = 'BGK relaxation time')
diff --git a/boltzgen/kernel/template/pattern/SSS.cpp.mako b/boltzgen/kernel/template/pattern/SSS.cpp.mako
new file mode 100644
index 0000000..a60b03a
--- /dev/null
+++ b/boltzgen/kernel/template/pattern/SSS.cpp.mako
@@ -0,0 +1,55 @@
+## operator(name, params): emits a C++ function called `name` that reads the
+## populations of cell `gid`, runs the caller-supplied body and writes the
+## results back through the same pointers.
+##   name   -- identifier of the generated C++ function
+##   params -- optional iterable of (type, name) pairs appended to the
+##             generated parameter list after `f` and `gid`
+## The caller body is expected to define one `f_next_<i>` value per entry of
+## `descriptor.c`, as those names are referenced by the write-back below.
+## Raises an Exception at render time if the memory layout is not SOA.
+<%def name="operator(name, params = None)">
+<%
+# Every population is addressed through its own base pointer f[i], so any layout other than SOA is rejected.
+if layout.__class__.__name__ != 'SOA':
+ raise Exception('SSS pattern only works for the SOA memory layout')
+%>
+void ${name}(
+ ${float_type}** f
+ , std::size_t gid
+% if params is not None:
+% for param_type, param_name in params:
+ , ${param_type} ${param_name}
+% endfor
+% endif
+) {
+## One pointer per population, offset from its base pointer to the cell `gid`.
+% for i, c_i in enumerate(descriptor.c):
+ ${float_type}* preshifted_f_${i} = f[${i}] + ${layout.cell_preshift('gid')};
+% endfor
+
+## Load the current value of every population before the body runs.
+% for i, c_i in enumerate(descriptor.c):
+ const ${float_type} f_curr_${i} = *preshifted_f_${i};
+% endfor
+
+ ${caller.body()}
+
+## Slot i receives the new value of the population whose velocity is -c_i.
+% for i, c_i in enumerate(descriptor.c):
+ *preshifted_f_${i} = f_next_${descriptor.c.index(-c_i)};
+% endfor
+}
+</%def>
+
+## functor(name, params): emits a C++ function called `name` that only reads
+## the populations of cell `gid` (all pointers are const) and then runs the
+## caller-supplied body.
+##   name   -- identifier of the generated C++ function
+##   params -- optional iterable of (type, name) pairs appended to the
+##             generated parameter list after `f` and `gid`
+## Raises an Exception at render time if the memory layout is not SOA.
+<%def name="functor(name, params = None)">
+<%
+# Every population is addressed through its own base pointer f[i], so any layout other than SOA is rejected.
+if layout.__class__.__name__ != 'SOA':
+ raise Exception('SSS pattern only works for the SOA memory layout')
+%>
+void ${name}(
+ ${float_type}** f
+ , std::size_t gid
+% if params is not None:
+% for param_type, param_name in params:
+ , ${param_type} ${param_name}
+% endfor
+% endif
+) {
+## One read-only pointer per population, offset from its base pointer to the cell `gid`.
+% for i, c_i in enumerate(descriptor.c):
+ const ${float_type}* preshifted_f_${i} = f[${i}] + ${layout.cell_preshift('gid')};
+% endfor
+
+## f_curr_<i> is read from the slot of the population whose velocity is -c_i.
+% for i, c_i in enumerate(descriptor.c):
+ const ${float_type} f_curr_${i} = *preshifted_f_${descriptor.c.index(-c_i)};
+% endfor
+
+ ${caller.body()}
+}
+</%def>
diff --git a/boltzgen/kernel/template/update_sss_control_structure.cpp.mako b/boltzgen/kernel/template/update_sss_control_structure.cpp.mako
new file mode 100644
index 0000000..9197022
--- /dev/null
+++ b/boltzgen/kernel/template/update_sss_control_structure.cpp.mako
@@ -0,0 +1,15 @@
+## Emits `update_sss_control_structure`, which rewrites the array of
+## per-population base pointers `f` in place: every entry is replaced by the
+## previous pointer of the population with the negated velocity and is then
+## moved by the layout's neighbor offset for that negated velocity.
+## Rendering fails with an Exception unless `streaming` equals 'SSS'.
+<%
+if streaming != 'SSS':
+ raise Exception('"update_sss_control_structure" function only makes sense for the SSS pattern')
+%>
+void update_sss_control_structure(${float_type}** f) {
+## Snapshot all pointers first so the swap below never reads an overwritten entry.
+% for q, _ in enumerate(descriptor.c):
+ ${float_type}* previous_f_${q} = f[${q}];
+% endfor
+## Swap and shift per population; entries are independent once the snapshot exists.
+% for q, direction in enumerate(descriptor.c):
+ f[${q}] = previous_f_${descriptor.c.index(-direction)};
+ f[${q}] += ${layout.neighbor_offset(-direction)};
+% endfor
+}