aboutsummaryrefslogtreecommitdiff
path: root/boltzgen/kernel
diff options
context:
space:
mode:
authorAdrian Kummerlaender2019-10-21 18:42:24 +0200
committerAdrian Kummerlaender2019-10-21 18:48:38 +0200
commit82a44e0d64afb8818ea98d68dc08108885d503c2 (patch)
tree6e8f08acd83b2886cd296ed3831acc83e309906c /boltzgen/kernel
downloadboltzgen-82a44e0d64afb8818ea98d68dc08108885d503c2.tar
boltzgen-82a44e0d64afb8818ea98d68dc08108885d503c2.tar.gz
boltzgen-82a44e0d64afb8818ea98d68dc08108885d503c2.tar.bz2
boltzgen-82a44e0d64afb8818ea98d68dc08108885d503c2.tar.lz
boltzgen-82a44e0d64afb8818ea98d68dc08108885d503c2.tar.xz
boltzgen-82a44e0d64afb8818ea98d68dc08108885d503c2.tar.zst
boltzgen-82a44e0d64afb8818ea98d68dc08108885d503c2.zip
Pull in basics from symlbm_playground
It's time to extract the generator-part of my GPU LBM playground and turn it into a nice reusable library. The goal is to produce a framework that can be used to generate collision and streaming programs from symbolic descriptions. i.e. it should be possible to select a LB model, the desired boundary conditions as well as a data structure / streaming model and use this information to automatically generate matching OpenCL / CUDA / C++ programs.
Diffstat (limited to 'boltzgen/kernel')
-rw-r--r--boltzgen/kernel/generator.py25
-rw-r--r--boltzgen/kernel/template/kernel.mako104
2 files changed, 129 insertions, 0 deletions
diff --git a/boltzgen/kernel/generator.py b/boltzgen/kernel/generator.py
new file mode 100644
index 0000000..bd1bb86
--- /dev/null
+++ b/boltzgen/kernel/generator.py
@@ -0,0 +1,25 @@
+import sympy
+
+from mako.template import Template
+from pathlib import Path
+
+def source(descriptor, moments, collide, boundary_src, float_type, geometry):
+ return Template(filename = str(Path(__file__).parent/'template/kernel.mako')).render(
+ descriptor = descriptor,
+ geometry = geometry,
+
+ moments_subexpr = moments[0],
+ moments_assignment = moments[1],
+ collide_subexpr = collide[0],
+ collide_assignment = collide[1],
+
+ float_type = float_type,
+
+ boundary_src = Template(boundary_src).render(
+ descriptor = descriptor,
+ geometry = geometry,
+ float_type = float_type
+ ),
+
+ ccode = sympy.ccode
+ )
diff --git a/boltzgen/kernel/template/kernel.mako b/boltzgen/kernel/template/kernel.mako
new file mode 100644
index 0000000..5ddf64c
--- /dev/null
+++ b/boltzgen/kernel/template/kernel.mako
@@ -0,0 +1,104 @@
+% if float_type == 'double':
+#if defined(cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#elif defined(cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64 : enable
+#endif
+% endif
+
+<%
+def gid():
+ return {
+ 2: 'get_global_id(1)*%d + get_global_id(0)' % geometry.size_x,
+ 3: 'get_global_id(2)*%d + get_global_id(1)*%d + get_global_id(0)' % (geometry.size_x*geometry.size_y, geometry.size_x)
+ }.get(descriptor.d)
+
+def pop_offset(i):
+ return i * geometry.volume
+%>
+
+__kernel void equilibrilize(__global ${float_type}* f_next,
+ __global ${float_type}* f_prev)
+{
+ const unsigned int gid = ${gid()};
+
+ __global ${float_type}* preshifted_f_next = f_next + gid;
+ __global ${float_type}* preshifted_f_prev = f_prev + gid;
+
+% for i, w_i in enumerate(descriptor.w):
+ preshifted_f_next[${pop_offset(i)}] = ${w_i}.f;
+ preshifted_f_prev[${pop_offset(i)}] = ${w_i}.f;
+% endfor
+}
+
+<%
+def neighbor_offset(c_i):
+ return {
+ 2: lambda: c_i[1]*geometry.size_x + c_i[0],
+ 3: lambda: c_i[2]*geometry.size_x*geometry.size_y + c_i[1]*geometry.size_x + c_i[0]
+ }.get(descriptor.d)()
+
+%>
+
+__kernel void collide_and_stream(__global ${float_type}* f_next,
+ __global ${float_type}* f_prev,
+ __global int* material,
+ unsigned int time)
+{
+ const unsigned int gid = ${gid()};
+
+ const int m = material[gid];
+
+ if ( m == 0 ) {
+ return;
+ }
+
+ __global ${float_type}* preshifted_f_next = f_next + gid;
+ __global ${float_type}* preshifted_f_prev = f_prev + gid;
+
+% for i, c_i in enumerate(descriptor.c):
+ const ${float_type} f_curr_${i} = preshifted_f_prev[${pop_offset(i) + neighbor_offset(-c_i)}];
+% endfor
+
+% for i, expr in enumerate(moments_subexpr):
+ const ${float_type} ${expr[0]} = ${ccode(expr[1])};
+% endfor
+
+% for i, expr in enumerate(moments_assignment):
+ ${float_type} ${ccode(expr)}
+% endfor
+
+ ${boundary_src}
+
+% for i, expr in enumerate(collide_subexpr):
+ const ${float_type} ${expr[0]} = ${ccode(expr[1])};
+% endfor
+
+% for i, expr in enumerate(collide_assignment):
+ const ${float_type} ${ccode(expr)}
+% endfor
+
+% for i in range(0,descriptor.q):
+ preshifted_f_next[${pop_offset(i)}] = f_next_${i};
+% endfor
+}
+
+__kernel void collect_moments(__global ${float_type}* f,
+ __global ${float_type}* moments)
+{
+ const unsigned int gid = ${gid()};
+
+ __global ${float_type}* preshifted_f = f + gid;
+
+% for i in range(0,descriptor.q):
+ const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}];
+% endfor
+
+% for i, expr in enumerate(moments_subexpr):
+ const ${float_type} ${expr[0]} = ${ccode(expr[1])};
+% endfor
+
+% for i, expr in enumerate(moments_assignment):
+ moments[${pop_offset(i)} + gid] = ${ccode(expr.rhs)};
+% endfor
+}