aboutsummaryrefslogtreecommitdiff
path: root/template/kernel.mako
diff options
context:
space:
mode:
author Adrian Kummerlaender 2019-06-15 20:45:27 +0200
committer Adrian Kummerlaender 2019-06-15 20:54:56 +0200
commit d71faec93ec0a55c46810e0d178b2803ee89130c (patch)
tree 3c35650637615af20668a5ec7bf974b2c05b248b /template/kernel.mako
parent c43d3f38b6922d36d15e8ba2b6ce17ddb0c75b0a (diff)
downloadsymlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.gz
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.bz2
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.lz
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.xz
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.zst
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.zip
Add support for generating a D3Q19 kernel
Note that this required basically no changes besides generalizing the cell indexing and adding the symbolic formulation of a D3Q19 BGK collision step. Increasing the neighborhood communication from 9 to 19 cells leads to a significant performance "regression": the 3D kernel yields ~360 MLUPS (million lattice updates per second) compared to the 2D version's ~820 MLUPS.
Diffstat (limited to 'template/kernel.mako')
-rw-r--r-- template/kernel.mako 26
1 file changed, 15 insertions, 11 deletions
diff --git a/template/kernel.mako b/template/kernel.mako
index 7e930af..79cae66 100644
--- a/template/kernel.mako
+++ b/template/kernel.mako
@@ -1,7 +1,7 @@
__kernel void equilibrilize(__global __write_only float* f_next,
__global __write_only float* f_prev)
{
- const unsigned int gid = get_global_id(1)*${geometry.size_x} + get_global_id(0);
+ const unsigned int gid = get_global_id(2)*(${geometry.size_x*geometry.size_y}) + get_global_id(1)*${geometry.size_x} + get_global_id(0);
__global __write_only float* preshifted_f_next = f_next + gid;
__global __write_only float* preshifted_f_prev = f_prev + gid;
@@ -17,21 +17,25 @@ __kernel void equilibrilize(__global __write_only float* f_next,
}
<%
-def direction_index(c_i):
- return (c_i[0]+1) + 3*(1-c_i[1])
-
def neighbor_offset(c_i):
- if c_i[1] == 0:
- return c_i[0]
- else:
- return c_i[1]*geometry.size_x + c_i[0]
+ if descriptor.d == 2:
+ if c_i[1] == 0:
+ return c_i[0]
+ else:
+ return c_i[1]*geometry.size_x + c_i[0]
+ elif descriptor.d == 3:
+ if c_i[1] == 0:
+ return c_i[2]*geometry.size_x*geometry.size_y + c_i[0]
+ else:
+ return c_i[2]*geometry.size_x*geometry.size_y + c_i[1]*geometry.size_x + c_i[0]
+
%>
__kernel void collide_and_stream(__global __write_only float* f_next,
__global __read_only float* f_prev,
__global __read_only int* material)
{
- const unsigned int gid = get_global_id(1)*${geometry.size_x} + get_global_id(0);
+ const unsigned int gid = get_global_id(2)*(${geometry.size_x*geometry.size_y}) + get_global_id(1)*${geometry.size_x} + get_global_id(0);
const int m = material[gid];
@@ -43,7 +47,7 @@ __kernel void collide_and_stream(__global __write_only float* f_next,
__global __read_only float* preshifted_f_prev = f_prev + gid;
% for i, c_i in enumerate(descriptor.c):
- const float f_curr_${i} = preshifted_f_prev[${direction_index(c_i)*geometry.volume + neighbor_offset(-c_i)}];
+ const float f_curr_${i} = preshifted_f_prev[${i*geometry.volume + neighbor_offset(-c_i)}];
% endfor
% for i, expr in enumerate(moments_subexpr):
@@ -72,7 +76,7 @@ __kernel void collide_and_stream(__global __write_only float* f_next,
__kernel void collect_moments(__global __read_only float* f,
__global __write_only float* moments)
{
- const unsigned int gid = get_global_id(1)*${geometry.size_x} + get_global_id(0);
+ const unsigned int gid = get_global_id(2)*(${geometry.size_x*geometry.size_y}) + get_global_id(1)*${geometry.size_x} + get_global_id(0);
__global __read_only float* preshifted_f = f + gid;