Add support for generating a D3Q19 kernel

Note how this required essentially no changes besides generalizing the cell indexing and adding the symbolic formulation of a D3Q19 BGK collision step. Increasing the neighborhood communication from 9 to 19 cells leads to a significant performance "regression": the 3D kernel yields ~360 MLUPS (million lattice updates per second) compared to the 2D version's ~820 MLUPS.
author: Adrian Kummerlaender 2019-06-15 20:45:27 +0200
committer: Adrian Kummerlaender 2019-06-15 20:54:56 +0200
commit: d71faec93ec0a55c46810e0d178b2803ee89130c (patch)
tree: 3c35650637615af20668a5ec7bf974b2c05b248b /template
parent: c43d3f38b6922d36d15e8ba2b6ce17ddb0c75b0a (diff)
download: symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.gz
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.bz2
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.lz
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.xz
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.zst
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.zip
1 file changed, 15 insertions, 11 deletions
diff --git a/template/kernel.mako b/template/kernel.mako
index 7e930af..79cae66 100644
--- a/template/kernel.mako
+++ b/template/kernel.mako
@@ -1,7 +1,7 @@
 __kernel void equilibrilize(__global __write_only float* f_next,
                             __global __write_only float* f_prev)
 {
-    const unsigned int gid = get_global_id(1)*${geometry.size_x} + get_global_id(0);
+    const unsigned int gid = get_global_id(2)*(${geometry.size_x*geometry.size_y}) + get_global_id(1)*${geometry.size_x} + get_global_id(0);
 
     __global __write_only float* preshifted_f_next = f_next + gid;
     __global __write_only float* preshifted_f_prev = f_prev + gid;
@@ -17,21 +17,25 @@ __kernel void equilibrilize(__global __write_only float* f_next,
 }
 
 <%
-def direction_index(c_i):
-    return (c_i[0]+1) + 3*(1-c_i[1])
-
 def neighbor_offset(c_i):
-    if c_i[1] == 0:
-        return c_i[0]
-    else:
-        return c_i[1]*geometry.size_x + c_i[0]
+    if descriptor.d == 2:
+        if c_i[1] == 0:
+            return c_i[0]
+        else:
+            return c_i[1]*geometry.size_x + c_i[0]
+    elif descriptor.d == 3:
+        if c_i[1] == 0:
+            return c_i[2]*geometry.size_x*geometry.size_y + c_i[0]
+        else:
+            return c_i[2]*geometry.size_x*geometry.size_y + c_i[1]*geometry.size_x + c_i[0]
+
 %>
 
 __kernel void collide_and_stream(__global __write_only float* f_next,
                                  __global __read_only  float* f_prev,
                                  __global __read_only  int* material)
 {
-    const unsigned int gid = get_global_id(1)*${geometry.size_x} + get_global_id(0);
+    const unsigned int gid = get_global_id(2)*(${geometry.size_x*geometry.size_y}) + get_global_id(1)*${geometry.size_x} + get_global_id(0);
 
     const int m = material[gid];
 
@@ -43,7 +47,7 @@ __kernel void collide_and_stream(__global __write_only float* f_next,
     __global __read_only  float* preshifted_f_prev = f_prev + gid;
 
 % for i, c_i in enumerate(descriptor.c):
-    const float f_curr_${i} = preshifted_f_prev[${direction_index(c_i)*geometry.volume + neighbor_offset(-c_i)}];
+    const float f_curr_${i} = preshifted_f_prev[${i*geometry.volume + neighbor_offset(-c_i)}];
 % endfor
 
 % for i, expr in enumerate(moments_subexpr):
@@ -72,7 +76,7 @@ __kernel void collide_and_stream(__global __write_only float* f_next,
 __kernel void collect_moments(__global __read_only  float* f,
                               __global __write_only float* moments)
 {
-    const unsigned int gid = get_global_id(1)*${geometry.size_x} + get_global_id(0);
+    const unsigned int gid = get_global_id(2)*(${geometry.size_x*geometry.size_y}) + get_global_id(1)*${geometry.size_x} + get_global_id(0);
 
     __global __read_only float* preshifted_f = f + gid;
author	Adrian Kummerlaender	2019-06-15 20:45:27 +0200
committer	Adrian Kummerlaender	2019-06-15 20:54:56 +0200
commit	d71faec93ec0a55c46810e0d178b2803ee89130c (patch)
tree	3c35650637615af20668a5ec7bf974b2c05b248b /template
parent	c43d3f38b6922d36d15e8ba2b6ce17ddb0c75b0a (diff)
download	symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.gz symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.bz2 symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.lz symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.xz symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.zst symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.zip