diff options
Add support for generating a D3Q19 kernel
Note that this required essentially no changes besides generalizing the cell
indexing and adding the symbolic formulation of a D3Q19 BGK collision step.
Increasing the neighborhood communication from 9 to 19 cells leads to a
significant performance "regression": the 3D kernel yields ~360 MLUPS (million
lattice updates per second) compared to the 2D version's ~820 MLUPS.
Diffstat (limited to 'template')
-rw-r--r-- | template/kernel.mako | 26 |
1 file changed, 15 insertions, 11 deletions
diff --git a/template/kernel.mako b/template/kernel.mako index 7e930af..79cae66 100644 --- a/template/kernel.mako +++ b/template/kernel.mako @@ -1,7 +1,7 @@ __kernel void equilibrilize(__global __write_only float* f_next, __global __write_only float* f_prev) { - const unsigned int gid = get_global_id(1)*${geometry.size_x} + get_global_id(0); + const unsigned int gid = get_global_id(2)*(${geometry.size_x*geometry.size_y}) + get_global_id(1)*${geometry.size_x} + get_global_id(0); __global __write_only float* preshifted_f_next = f_next + gid; __global __write_only float* preshifted_f_prev = f_prev + gid; @@ -17,21 +17,25 @@ __kernel void equilibrilize(__global __write_only float* f_next, } <% -def direction_index(c_i): - return (c_i[0]+1) + 3*(1-c_i[1]) - def neighbor_offset(c_i): - if c_i[1] == 0: - return c_i[0] - else: - return c_i[1]*geometry.size_x + c_i[0] + if descriptor.d == 2: + if c_i[1] == 0: + return c_i[0] + else: + return c_i[1]*geometry.size_x + c_i[0] + elif descriptor.d == 3: + if c_i[1] == 0: + return c_i[2]*geometry.size_x*geometry.size_y + c_i[0] + else: + return c_i[2]*geometry.size_x*geometry.size_y + c_i[1]*geometry.size_x + c_i[0] + %> __kernel void collide_and_stream(__global __write_only float* f_next, __global __read_only float* f_prev, __global __read_only int* material) { - const unsigned int gid = get_global_id(1)*${geometry.size_x} + get_global_id(0); + const unsigned int gid = get_global_id(2)*(${geometry.size_x*geometry.size_y}) + get_global_id(1)*${geometry.size_x} + get_global_id(0); const int m = material[gid]; @@ -43,7 +47,7 @@ __kernel void collide_and_stream(__global __write_only float* f_next, __global __read_only float* preshifted_f_prev = f_prev + gid; % for i, c_i in enumerate(descriptor.c): - const float f_curr_${i} = preshifted_f_prev[${direction_index(c_i)*geometry.volume + neighbor_offset(-c_i)}]; + const float f_curr_${i} = preshifted_f_prev[${i*geometry.volume + neighbor_offset(-c_i)}]; % endfor % for i, expr in 
enumerate(moments_subexpr): @@ -72,7 +76,7 @@ __kernel void collide_and_stream(__global __write_only float* f_next, __kernel void collect_moments(__global __read_only float* f, __global __write_only float* moments) { - const unsigned int gid = get_global_id(1)*${geometry.size_x} + get_global_id(0); + const unsigned int gid = get_global_id(2)*(${geometry.size_x*geometry.size_y}) + get_global_id(1)*${geometry.size_x} + get_global_id(0); __global __read_only float* preshifted_f = f + gid; |