From c91dd14efec94deaa7b94cfb6cc42e27fea66c34 Mon Sep 17 00:00:00 2001
From: Adrian Kummerlaender
Date: Tue, 11 Jun 2019 21:50:15 +0200
Subject: Preshift population field pointer

Now averaging ~ 820 MLUPS again
---
 codegen_lbm.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/codegen_lbm.py b/codegen_lbm.py
index e1519ba..ba2f0ba 100644
--- a/codegen_lbm.py
+++ b/codegen_lbm.py
@@ -58,7 +58,7 @@ def neighbor_offset(c_i):
     if c_i[1] == 0:
         return c_i[0]
     else:
-        return (-c_i[1])*nX + c_i[0]
+        return c_i[1]*nX + c_i[0]
 %>
 
 __kernel void collide_and_stream(__global __write_only float* f_a,
@@ -73,8 +73,10 @@ __kernel void collide_and_stream(__global __write_only float* f_a,
         return;
     }
 
+    __global __read_only float* preshifted_f_b = f_b + gid;
+
 % for i, c_i in enumerate(c):
-    const float f_curr_${i} = f_b[${direction_index(c_i)*nCells + neighbor_offset(-c_i)}u + gid];
+    const float f_curr_${i} = preshifted_f_b[${direction_index(c_i)*nCells + neighbor_offset(-c_i)}];
 % endfor
 
 % for i, expr in enumerate(moments_helper):
@@ -108,8 +110,10 @@ __kernel void collect_moments(__global __read_only float* f,
 {
     const unsigned int gid = get_global_id(1)*${nX} + get_global_id(0);
 
+    __global __read_only float* preshifted_f = f + gid;
+
 % for i in range(0,len(c)):
-    const float f_curr_${i} = f[${i*nCells}u + gid];
+    const float f_curr_${i} = preshifted_f[${i*nCells}];
 % endfor
 
 % for i, expr in enumerate(moments_helper):
-- 
cgit v1.2.3