aboutsummaryrefslogtreecommitdiff
path: root/codegen_lbm.py
diff options
context:
space:
mode:
author Adrian Kummerlaender 2019-06-11 21:50:15 +0200
committer Adrian Kummerlaender 2019-06-11 21:50:15 +0200
commit c91dd14efec94deaa7b94cfb6cc42e27fea66c34 (patch)
tree 77c54459cef01bfbbcde5dbf0364852d8b17c34c /codegen_lbm.py
parent d9fe5bfdc59f2f637b5f30937462d205718041fb (diff)
download symlbm_playground-c91dd14efec94deaa7b94cfb6cc42e27fea66c34.tar
symlbm_playground-c91dd14efec94deaa7b94cfb6cc42e27fea66c34.tar.gz
symlbm_playground-c91dd14efec94deaa7b94cfb6cc42e27fea66c34.tar.bz2
symlbm_playground-c91dd14efec94deaa7b94cfb6cc42e27fea66c34.tar.lz
symlbm_playground-c91dd14efec94deaa7b94cfb6cc42e27fea66c34.tar.xz
symlbm_playground-c91dd14efec94deaa7b94cfb6cc42e27fea66c34.tar.zst
symlbm_playground-c91dd14efec94deaa7b94cfb6cc42e27fea66c34.zip
Preshift population field pointer
Now averaging ~ 820 MLUPS again
Diffstat (limited to 'codegen_lbm.py')
-rw-r--r-- codegen_lbm.py 10
1 files changed, 7 insertions, 3 deletions
diff --git a/codegen_lbm.py b/codegen_lbm.py
index e1519ba..ba2f0ba 100644
--- a/codegen_lbm.py
+++ b/codegen_lbm.py
@@ -58,7 +58,7 @@ def neighbor_offset(c_i):
if c_i[1] == 0:
return c_i[0]
else:
- return (-c_i[1])*nX + c_i[0]
+ return c_i[1]*nX + c_i[0]
%>
__kernel void collide_and_stream(__global __write_only float* f_a,
@@ -73,8 +73,10 @@ __kernel void collide_and_stream(__global __write_only float* f_a,
return;
}
+ __global __read_only float* preshifted_f_b = f_b + gid;
+
% for i, c_i in enumerate(c):
- const float f_curr_${i} = f_b[${direction_index(c_i)*nCells + neighbor_offset(-c_i)}u + gid];
+ const float f_curr_${i} = preshifted_f_b[${direction_index(c_i)*nCells + neighbor_offset(-c_i)}];
% endfor
% for i, expr in enumerate(moments_helper):
@@ -108,8 +110,10 @@ __kernel void collect_moments(__global __read_only float* f,
{
const unsigned int gid = get_global_id(1)*${nX} + get_global_id(0);
+ __global __read_only float* preshifted_f = f + gid;
+
% for i in range(0,len(c)):
- const float f_curr_${i} = f[${i*nCells}u + gid];
+ const float f_curr_${i} = preshifted_f[${i*nCells}];
% endfor
% for i, expr in enumerate(moments_helper):