Use OpenCL access qualifiers only for image objects

It seems I was overeager in adding those qualifiers to non-image buffers: the standard defines them only in relation to image objects. Adding the qualifiers to normal buffers yields no observable performance difference on Nvidia targets and causes compilation to fail when targeting AMD or Intel.
author: Adrian Kummerlaender 2019-10-06 14:03:02 +0200
committer: Adrian Kummerlaender 2019-10-06 14:03:02 +0200
commit: 72286b36e8ce478af9f62507b5aea762db4ea46b (patch)
tree: f8c9185facb2707e32c591446b19d50e4c39b515 /template/kernel.mako
parent: 95142dbc573d2792f021eb0eb7ae1c7cb96641b0 (diff)
download: symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar
symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.gz
symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.bz2
symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.lz
symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.xz
symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.zst
symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.zip
1 file changed, 12 insertions, 12 deletions
diff --git a/template/kernel.mako b/template/kernel.mako
index 025baf5..bf7b29b 100644
--- a/template/kernel.mako
+++ b/template/kernel.mako
@@ -9,13 +9,13 @@ def pop_offset(i):
     return i * memory.volume
 %>
 
-__kernel void equilibrilize(__global __write_only ${float_type}* f_next,
-                            __global __write_only ${float_type}* f_prev)
+__kernel void equilibrilize(__global ${float_type}* f_next,
+                            __global ${float_type}* f_prev)
 {
     const unsigned int gid = ${gid()};
 
-    __global __write_only ${float_type}* preshifted_f_next = f_next + gid;
-    __global __write_only ${float_type}* preshifted_f_prev = f_prev + gid;
+    __global ${float_type}* preshifted_f_next = f_next + gid;
+    __global ${float_type}* preshifted_f_prev = f_prev + gid;
 
 % if pop_eq_src == '':
 %     for i, w_i in enumerate(descriptor.w):
@@ -36,9 +36,9 @@ def neighbor_offset(c_i):
 
 %>
 
-__kernel void collide_and_stream(__global __write_only ${float_type}* f_next,
-                                 __global __read_only  ${float_type}* f_prev,
-                                 __global __read_only  int* material,
+__kernel void collide_and_stream(__global ${float_type}* f_next,
+                                 __global ${float_type}* f_prev,
+                                 __global int* material,
                                  unsigned int time)
 {
     const unsigned int gid = ${gid()};
@@ -49,8 +49,8 @@ __kernel void collide_and_stream(__global __write_only ${float_type}* f_next,
         return;
     }
 
-    __global __write_only ${float_type}* preshifted_f_next = f_next + gid;
-    __global __read_only  ${float_type}* preshifted_f_prev = f_prev + gid;
+    __global ${float_type}* preshifted_f_next = f_next + gid;
+    __global ${float_type}* preshifted_f_prev = f_prev + gid;
 
 % for i, c_i in enumerate(descriptor.c):
     const ${float_type} f_curr_${i} = preshifted_f_prev[${pop_offset(i) + neighbor_offset(-c_i)}];
@@ -79,12 +79,12 @@ __kernel void collide_and_stream(__global __write_only ${float_type}* f_next,
 % endfor
 }
 
-__kernel void collect_moments(__global __read_only  ${float_type}* f,
-                              __global __write_only ${float_type}* moments)
+__kernel void collect_moments(__global ${float_type}* f,
+                              __global ${float_type}* moments)
 {
     const unsigned int gid = ${gid()};
 
-    __global __read_only ${float_type}* preshifted_f = f + gid;
+    __global ${float_type}* preshifted_f = f + gid;
 
 % for i in range(0,descriptor.q):
     const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}];
author	Adrian Kummerlaender	2019-10-06 14:03:02 +0200
committer	Adrian Kummerlaender	2019-10-06 14:03:02 +0200
commit	72286b36e8ce478af9f62507b5aea762db4ea46b (patch)
tree	f8c9185facb2707e32c591446b19d50e4c39b515 /template/kernel.mako
parent	95142dbc573d2792f021eb0eb7ae1c7cb96641b0 (diff)
download	symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.gz symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.bz2 symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.lz symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.xz symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.zst symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.zip