aboutsummaryrefslogtreecommitdiff
path: root/template/opengl.mako
diff options
context:
space:
mode:
author: Adrian Kummerlaender, 2019-10-06 14:03:02 +0200
committer: Adrian Kummerlaender, 2019-10-06 14:03:02 +0200
commit 72286b36e8ce478af9f62507b5aea762db4ea46b (patch)
tree f8c9185facb2707e32c591446b19d50e4c39b515 /template/opengl.mako
parent 95142dbc573d2792f021eb0eb7ae1c7cb96641b0 (diff)
downloadsymlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar
symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.gz
symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.bz2
symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.lz
symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.xz
symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.tar.zst
symlbm_playground-72286b36e8ce478af9f62507b5aea762db4ea46b.zip
Use OpenCL access qualifiers only for image objects
It seems I was overeager in adding those qualifiers to non-image buffers: the OpenCL standard defines them only in relation to image objects. Adding the qualifiers to normal buffers makes no observable performance difference on Nvidia targets, and it causes compilation to fail when targeting AMD or Intel.
Diffstat (limited to 'template/opengl.mako')
-rw-r--r-- template/opengl.mako 14
1 file changed, 7 insertions, 7 deletions
diff --git a/template/opengl.mako b/template/opengl.mako
index 52699d5..181101c 100644
--- a/template/opengl.mako
+++ b/template/opengl.mako
@@ -15,13 +15,13 @@ def moments_cell():
}.get(descriptor.d)
%>
-__kernel void collect_gl_moments(__global __read_only ${float_type}* f,
- __global __read_only int* material,
- __global __write_only float4* moments)
+__kernel void collect_gl_moments(__global ${float_type}* f,
+ __global int* material,
+ __global float4* moments)
{
const unsigned int gid = ${gid()};
- __global __read_only ${float_type}* preshifted_f = f + gid;
+ __global ${float_type}* preshifted_f = f + gid;
% for i in range(0,descriptor.q):
const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}];
@@ -64,8 +64,8 @@ def neighbor_offset(c_i):
%>
-__kernel void collect_gl_moments_to_texture(__global __read_only ${float_type}* f,
- __global __read_only int* material,
+__kernel void collect_gl_moments_to_texture(__global ${float_type}* f,
+ __global int* material,
% if descriptor.d == 2:
__write_only image2d_t moments)
% elif descriptor.d == 3:
@@ -74,7 +74,7 @@ __kernel void collect_gl_moments_to_texture(__global __read_only ${float_type}*
{
const unsigned int gid = ${gid()};
- __global __read_only ${float_type}* preshifted_f = f + gid;
+ __global ${float_type}* preshifted_f = f + gid;
% for i in range(0,descriptor.q):
const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}];