From 72286b36e8ce478af9f62507b5aea762db4ea46b Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Sun, 6 Oct 2019 14:03:02 +0200 Subject: Use OpenCL access qualifiers only for image objects It seems I was overeager in adding those qualifiers to non-image buffers as they are only defined by the standard in relation to image objects. Adding the qualifiers to normal buffers causes no observable performance difference on Nvidia targets and fails compilation when targeting AMD or Intel. --- template/kernel.mako | 24 ++++++++++++------------ template/opengl.mako | 14 +++++++------- template/particles.mako | 8 ++++---- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/template/kernel.mako b/template/kernel.mako index 025baf5..bf7b29b 100644 --- a/template/kernel.mako +++ b/template/kernel.mako @@ -9,13 +9,13 @@ def pop_offset(i): return i * memory.volume %> -__kernel void equilibrilize(__global __write_only ${float_type}* f_next, - __global __write_only ${float_type}* f_prev) +__kernel void equilibrilize(__global ${float_type}* f_next, + __global ${float_type}* f_prev) { const unsigned int gid = ${gid()}; - __global __write_only ${float_type}* preshifted_f_next = f_next + gid; - __global __write_only ${float_type}* preshifted_f_prev = f_prev + gid; + __global ${float_type}* preshifted_f_next = f_next + gid; + __global ${float_type}* preshifted_f_prev = f_prev + gid; % if pop_eq_src == '': % for i, w_i in enumerate(descriptor.w): @@ -36,9 +36,9 @@ def neighbor_offset(c_i): %> -__kernel void collide_and_stream(__global __write_only ${float_type}* f_next, - __global __read_only ${float_type}* f_prev, - __global __read_only int* material, +__kernel void collide_and_stream(__global ${float_type}* f_next, + __global ${float_type}* f_prev, + __global int* material, unsigned int time) { const unsigned int gid = ${gid()}; @@ -49,8 +49,8 @@ __kernel void collide_and_stream(__global __write_only ${float_type}* f_next, return; } - __global __write_only ${float_type}* preshifted_f_next = f_next + gid; - __global __read_only ${float_type}* preshifted_f_prev = f_prev + gid; + __global ${float_type}* preshifted_f_next = f_next + gid; + __global ${float_type}* preshifted_f_prev = f_prev + gid; % for i, c_i in enumerate(descriptor.c): const ${float_type} f_curr_${i} = preshifted_f_prev[${pop_offset(i) + neighbor_offset(-c_i)}]; @@ -79,12 +79,12 @@ __kernel void collide_and_stream(__global __write_only ${float_type}* f_next, % endfor } -__kernel void collect_moments(__global __read_only ${float_type}* f, - __global __write_only ${float_type}* moments) +__kernel void collect_moments(__global ${float_type}* f, + __global ${float_type}* moments) { const unsigned int gid = ${gid()}; - __global __read_only ${float_type}* preshifted_f = f + gid; + __global ${float_type}* preshifted_f = f + gid; % for i in range(0,descriptor.q): const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}]; diff --git a/template/opengl.mako b/template/opengl.mako index 52699d5..181101c 100644 --- a/template/opengl.mako +++ b/template/opengl.mako @@ -15,13 +15,13 @@ def moments_cell(): }.get(descriptor.d) %> -__kernel void collect_gl_moments(__global __read_only ${float_type}* f, - __global __read_only int* material, - __global __write_only float4* moments) +__kernel void collect_gl_moments(__global ${float_type}* f, + __global int* material, + __global float4* moments) { const unsigned int gid = ${gid()}; - __global __read_only ${float_type}* preshifted_f = f + gid; + __global ${float_type}* preshifted_f = f + gid; % for i in range(0,descriptor.q): const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}]; @@ -64,8 +64,8 @@ def neighbor_offset(c_i): %> -__kernel void collect_gl_moments_to_texture(__global __read_only ${float_type}* f, - __global __read_only int* material, +__kernel void collect_gl_moments_to_texture(__global ${float_type}* f, + __global int* material, % if descriptor.d == 2: __write_only image2d_t moments) % elif descriptor.d == 3: @@ -74,7 +74,7 @@ __kernel void collect_gl_moments_to_texture(__global __read_only ${float_type}* { const unsigned int gid = ${gid()}; - __global __read_only ${float_type}* preshifted_f = f + gid; + __global ${float_type}* preshifted_f = f + gid; % for i in range(0,descriptor.q): const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}]; diff --git a/template/particles.mako b/template/particles.mako index cee3f4b..48191d9 100644 --- a/template/particles.mako +++ b/template/particles.mako @@ -1,7 +1,7 @@ -__kernel void update_particles(__global __read_only float4* moments, - __global __read_only int* material, - __global __write_only float4* particles, - __global __read_only float4* init_particles, +__kernel void update_particles(__global float4* moments, + __global int* material, + __global float4* particles, + __global float4* init_particles, float aging) { const unsigned int pid = get_global_id(0); -- cgit v1.2.3