diff options
Implement SSS for OpenCL target
Sadly, OpenCL kernels don't accept pointer-to-pointer arguments, which
complicates the control structure implementation.
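A workaround is to store the population pointers as `uintptr_t` values, a type
that is guaranteed to be large enough to hold any pointer on the device. Special
care has to be taken to always perform the pointer shifts on actual
floating-point pointers and not on type-less pointers or on the integer values,
because only a typed pointer advances by whole elements when an offset is added.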
Diffstat (limited to 'boltzgen/kernel/template/update_sss_control_structure.cl.mako')
-rw-r--r-- | boltzgen/kernel/template/update_sss_control_structure.cl.mako | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/boltzgen/kernel/template/update_sss_control_structure.cl.mako b/boltzgen/kernel/template/update_sss_control_structure.cl.mako
new file mode 100644
index 0000000..ec6a5d5
--- /dev/null
+++ b/boltzgen/kernel/template/update_sss_control_structure.cl.mako
@@ -0,0 +1,21 @@
+<%
+if streaming != 'SSS':
+    raise Exception('"update_sss_control_structure" function only makes sense for the SSS pattern')
+
+padding = (max(geometry.size_x,geometry.size_y,geometry.size_z)+1)**(descriptor.d-1)
+%>
+
+__kernel void init_sss_control_structure(__global ${float_type}* f, __global uintptr_t* control) { // Stores, for each index i of descriptor.c, an address inside f in control[i] as an integer.
+% for i, c_i in enumerate(descriptor.c):
+    control[${i}] = (uintptr_t)(f + ${padding + layout.pop_offset(i, 2*padding)}); // offset is added to the typed pointer first, then the result is cast
+% endfor
+}
+
+__kernel void update_sss_control_structure(__global uintptr_t* control) { // Replaces each control[i] with a shifted copy of another entry's previous value.
+% for i, c_i in enumerate(descriptor.c):
+    __global ${float_type}* f_old_${i} = (__global ${float_type}*)(control[${i}]); // read every old entry back as a typed pointer before any entry is overwritten
+% endfor
+% for i, c_i in enumerate(descriptor.c):
+    control[${i}] = (uintptr_t)(f_old_${descriptor.c.index(-c_i)} + ${layout.neighbor_offset(-c_i)}); // source is the entry whose descriptor.c element equals -c_i; the shift is applied to the typed pointer
+% endfor
+} |