aboutsummaryrefslogtreecommitdiff
path: root/boltzgen/kernel/template/update_sss_control_structure.cl.mako
diff options
context:
space:
mode:
authorAdrian Kummerlaender2020-01-11 00:11:28 +0100
committerAdrian Kummerlaender2020-01-11 00:15:08 +0100
commitb5a24f31871d900342a3c47398cc75e22bad0b6f (patch)
tree5fa2889803d28f21536cbf964eca3850f7a66700 /boltzgen/kernel/template/update_sss_control_structure.cl.mako
parentaa509dd4ebbb9d1d8ad6ebfe05111228fd9ae7c0 (diff)
downloadboltzgen-b5a24f31871d900342a3c47398cc75e22bad0b6f.tar
boltzgen-b5a24f31871d900342a3c47398cc75e22bad0b6f.tar.gz
boltzgen-b5a24f31871d900342a3c47398cc75e22bad0b6f.tar.bz2
boltzgen-b5a24f31871d900342a3c47398cc75e22bad0b6f.tar.lz
boltzgen-b5a24f31871d900342a3c47398cc75e22bad0b6f.tar.xz
boltzgen-b5a24f31871d900342a3c47398cc75e22bad0b6f.tar.zst
boltzgen-b5a24f31871d900342a3c47398cc75e22bad0b6f.zip
Implement SSS for OpenCL target
Sadly, OpenCL kernels don't accept pointer-to-pointer arguments, which complicates the control structure implementation. A workaround is to cast the pointers to `uintptr_t`, which is guaranteed to be large enough to hold any pointer on the device. Special care has to be taken to always perform the pointer shifts on actual floating-point pointers and not on typeless pointers.
Diffstat (limited to 'boltzgen/kernel/template/update_sss_control_structure.cl.mako')
-rw-r--r--boltzgen/kernel/template/update_sss_control_structure.cl.mako21
1 files changed, 21 insertions, 0 deletions
diff --git a/boltzgen/kernel/template/update_sss_control_structure.cl.mako b/boltzgen/kernel/template/update_sss_control_structure.cl.mako
new file mode 100644
index 0000000..ec6a5d5
--- /dev/null
+++ b/boltzgen/kernel/template/update_sss_control_structure.cl.mako
@@ -0,0 +1,21 @@
+<%
+if streaming != 'SSS':  # render-time guard: this template refuses any streaming pattern other than 'SSS'
+ raise Exception('"update_sss_control_structure" function only makes sense for the SSS pattern')
+
+padding = (max(geometry.size_x,geometry.size_y,geometry.size_z)+1)**(descriptor.d-1)  # (largest geometry extent + 1) to the power (descriptor.d - 1); added to each start offset below and passed doubled to layout.pop_offset
+%>
+
+__kernel void init_sss_control_structure(__global ${float_type}* f, __global uintptr_t* control) { // fills control with one address into f per entry of descriptor.c
+% for i, c_i in enumerate(descriptor.c):
+ control[${i}] = (uintptr_t)(f + ${padding + layout.pop_offset(i, 2*padding)}); // offset is applied to the typed pointer first, then the result is stored as an integer
+% endfor
+}
+
+__kernel void update_sss_control_structure(__global uintptr_t* control) { // rewrites every entry of control in place
+% for i, c_i in enumerate(descriptor.c):
+ __global ${float_type}* f_old_${i} = (__global ${float_type}*)(control[${i}]); // snapshot every current entry as a typed pointer before any entry is overwritten
+% endfor
+% for i, c_i in enumerate(descriptor.c):
+ control[${i}] = (uintptr_t)(f_old_${descriptor.c.index(-c_i)} + ${layout.neighbor_offset(-c_i)}); // entry i takes the snapshot belonging to the negated vector -c_i, shifted by layout.neighbor_offset(-c_i)
+% endfor
+}