Add platform, precision and thread layout parameters

author: Adrian Kummerlaender 2019-06-22 14:44:50 +0200
committer: Adrian Kummerlaender 2019-06-22 14:44:50 +0200
commit: 4df2d7678755c652f7af3d579a812dfc091a00e6 (patch)
tree: 16f83f3292c14a718580e365f2e35ab4ceffbe52
parent: 2129316819f9e9ea2c849f8d3001cf452c51fe4d (diff)
download: symlbm_playground-4df2d7678755c652f7af3d579a812dfc091a00e6.tar
symlbm_playground-4df2d7678755c652f7af3d579a812dfc091a00e6.tar.gz
symlbm_playground-4df2d7678755c652f7af3d579a812dfc091a00e6.tar.bz2
symlbm_playground-4df2d7678755c652f7af3d579a812dfc091a00e6.tar.lz
symlbm_playground-4df2d7678755c652f7af3d579a812dfc091a00e6.tar.xz
symlbm_playground-4df2d7678755c652f7af3d579a812dfc091a00e6.tar.zst
symlbm_playground-4df2d7678755c652f7af3d579a812dfc091a00e6.zip
2 files changed, 58 insertions, 38 deletions
diff --git a/simulation.py b/simulation.py
index cbc9840..20b87c8 100644
--- a/simulation.py
+++ b/simulation.py
@@ -35,7 +35,11 @@ class Geometry:
             return (self.size_x-2, self.size_y-2, self.size_z-2)
 
 class Lattice:
-    def __init__(self, descriptor, geometry, moments, collide, pop_eq_src = '', boundary_src = '', opengl = False):
+    def __init__(self,
+        descriptor, geometry, moments, collide,
+        pop_eq_src = '', boundary_src = '',
+        platform = 0, precision = 'single', layout = None, opengl = False
+    ):
         self.descriptor = descriptor
         self.geometry   = geometry
 
@@ -45,7 +49,17 @@ class Lattice:
         self.pop_eq_src = pop_eq_src
         self.boundary_src = boundary_src
 
-        self.platform = cl.get_platforms()[0]
+        self.float_type = {
+            'single': (numpy.float32, 'float'),
+            'double': (numpy.float64, 'double'),
+        }.get(precision, None)
+
+        self.compiler_args = {
+            'single': '-cl-single-precision-constant -cl-fast-relaxed-math',
+            'double': '-cl-fast-relaxed-math'
+        }.get(precision, None)
+
+        self.platform = cl.get_platforms()[platform]
         if opengl:
             self.context = cl.Context(
                 properties=[(cl.context_properties.PLATFORM, self.platform)] + get_gl_sharing_context_properties())
@@ -56,16 +70,15 @@ class Lattice:
 
         self.np_material = numpy.ndarray(shape=(self.geometry.volume, 1), dtype=numpy.int32)
 
-        self.tick = True
-
-        self.pop_size     = descriptor.q     * self.geometry.volume * numpy.float32(0).nbytes
-        self.moments_size = (descriptor.d+1) * self.geometry.volume * numpy.float32(0).nbytes
+        self.pop_size     = descriptor.q     * self.geometry.volume * self.float_type[0](0).nbytes
+        self.moments_size = (descriptor.d+1) * self.geometry.volume * self.float_type[0](0).nbytes
 
+        self.tick = True
         self.cl_pop_a = cl.Buffer(self.context, mf.READ_WRITE, size=self.pop_size)
         self.cl_pop_b = cl.Buffer(self.context, mf.READ_WRITE, size=self.pop_size)
 
         if opengl:
-            self.np_moments = numpy.ndarray(shape=(self.geometry.volume, 4), dtype=numpy.float32)
+            self.np_moments = numpy.ndarray(shape=(self.geometry.volume, 4), dtype=self.float_type[0])
             self.gl_moments = vbo.VBO(data=self.np_moments, usage=gl.GL_DYNAMIC_DRAW, target=gl.GL_ARRAY_BUFFER)
             self.gl_moments.bind()
             self.cl_gl_moments  = cl.GLBuffer(self.context, mf.READ_WRITE, int(self.gl_moments))
@@ -76,11 +89,14 @@ class Lattice:
 
         self.build_kernel()
 
-        self.layout = {
-            (2, 9): (32,1),
-            (3,19): (32,1,1),
-            (3,27): (32,1,1)
-        }.get((descriptor.d, descriptor.q), None)
+        if layout == None:
+            self.layout = {
+                (2, 9): (32,1),
+                (3,19): (32,1,1),
+                (3,27): (32,1,1)
+            }.get((descriptor.d, descriptor.q), None)
+        else:
+            self.layout = layout
 
         self.program.equilibrilize(
             self.queue, self.geometry.size(), self.layout, self.cl_pop_a, self.cl_pop_b).wait()
@@ -104,18 +120,22 @@ class Lattice:
             collide_subexpr    = self.collide[0],
             collide_assignment = self.collide[1],
 
+            float_type = self.float_type[1],
+
             pop_eq_src = Template(self.pop_eq_src).render(
                 descriptor = self.descriptor,
-                geometry   = self.geometry
+                geometry   = self.geometry,
+                float_type = self.float_type[1]
             ),
             boundary_src = Template(self.boundary_src).render(
                 descriptor = self.descriptor,
-                geometry   = self.geometry
+                geometry   = self.geometry,
+                float_type = self.float_type[1]
             ),
 
             ccode = sympy.ccode
         )
-        self.program = cl.Program(self.context, program_src).build('-cl-single-precision-constant -cl-fast-relaxed-math')
+        self.program = cl.Program(self.context, program_src).build(self.compiler_args)
 
     def evolve(self):
         if self.tick:
@@ -131,7 +151,7 @@ class Lattice:
         self.queue.finish()
 
     def get_moments(self):
-        moments = numpy.ndarray(shape=(self.descriptor.d+1, self.geometry.volume), dtype=numpy.float32)
+        moments = numpy.ndarray(shape=(self.descriptor.d+1, self.geometry.volume), dtype=self.float_type[0])
 
         if self.tick:
             self.program.collect_moments(
diff --git a/template/kernel.mako b/template/kernel.mako
index 417851a..41edcbf 100644
--- a/template/kernel.mako
+++ b/template/kernel.mako
@@ -9,13 +9,13 @@ def pop_offset(i):
     return i * geometry.volume
 %>
 
-__kernel void equilibrilize(__global __write_only float* f_next,
-                            __global __write_only float* f_prev)
+__kernel void equilibrilize(__global __write_only ${float_type}* f_next,
+                            __global __write_only ${float_type}* f_prev)
 {
     const unsigned int gid = ${gid()};
 
-    __global __write_only float* preshifted_f_next = f_next + gid;
-    __global __write_only float* preshifted_f_prev = f_prev + gid;
+    __global __write_only ${float_type}* preshifted_f_next = f_next + gid;
+    __global __write_only ${float_type}* preshifted_f_prev = f_prev + gid;
 
 % if pop_eq_src == '':
 %     for i, w_i in enumerate(descriptor.w):
@@ -36,8 +36,8 @@ def neighbor_offset(c_i):
 
 %>
 
-__kernel void collide_and_stream(__global __write_only float* f_next,
-                                 __global __read_only  float* f_prev,
+__kernel void collide_and_stream(__global __write_only ${float_type}* f_next,
+                                 __global __read_only  ${float_type}* f_prev,
                                  __global __read_only  int* material)
 {
     const unsigned int gid = ${gid()};
@@ -48,29 +48,29 @@ __kernel void collide_and_stream(__global __write_only float* f_next,
         return;
     }
 
-    __global __write_only float* preshifted_f_next = f_next + gid;
-    __global __read_only  float* preshifted_f_prev = f_prev + gid;
+    __global __write_only ${float_type}* preshifted_f_next = f_next + gid;
+    __global __read_only  ${float_type}* preshifted_f_prev = f_prev + gid;
 
 % for i, c_i in enumerate(descriptor.c):
-    const float f_curr_${i} = preshifted_f_prev[${pop_offset(i) + neighbor_offset(-c_i)}];
+    const ${float_type} f_curr_${i} = preshifted_f_prev[${pop_offset(i) + neighbor_offset(-c_i)}];
 % endfor
 
 % for i, expr in enumerate(moments_subexpr):
-    const float ${expr[0]} = ${ccode(expr[1])};
+    const ${float_type} ${expr[0]} = ${ccode(expr[1])};
 % endfor
 
 % for i, expr in enumerate(moments_assignment):
-    float ${ccode(expr)}
+    ${float_type} ${ccode(expr)}
 % endfor
 
   ${boundary_src}
 
 % for i, expr in enumerate(collide_subexpr):
-    const float ${expr[0]} = ${ccode(expr[1])};
+    const ${float_type} ${expr[0]} = ${ccode(expr[1])};
 % endfor
 
 % for i, expr in enumerate(collide_assignment):
-    const float ${ccode(expr)}
+    const ${float_type} ${ccode(expr)}
 % endfor
 
 % for i in range(0,descriptor.q):
@@ -78,19 +78,19 @@ __kernel void collide_and_stream(__global __write_only float* f_next,
 % endfor
 }
 
-__kernel void collect_moments(__global __read_only  float* f,
-                              __global __write_only float* moments)
+__kernel void collect_moments(__global __read_only  ${float_type}* f,
+                              __global __write_only ${float_type}* moments)
 {
     const unsigned int gid = ${gid()};
 
-    __global __read_only float* preshifted_f = f + gid;
+    __global __read_only ${float_type}* preshifted_f = f + gid;
 
 % for i in range(0,descriptor.q):
-    const float f_curr_${i} = preshifted_f[${pop_offset(i)}];
+    const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}];
 % endfor
 
 % for i, expr in enumerate(moments_subexpr):
-    const float ${expr[0]} = ${ccode(expr[1])};
+    const ${float_type} ${expr[0]} = ${ccode(expr[1])};
 % endfor
 
 % for i, expr in enumerate(moments_assignment):
@@ -98,19 +98,19 @@ __kernel void collect_moments(__global __read_only  float* f,
 % endfor
 }
 
-__kernel void collect_gl_moments(__global __read_only  float* f,
+__kernel void collect_gl_moments(__global __read_only  ${float_type}* f,
                                  __global __write_only float4* moments)
 {
     const unsigned int gid = ${gid()};
 
-    __global __read_only float* preshifted_f = f + gid;
+    __global __read_only ${float_type}* preshifted_f = f + gid;
 
 % for i in range(0,descriptor.q):
-    const float f_curr_${i} = preshifted_f[${pop_offset(i)}];
+    const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}];
 % endfor
 
 % for i, expr in enumerate(moments_subexpr):
-    const float ${expr[0]} = ${ccode(expr[1])};
+    const ${float_type} ${expr[0]} = ${ccode(expr[1])};
 % endfor
 
     float4 data;
author	Adrian Kummerlaender	2019-06-22 14:44:50 +0200
committer	Adrian Kummerlaender	2019-06-22 14:44:50 +0200
commit	4df2d7678755c652f7af3d579a812dfc091a00e6 (patch)
tree	16f83f3292c14a718580e365f2e35ab4ceffbe52
parent	2129316819f9e9ea2c849f8d3001cf452c51fe4d (diff)
download	symlbm_playground-4df2d7678755c652f7af3d579a812dfc091a00e6.tar symlbm_playground-4df2d7678755c652f7af3d579a812dfc091a00e6.tar.gz symlbm_playground-4df2d7678755c652f7af3d579a812dfc091a00e6.tar.bz2 symlbm_playground-4df2d7678755c652f7af3d579a812dfc091a00e6.tar.lz symlbm_playground-4df2d7678755c652f7af3d579a812dfc091a00e6.tar.xz symlbm_playground-4df2d7678755c652f7af3d579a812dfc091a00e6.tar.zst symlbm_playground-4df2d7678755c652f7af3d579a812dfc091a00e6.zip