diff options
Add platform, precision and thread layout parameters
-rw-r--r-- | simulation.py | 52 |
-rw-r--r-- | template/kernel.mako | 44 |
2 files changed, 58 insertions, 38 deletions
diff --git a/simulation.py b/simulation.py index cbc9840..20b87c8 100644 --- a/simulation.py +++ b/simulation.py @@ -35,7 +35,11 @@ class Geometry: return (self.size_x-2, self.size_y-2, self.size_z-2) class Lattice: - def __init__(self, descriptor, geometry, moments, collide, pop_eq_src = '', boundary_src = '', opengl = False): + def __init__(self, + descriptor, geometry, moments, collide, + pop_eq_src = '', boundary_src = '', + platform = 0, precision = 'single', layout = None, opengl = False + ): self.descriptor = descriptor self.geometry = geometry @@ -45,7 +49,17 @@ class Lattice: self.pop_eq_src = pop_eq_src self.boundary_src = boundary_src - self.platform = cl.get_platforms()[0] + self.float_type = { + 'single': (numpy.float32, 'float'), + 'double': (numpy.float64, 'double'), + }.get(precision, None) + + self.compiler_args = { + 'single': '-cl-single-precision-constant -cl-fast-relaxed-math', + 'double': '-cl-fast-relaxed-math' + }.get(precision, None) + + self.platform = cl.get_platforms()[platform] if opengl: self.context = cl.Context( properties=[(cl.context_properties.PLATFORM, self.platform)] + get_gl_sharing_context_properties()) @@ -56,16 +70,15 @@ class Lattice: self.np_material = numpy.ndarray(shape=(self.geometry.volume, 1), dtype=numpy.int32) - self.tick = True - - self.pop_size = descriptor.q * self.geometry.volume * numpy.float32(0).nbytes - self.moments_size = (descriptor.d+1) * self.geometry.volume * numpy.float32(0).nbytes + self.pop_size = descriptor.q * self.geometry.volume * self.float_type[0](0).nbytes + self.moments_size = (descriptor.d+1) * self.geometry.volume * self.float_type[0](0).nbytes + self.tick = True self.cl_pop_a = cl.Buffer(self.context, mf.READ_WRITE, size=self.pop_size) self.cl_pop_b = cl.Buffer(self.context, mf.READ_WRITE, size=self.pop_size) if opengl: - self.np_moments = numpy.ndarray(shape=(self.geometry.volume, 4), dtype=numpy.float32) + self.np_moments = numpy.ndarray(shape=(self.geometry.volume, 4), 
dtype=self.float_type[0]) self.gl_moments = vbo.VBO(data=self.np_moments, usage=gl.GL_DYNAMIC_DRAW, target=gl.GL_ARRAY_BUFFER) self.gl_moments.bind() self.cl_gl_moments = cl.GLBuffer(self.context, mf.READ_WRITE, int(self.gl_moments)) @@ -76,11 +89,14 @@ class Lattice: self.build_kernel() - self.layout = { - (2, 9): (32,1), - (3,19): (32,1,1), - (3,27): (32,1,1) - }.get((descriptor.d, descriptor.q), None) + if layout == None: + self.layout = { + (2, 9): (32,1), + (3,19): (32,1,1), + (3,27): (32,1,1) + }.get((descriptor.d, descriptor.q), None) + else: + self.layout = layout self.program.equilibrilize( self.queue, self.geometry.size(), self.layout, self.cl_pop_a, self.cl_pop_b).wait() @@ -104,18 +120,22 @@ class Lattice: collide_subexpr = self.collide[0], collide_assignment = self.collide[1], + float_type = self.float_type[1], + pop_eq_src = Template(self.pop_eq_src).render( descriptor = self.descriptor, - geometry = self.geometry + geometry = self.geometry, + float_type = self.float_type[1] ), boundary_src = Template(self.boundary_src).render( descriptor = self.descriptor, - geometry = self.geometry + geometry = self.geometry, + float_type = self.float_type[1] ), ccode = sympy.ccode ) - self.program = cl.Program(self.context, program_src).build('-cl-single-precision-constant -cl-fast-relaxed-math') + self.program = cl.Program(self.context, program_src).build(self.compiler_args) def evolve(self): if self.tick: @@ -131,7 +151,7 @@ class Lattice: self.queue.finish() def get_moments(self): - moments = numpy.ndarray(shape=(self.descriptor.d+1, self.geometry.volume), dtype=numpy.float32) + moments = numpy.ndarray(shape=(self.descriptor.d+1, self.geometry.volume), dtype=self.float_type[0]) if self.tick: self.program.collect_moments( diff --git a/template/kernel.mako b/template/kernel.mako index 417851a..41edcbf 100644 --- a/template/kernel.mako +++ b/template/kernel.mako @@ -9,13 +9,13 @@ def pop_offset(i): return i * geometry.volume %> -__kernel void equilibrilize(__global 
__write_only float* f_next, - __global __write_only float* f_prev) +__kernel void equilibrilize(__global __write_only ${float_type}* f_next, + __global __write_only ${float_type}* f_prev) { const unsigned int gid = ${gid()}; - __global __write_only float* preshifted_f_next = f_next + gid; - __global __write_only float* preshifted_f_prev = f_prev + gid; + __global __write_only ${float_type}* preshifted_f_next = f_next + gid; + __global __write_only ${float_type}* preshifted_f_prev = f_prev + gid; % if pop_eq_src == '': % for i, w_i in enumerate(descriptor.w): @@ -36,8 +36,8 @@ def neighbor_offset(c_i): %> -__kernel void collide_and_stream(__global __write_only float* f_next, - __global __read_only float* f_prev, +__kernel void collide_and_stream(__global __write_only ${float_type}* f_next, + __global __read_only ${float_type}* f_prev, __global __read_only int* material) { const unsigned int gid = ${gid()}; @@ -48,29 +48,29 @@ __kernel void collide_and_stream(__global __write_only float* f_next, return; } - __global __write_only float* preshifted_f_next = f_next + gid; - __global __read_only float* preshifted_f_prev = f_prev + gid; + __global __write_only ${float_type}* preshifted_f_next = f_next + gid; + __global __read_only ${float_type}* preshifted_f_prev = f_prev + gid; % for i, c_i in enumerate(descriptor.c): - const float f_curr_${i} = preshifted_f_prev[${pop_offset(i) + neighbor_offset(-c_i)}]; + const ${float_type} f_curr_${i} = preshifted_f_prev[${pop_offset(i) + neighbor_offset(-c_i)}]; % endfor % for i, expr in enumerate(moments_subexpr): - const float ${expr[0]} = ${ccode(expr[1])}; + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; % endfor % for i, expr in enumerate(moments_assignment): - float ${ccode(expr)} + ${float_type} ${ccode(expr)} % endfor ${boundary_src} % for i, expr in enumerate(collide_subexpr): - const float ${expr[0]} = ${ccode(expr[1])}; + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; % endfor % for i, expr in 
enumerate(collide_assignment): - const float ${ccode(expr)} + const ${float_type} ${ccode(expr)} % endfor % for i in range(0,descriptor.q): @@ -78,19 +78,19 @@ __kernel void collide_and_stream(__global __write_only float* f_next, % endfor } -__kernel void collect_moments(__global __read_only float* f, - __global __write_only float* moments) +__kernel void collect_moments(__global __read_only ${float_type}* f, + __global __write_only ${float_type}* moments) { const unsigned int gid = ${gid()}; - __global __read_only float* preshifted_f = f + gid; + __global __read_only ${float_type}* preshifted_f = f + gid; % for i in range(0,descriptor.q): - const float f_curr_${i} = preshifted_f[${pop_offset(i)}]; + const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}]; % endfor % for i, expr in enumerate(moments_subexpr): - const float ${expr[0]} = ${ccode(expr[1])}; + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; % endfor % for i, expr in enumerate(moments_assignment): @@ -98,19 +98,19 @@ __kernel void collect_moments(__global __read_only float* f, % endfor } -__kernel void collect_gl_moments(__global __read_only float* f, +__kernel void collect_gl_moments(__global __read_only ${float_type}* f, __global __write_only float4* moments) { const unsigned int gid = ${gid()}; - __global __read_only float* preshifted_f = f + gid; + __global __read_only ${float_type}* preshifted_f = f + gid; % for i in range(0,descriptor.q): - const float f_curr_${i} = preshifted_f[${pop_offset(i)}]; + const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}]; % endfor % for i, expr in enumerate(moments_subexpr): - const float ${expr[0]} = ${ccode(expr[1])}; + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; % endfor float4 data; |