diff options
author | Adrian Kummerlaender | 2019-10-24 21:52:45 +0200 |
---|---|---|
committer | Adrian Kummerlaender | 2019-10-24 21:52:45 +0200 |
commit | 73de5f16efc696cf0c88beec086eb9a4df9098dd (patch) | |
tree | 543f1eee6f9ae8391838635268a83177dc5c1c2e | |
parent | b3d131b94f9417c0c4cd6733433c86ca780dde5e (diff) | |
download | boltzgen-73de5f16efc696cf0c88beec086eb9a4df9098dd.tar boltzgen-73de5f16efc696cf0c88beec086eb9a4df9098dd.tar.gz boltzgen-73de5f16efc696cf0c88beec086eb9a4df9098dd.tar.bz2 boltzgen-73de5f16efc696cf0c88beec086eb9a4df9098dd.tar.lz boltzgen-73de5f16efc696cf0c88beec086eb9a4df9098dd.tar.xz boltzgen-73de5f16efc696cf0c88beec086eb9a4df9098dd.tar.zst boltzgen-73de5f16efc696cf0c88beec086eb9a4df9098dd.zip |
Extract offset helper into target and layout specific classes
-rw-r--r-- | boltzgen.py | 12 | ||||
-rw-r--r-- | boltzgen/kernel/generator.py | 12 | ||||
-rw-r--r-- | boltzgen/kernel/target/cl.py | 19 | ||||
-rw-r--r-- | boltzgen/kernel/target/cpp.py | 45 | ||||
-rw-r--r-- | boltzgen/kernel/template/basic.cl.mako | 35 | ||||
-rw-r--r-- | boltzgen/kernel/template/basic.cpp.mako | 63 |
6 files changed, 103 insertions, 83 deletions
diff --git a/boltzgen.py b/boltzgen.py index 04651e4..e5a8588 100644 --- a/boltzgen.py +++ b/boltzgen.py @@ -4,16 +4,12 @@ import string from boltzgen import * argparser = argparse.ArgumentParser(description='Generate LBM kernels in various languages using a symbolic description.') -argparser.add_argument('language', help = 'Target language (currently either "opencl" or "cpp")') -argparser.add_argument( - '--layout', dest = 'layout', - help = 'Memory layout ("aos" or "soa" for C++, ignored for OpenCL') +argparser.add_argument('language', help = 'Target language (currently either "cl" or "cpp")') +argparser.add_argument('--layout', dest = 'layout', help = 'Memory layout ("AOS" or "SOA")', required = True) +argparser.add_argument('--precision', dest = 'precision', help = 'Floating precision ("float" or "double")', required = True) args = argparser.parse_args() -if args.language == 'cpp' and args.layout is None: - raise Exception('Please specify the memory layout') - lbm = LBM(D2Q9) generator = Generator( descriptor = D2Q9, @@ -22,5 +18,5 @@ generator = Generator( geometry = Geometry(1024,1024) -src = generator.kernel(args.language, 'double', args.layout, geometry) +src = generator.kernel(args.language, args.precision, args.layout, geometry) print(src) diff --git a/boltzgen/kernel/generator.py b/boltzgen/kernel/generator.py index c9fdb27..e897325 100644 --- a/boltzgen/kernel/generator.py +++ b/boltzgen/kernel/generator.py @@ -1,9 +1,11 @@ import sympy from mako.template import Template - from pathlib import Path +import kernel.target.cl +import kernel.target.cpp + class Generator: def __init__(self, descriptor, moments, collision, boundary = ''): self.descriptor = descriptor @@ -16,10 +18,16 @@ class Generator: if not template_path.exists(): raise Exception("Target '%s' not supported" % target) + layout_impl = eval("kernel.target.%s.%s" % (target, layout)) + if layout_impl is None: + raise Exception("Target '%s' doesn't support layout '%s'" % (target, layout)) + else: + layout_impl = layout_impl(self.descriptor, geometry) + return Template(filename = str(template_path)).render( descriptor = self.descriptor, geometry = geometry, - layout = layout, + layout = layout_impl, moments_subexpr = self.moments[0], moments_assignment = self.moments[1], diff --git a/boltzgen/kernel/target/cl.py b/boltzgen/kernel/target/cl.py new file mode 100644 index 0000000..a2fc819 --- /dev/null +++ b/boltzgen/kernel/target/cl.py @@ -0,0 +1,19 @@ +class SOA: + def __init__(self, descriptor, geometry): + self.descriptor = descriptor + self.geometry = geometry + + def gid(self): + return { + 2: 'get_global_id(1)*%d + get_global_id(0)' % self.geometry.size_x, + 3: 'get_global_id(2)*%d + get_global_id(1)*%d + get_global_id(0)' % (self.geometry.size_x*self.geometry.size_y, self.geometry.size_x) + }.get(self.descriptor.d) + + def pop_offset(self, i): + return i * self.geometry.volume + + def neighbor_offset(self, c_i): + return { + 2: lambda: c_i[1]*self.geometry.size_x + c_i[0], + 3: lambda: c_i[2]*self.geometry.size_x*self.geometry.size_y + c_i[1]*self.geometry.size_x + c_i[0] + }.get(self.descriptor.d)() diff --git a/boltzgen/kernel/target/cpp.py b/boltzgen/kernel/target/cpp.py new file mode 100644 index 0000000..de79716 --- /dev/null +++ b/boltzgen/kernel/target/cpp.py @@ -0,0 +1,45 @@ +class AOS: + def __init__(self, descriptor, geometry): + self.descriptor = descriptor + self.geometry = geometry + + def gid_offset(self): + return self.descriptor.q + + def pop_offset(self, i): + return i + + def neighbor_offset(self, c_i): + return self.descriptor.q * { + 2: lambda: c_i[0]*self.geometry.size_y + c_i[1], + 3: lambda: c_i[0]*self.geometry.size_y*self.geometry.size_z + c_i[1]*self.geometry.size_z + c_i[2] + }.get(self.descriptor.d)() + + def padding(self): + return self.descriptor.q * { + 2: lambda: 1*self.geometry.size_y + 1, + 3: lambda: 1*self.geometry.size_y*self.geometry.size_z + 1*self.geometry.size_z + 1 + }.get(self.descriptor.d)() + +class SOA: + def __init__(self, descriptor, geometry): + self.descriptor = descriptor + self.geometry = geometry + + def gid_offset(self): + return 1 + + def pop_offset(self, i): + return i * self.geometry.volume + + def neighbor_offset(self, c_i): + return { + 2: lambda: c_i[0]*self.geometry.size_y + c_i[1], + 3: lambda: c_i[0]*self.geometry.size_y*self.geometry.size_z + c_i[1]*self.geometry.size_z + c_i[2] + }.get(self.descriptor.d)() + + def padding(self): + return { + 2: lambda: 1*self.geometry.size_y + 1, + 3: lambda: 1*self.geometry.size_y*self.geometry.size_z + 1*self.geometry.size_z + 1 + }.get(self.descriptor.d)() diff --git a/boltzgen/kernel/template/basic.cl.mako b/boltzgen/kernel/template/basic.cl.mako index 1b02c63..b64a480 100644 --- a/boltzgen/kernel/template/basic.cl.mako +++ b/boltzgen/kernel/template/basic.cl.mako @@ -1,20 +1,3 @@ -<% -def gid(): - return { - 2: 'get_global_id(1)*%d + get_global_id(0)' % geometry.size_x, - 3: 'get_global_id(2)*%d + get_global_id(1)*%d + get_global_id(0)' % (geometry.size_x*geometry.size_y, geometry.size_x) - }.get(descriptor.d) - -def pop_offset(i): - return i * geometry.volume - -def neighbor_offset(c_i): - return { - 2: lambda: c_i[1]*geometry.size_x + c_i[0], - 3: lambda: c_i[2]*geometry.size_x*geometry.size_y + c_i[1]*geometry.size_x + c_i[0] - }.get(descriptor.d)() -%> - % if float_type == 'double': #if defined(cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64 : enable @@ -26,14 +9,14 @@ def neighbor_offset(c_i): __kernel void equilibrilize(__global ${float_type}* f_next, __global ${float_type}* f_prev) { - const unsigned int gid = ${gid()}; + const unsigned int gid = ${layout.gid()}; __global ${float_type}* preshifted_f_next = f_next + gid; __global ${float_type}* preshifted_f_prev = f_prev + gid; % for i, w_i in enumerate(descriptor.w): - preshifted_f_next[${pop_offset(i)}] = ${w_i}.f; - preshifted_f_prev[${pop_offset(i)}] = ${w_i}.f; + preshifted_f_next[${layout.pop_offset(i)}] = ${w_i}.f; + preshifted_f_prev[${layout.pop_offset(i)}] = ${w_i}.f; % endfor } @@ -42,7 +25,7 @@ __kernel void collide_and_stream(__global ${float_type}* f_next, __global int* material, unsigned int time) { - const unsigned int gid = ${gid()}; + const unsigned int gid = ${layout.gid()}; const int m = material[gid]; @@ -54,7 +37,7 @@ __kernel void collide_and_stream(__global ${float_type}* f_next, __global ${float_type}* preshifted_f_prev = f_prev + gid; % for i, c_i in enumerate(descriptor.c): - const ${float_type} f_curr_${i} = preshifted_f_prev[${pop_offset(i) + neighbor_offset(-c_i)}]; + const ${float_type} f_curr_${i} = preshifted_f_prev[${layout.pop_offset(i) + layout.neighbor_offset(-c_i)}]; % endfor % for i, expr in enumerate(moments_subexpr): @@ -76,19 +59,19 @@ __kernel void collide_and_stream(__global ${float_type}* f_next, % endfor % for i in range(0,descriptor.q): - preshifted_f_next[${pop_offset(i)}] = f_next_${i}; + preshifted_f_next[${layout.pop_offset(i)}] = f_next_${i}; % endfor } __kernel void collect_moments(__global ${float_type}* f, __global ${float_type}* moments) { - const unsigned int gid = ${gid()}; + const unsigned int gid = ${layout.gid()}; __global ${float_type}* preshifted_f = f + gid; % for i in range(0,descriptor.q): - const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}]; + const ${float_type} f_curr_${i} = preshifted_f[${layout.pop_offset(i)}]; % endfor % for i, expr in enumerate(moments_subexpr): @@ -96,6 +79,6 @@ __kernel void collect_moments(__global ${float_type}* f, % endfor % for i, expr in enumerate(moments_assignment): - moments[${pop_offset(i)} + gid] = ${ccode(expr.rhs)}; + moments[${layout.pop_offset(i)} + gid] = ${ccode(expr.rhs)}; % endfor } diff --git a/boltzgen/kernel/template/basic.cpp.mako b/boltzgen/kernel/template/basic.cpp.mako index d969d60..529c1de 100644 --- a/boltzgen/kernel/template/basic.cpp.mako +++ b/boltzgen/kernel/template/basic.cpp.mako @@ -1,45 +1,13 @@ -<% -def gid_offset(): - return { - 'soa': 1, - 'aos': descriptor.q - }.get(layout); - -def pop_offset(i): - return { - 'soa': i * geometry.volume, - 'aos': i - }.get(layout); - -def neighbor_offset(c_i): - return { - 2: lambda: c_i[0]*geometry.size_y + c_i[1], - 3: lambda: c_i[0]*geometry.size_y*geometry.size_z + c_i[1]*geometry.size_z + c_i[2] - }.get(descriptor.d)() * { - 'soa': 1, - 'aos': descriptor.q - }.get(layout); - -def padding(): - return { - 2: lambda: 1*geometry.size_y + 1, - 3: lambda: 1*geometry.size_y*geometry.size_z + 1*geometry.size_z + 1 - }.get(descriptor.d)() * { - 'soa': 1, - 'aos': descriptor.q - }.get(layout); -%> - void equilibrilize(${float_type}* f_next, ${float_type}* f_prev, std::size_t gid) { - ${float_type}* preshifted_f_next = f_next + gid*${gid_offset()}; - ${float_type}* preshifted_f_prev = f_prev + gid*${gid_offset()}; + ${float_type}* preshifted_f_next = f_next + gid*${layout.gid_offset()}; + ${float_type}* preshifted_f_prev = f_prev + gid*${layout.gid_offset()}; % for i, w_i in enumerate(descriptor.w): - preshifted_f_next[${pop_offset(i)}] = ${w_i.evalf()}; - preshifted_f_prev[${pop_offset(i)}] = ${w_i.evalf()}; + preshifted_f_next[${layout.pop_offset(i)}] = ${w_i.evalf()}; + preshifted_f_prev[${layout.pop_offset(i)}] = ${w_i.evalf()}; % endfor } @@ -47,11 +15,11 @@ void collide_and_stream( ${float_type}* f_next, const ${float_type}* f_prev, std::size_t gid) { - ${float_type}* preshifted_f_next = f_next + gid*${gid_offset()}; - const ${float_type}* preshifted_f_prev = f_prev + gid*${gid_offset()}; + ${float_type}* preshifted_f_next = f_next + gid*${layout.gid_offset()}; + const ${float_type}* preshifted_f_prev = f_prev + gid*${layout.gid_offset()}; % for i, c_i in enumerate(descriptor.c): - const ${float_type} f_curr_${i} = preshifted_f_prev[${pop_offset(i) + neighbor_offset(-c_i)}]; + const ${float_type} f_curr_${i} = preshifted_f_prev[${layout.pop_offset(i) + layout.neighbor_offset(-c_i)}]; % endfor % for i, expr in enumerate(moments_subexpr): @@ -71,7 +39,7 @@ void collide_and_stream( ${float_type}* f_next, % endfor % for i, expr in enumerate(collision_assignment): - preshifted_f_next[${pop_offset(i)}] = f_next_${i}; + preshifted_f_next[${layout.pop_offset(i)}] = f_next_${i}; % endfor } @@ -80,10 +48,10 @@ void collect_moments(const ${float_type}* f, ${float_type}& rho, ${float_type} u[${descriptor.d}]) { - const ${float_type}* preshifted_f = f + gid*${gid_offset()}; + const ${float_type}* preshifted_f = f + gid*${layout.gid_offset()}; % for i in range(0,descriptor.q): - const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}]; + const ${float_type} f_curr_${i} = preshifted_f[${layout.pop_offset(i)}]; % endfor % for i, expr in enumerate(moments_subexpr): @@ -101,18 +69,19 @@ void collect_moments(const ${float_type}* f, void test(std::size_t nStep) { - auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*padding()}); - auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*padding()}); + auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*layout.padding()}); + auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*layout.padding()}); auto material = std::make_unique<int[]>(${geometry.volume}); // buffers are padded by maximum neighbor overreach to prevent invalid memory access - ${float_type}* f_prev = f_a.get() + ${padding()}; - ${float_type}* f_next = f_b.get() + ${padding()}; + ${float_type}* f_prev = f_a.get() + ${layout.padding()}; + ${float_type}* f_next = f_b.get() + ${layout.padding()}; for (int iX = 0; iX < ${geometry.size_x}; ++iX) { for (int iY = 0; iY < ${geometry.size_y}; ++iY) { for (int iZ = 0; iZ < ${geometry.size_z}; ++iZ) { - if (iX == 0 || iY == 0 || iZ == 0 || iX == ${geometry.size_x-1} || iY == ${geometry.size_y-1} || iZ == ${geometry.size_z-1}) { + if (iX == 0 || iY == 0 || iZ == 0 || + iX == ${geometry.size_x-1} || iY == ${geometry.size_y-1} || iZ == ${geometry.size_z-1}) { material[iX*${geometry.size_y*geometry.size_z} + iY*${geometry.size_z} + iZ] = 0; } else { material[iX*${geometry.size_y*geometry.size_z} + iY*${geometry.size_z} + iZ] = 1; |