aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--boltzgen.py12
-rw-r--r--boltzgen/kernel/generator.py12
-rw-r--r--boltzgen/kernel/target/cl.py19
-rw-r--r--boltzgen/kernel/target/cpp.py45
-rw-r--r--boltzgen/kernel/template/basic.cl.mako35
-rw-r--r--boltzgen/kernel/template/basic.cpp.mako63
6 files changed, 103 insertions, 83 deletions
diff --git a/boltzgen.py b/boltzgen.py
index 04651e4..e5a8588 100644
--- a/boltzgen.py
+++ b/boltzgen.py
@@ -4,16 +4,12 @@ import string
from boltzgen import *
argparser = argparse.ArgumentParser(description='Generate LBM kernels in various languages using a symbolic description.')
-argparser.add_argument('language', help = 'Target language (currently either "opencl" or "cpp")')
-argparser.add_argument(
- '--layout', dest = 'layout',
- help = 'Memory layout ("aos" or "soa" for C++, ignored for OpenCL')
+argparser.add_argument('language', help = 'Target language (currently either "cl" or "cpp")')
+argparser.add_argument('--layout', dest = 'layout', help = 'Memory layout ("AOS" or "SOA")', required = True)
+argparser.add_argument('--precision', dest = 'precision', help = 'Floating precision ("float" or "double")', required = True)
args = argparser.parse_args()
-if args.language == 'cpp' and args.layout is None:
- raise Exception('Please specify the memory layout')
-
lbm = LBM(D2Q9)
generator = Generator(
descriptor = D2Q9,
@@ -22,5 +18,5 @@ generator = Generator(
geometry = Geometry(1024,1024)
-src = generator.kernel(args.language, 'double', args.layout, geometry)
+src = generator.kernel(args.language, args.precision, args.layout, geometry)
print(src)
diff --git a/boltzgen/kernel/generator.py b/boltzgen/kernel/generator.py
index c9fdb27..e897325 100644
--- a/boltzgen/kernel/generator.py
+++ b/boltzgen/kernel/generator.py
@@ -1,9 +1,11 @@
import sympy
from mako.template import Template
-
from pathlib import Path
+import kernel.target.cl
+import kernel.target.cpp
+
class Generator:
def __init__(self, descriptor, moments, collision, boundary = ''):
self.descriptor = descriptor
@@ -16,10 +18,16 @@ class Generator:
if not template_path.exists():
raise Exception("Target '%s' not supported" % target)
+ layout_impl = eval("kernel.target.%s.%s" % (target, layout))
+ if layout_impl is None:
+ raise Exception("Target '%s' doesn't support layout '%s'" % (target, layout))
+ else:
+ layout_impl = layout_impl(self.descriptor, geometry)
+
return Template(filename = str(template_path)).render(
descriptor = self.descriptor,
geometry = geometry,
- layout = layout,
+ layout = layout_impl,
moments_subexpr = self.moments[0],
moments_assignment = self.moments[1],
diff --git a/boltzgen/kernel/target/cl.py b/boltzgen/kernel/target/cl.py
new file mode 100644
index 0000000..a2fc819
--- /dev/null
+++ b/boltzgen/kernel/target/cl.py
@@ -0,0 +1,19 @@
+class SOA:
+ def __init__(self, descriptor, geometry):
+ self.descriptor = descriptor
+ self.geometry = geometry
+
+ def gid(self):
+ return {
+ 2: 'get_global_id(1)*%d + get_global_id(0)' % self.geometry.size_x,
+ 3: 'get_global_id(2)*%d + get_global_id(1)*%d + get_global_id(0)' % (self.geometry.size_x*self.geometry.size_y, self.geometry.size_x)
+ }.get(self.descriptor.d)
+
+ def pop_offset(self, i):
+ return i * self.geometry.volume
+
+ def neighbor_offset(self, c_i):
+ return {
+ 2: lambda: c_i[1]*self.geometry.size_x + c_i[0],
+ 3: lambda: c_i[2]*self.geometry.size_x*self.geometry.size_y + c_i[1]*self.geometry.size_x + c_i[0]
+ }.get(self.descriptor.d)()
diff --git a/boltzgen/kernel/target/cpp.py b/boltzgen/kernel/target/cpp.py
new file mode 100644
index 0000000..de79716
--- /dev/null
+++ b/boltzgen/kernel/target/cpp.py
@@ -0,0 +1,45 @@
+class AOS:
+ def __init__(self, descriptor, geometry):
+ self.descriptor = descriptor
+ self.geometry = geometry
+
+ def gid_offset(self):
+ return self.descriptor.q
+
+ def pop_offset(self, i):
+ return i
+
+ def neighbor_offset(self, c_i):
+ return self.descriptor.q * {
+ 2: lambda: c_i[0]*self.geometry.size_y + c_i[1],
+ 3: lambda: c_i[0]*self.geometry.size_y*self.geometry.size_z + c_i[1]*self.geometry.size_z + c_i[2]
+ }.get(self.descriptor.d)()
+
+ def padding(self):
+ return self.descriptor.q * {
+ 2: lambda: 1*self.geometry.size_y + 1,
+ 3: lambda: 1*self.geometry.size_y*self.geometry.size_z + 1*self.geometry.size_z + 1
+ }.get(self.descriptor.d)()
+
+class SOA:
+ def __init__(self, descriptor, geometry):
+ self.descriptor = descriptor
+ self.geometry = geometry
+
+ def gid_offset(self):
+ return 1
+
+ def pop_offset(self, i):
+ return i * self.geometry.volume
+
+ def neighbor_offset(self, c_i):
+ return {
+ 2: lambda: c_i[0]*self.geometry.size_y + c_i[1],
+ 3: lambda: c_i[0]*self.geometry.size_y*self.geometry.size_z + c_i[1]*self.geometry.size_z + c_i[2]
+ }.get(self.descriptor.d)()
+
+ def padding(self):
+ return {
+ 2: lambda: 1*self.geometry.size_y + 1,
+ 3: lambda: 1*self.geometry.size_y*self.geometry.size_z + 1*self.geometry.size_z + 1
+ }.get(self.descriptor.d)()
diff --git a/boltzgen/kernel/template/basic.cl.mako b/boltzgen/kernel/template/basic.cl.mako
index 1b02c63..b64a480 100644
--- a/boltzgen/kernel/template/basic.cl.mako
+++ b/boltzgen/kernel/template/basic.cl.mako
@@ -1,20 +1,3 @@
-<%
-def gid():
- return {
- 2: 'get_global_id(1)*%d + get_global_id(0)' % geometry.size_x,
- 3: 'get_global_id(2)*%d + get_global_id(1)*%d + get_global_id(0)' % (geometry.size_x*geometry.size_y, geometry.size_x)
- }.get(descriptor.d)
-
-def pop_offset(i):
- return i * geometry.volume
-
-def neighbor_offset(c_i):
- return {
- 2: lambda: c_i[1]*geometry.size_x + c_i[0],
- 3: lambda: c_i[2]*geometry.size_x*geometry.size_y + c_i[1]*geometry.size_x + c_i[0]
- }.get(descriptor.d)()
-%>
-
% if float_type == 'double':
#if defined(cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
@@ -26,14 +9,14 @@ def neighbor_offset(c_i):
__kernel void equilibrilize(__global ${float_type}* f_next,
__global ${float_type}* f_prev)
{
- const unsigned int gid = ${gid()};
+ const unsigned int gid = ${layout.gid()};
__global ${float_type}* preshifted_f_next = f_next + gid;
__global ${float_type}* preshifted_f_prev = f_prev + gid;
% for i, w_i in enumerate(descriptor.w):
- preshifted_f_next[${pop_offset(i)}] = ${w_i}.f;
- preshifted_f_prev[${pop_offset(i)}] = ${w_i}.f;
+ preshifted_f_next[${layout.pop_offset(i)}] = ${w_i}.f;
+ preshifted_f_prev[${layout.pop_offset(i)}] = ${w_i}.f;
% endfor
}
@@ -42,7 +25,7 @@ __kernel void collide_and_stream(__global ${float_type}* f_next,
__global int* material,
unsigned int time)
{
- const unsigned int gid = ${gid()};
+ const unsigned int gid = ${layout.gid()};
const int m = material[gid];
@@ -54,7 +37,7 @@ __kernel void collide_and_stream(__global ${float_type}* f_next,
__global ${float_type}* preshifted_f_prev = f_prev + gid;
% for i, c_i in enumerate(descriptor.c):
- const ${float_type} f_curr_${i} = preshifted_f_prev[${pop_offset(i) + neighbor_offset(-c_i)}];
+ const ${float_type} f_curr_${i} = preshifted_f_prev[${layout.pop_offset(i) + layout.neighbor_offset(-c_i)}];
% endfor
% for i, expr in enumerate(moments_subexpr):
@@ -76,19 +59,19 @@ __kernel void collide_and_stream(__global ${float_type}* f_next,
% endfor
% for i in range(0,descriptor.q):
- preshifted_f_next[${pop_offset(i)}] = f_next_${i};
+ preshifted_f_next[${layout.pop_offset(i)}] = f_next_${i};
% endfor
}
__kernel void collect_moments(__global ${float_type}* f,
__global ${float_type}* moments)
{
- const unsigned int gid = ${gid()};
+ const unsigned int gid = ${layout.gid()};
__global ${float_type}* preshifted_f = f + gid;
% for i in range(0,descriptor.q):
- const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}];
+ const ${float_type} f_curr_${i} = preshifted_f[${layout.pop_offset(i)}];
% endfor
% for i, expr in enumerate(moments_subexpr):
@@ -96,6 +79,6 @@ __kernel void collect_moments(__global ${float_type}* f,
% endfor
% for i, expr in enumerate(moments_assignment):
- moments[${pop_offset(i)} + gid] = ${ccode(expr.rhs)};
+ moments[${layout.pop_offset(i)} + gid] = ${ccode(expr.rhs)};
% endfor
}
diff --git a/boltzgen/kernel/template/basic.cpp.mako b/boltzgen/kernel/template/basic.cpp.mako
index d969d60..529c1de 100644
--- a/boltzgen/kernel/template/basic.cpp.mako
+++ b/boltzgen/kernel/template/basic.cpp.mako
@@ -1,45 +1,13 @@
-<%
-def gid_offset():
- return {
- 'soa': 1,
- 'aos': descriptor.q
- }.get(layout);
-
-def pop_offset(i):
- return {
- 'soa': i * geometry.volume,
- 'aos': i
- }.get(layout);
-
-def neighbor_offset(c_i):
- return {
- 2: lambda: c_i[0]*geometry.size_y + c_i[1],
- 3: lambda: c_i[0]*geometry.size_y*geometry.size_z + c_i[1]*geometry.size_z + c_i[2]
- }.get(descriptor.d)() * {
- 'soa': 1,
- 'aos': descriptor.q
- }.get(layout);
-
-def padding():
- return {
- 2: lambda: 1*geometry.size_y + 1,
- 3: lambda: 1*geometry.size_y*geometry.size_z + 1*geometry.size_z + 1
- }.get(descriptor.d)() * {
- 'soa': 1,
- 'aos': descriptor.q
- }.get(layout);
-%>
-
void equilibrilize(${float_type}* f_next,
${float_type}* f_prev,
std::size_t gid)
{
- ${float_type}* preshifted_f_next = f_next + gid*${gid_offset()};
- ${float_type}* preshifted_f_prev = f_prev + gid*${gid_offset()};
+ ${float_type}* preshifted_f_next = f_next + gid*${layout.gid_offset()};
+ ${float_type}* preshifted_f_prev = f_prev + gid*${layout.gid_offset()};
% for i, w_i in enumerate(descriptor.w):
- preshifted_f_next[${pop_offset(i)}] = ${w_i.evalf()};
- preshifted_f_prev[${pop_offset(i)}] = ${w_i.evalf()};
+ preshifted_f_next[${layout.pop_offset(i)}] = ${w_i.evalf()};
+ preshifted_f_prev[${layout.pop_offset(i)}] = ${w_i.evalf()};
% endfor
}
@@ -47,11 +15,11 @@ void collide_and_stream( ${float_type}* f_next,
const ${float_type}* f_prev,
std::size_t gid)
{
- ${float_type}* preshifted_f_next = f_next + gid*${gid_offset()};
- const ${float_type}* preshifted_f_prev = f_prev + gid*${gid_offset()};
+ ${float_type}* preshifted_f_next = f_next + gid*${layout.gid_offset()};
+ const ${float_type}* preshifted_f_prev = f_prev + gid*${layout.gid_offset()};
% for i, c_i in enumerate(descriptor.c):
- const ${float_type} f_curr_${i} = preshifted_f_prev[${pop_offset(i) + neighbor_offset(-c_i)}];
+ const ${float_type} f_curr_${i} = preshifted_f_prev[${layout.pop_offset(i) + layout.neighbor_offset(-c_i)}];
% endfor
% for i, expr in enumerate(moments_subexpr):
@@ -71,7 +39,7 @@ void collide_and_stream( ${float_type}* f_next,
% endfor
% for i, expr in enumerate(collision_assignment):
- preshifted_f_next[${pop_offset(i)}] = f_next_${i};
+ preshifted_f_next[${layout.pop_offset(i)}] = f_next_${i};
% endfor
}
@@ -80,10 +48,10 @@ void collect_moments(const ${float_type}* f,
${float_type}& rho,
${float_type} u[${descriptor.d}])
{
- const ${float_type}* preshifted_f = f + gid*${gid_offset()};
+ const ${float_type}* preshifted_f = f + gid*${layout.gid_offset()};
% for i in range(0,descriptor.q):
- const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}];
+ const ${float_type} f_curr_${i} = preshifted_f[${layout.pop_offset(i)}];
% endfor
% for i, expr in enumerate(moments_subexpr):
@@ -101,18 +69,19 @@ void collect_moments(const ${float_type}* f,
void test(std::size_t nStep)
{
- auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*padding()});
- auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*padding()});
+ auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*layout.padding()});
+ auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*layout.padding()});
auto material = std::make_unique<int[]>(${geometry.volume});
// buffers are padded by maximum neighbor overreach to prevent invalid memory access
- ${float_type}* f_prev = f_a.get() + ${padding()};
- ${float_type}* f_next = f_b.get() + ${padding()};
+ ${float_type}* f_prev = f_a.get() + ${layout.padding()};
+ ${float_type}* f_next = f_b.get() + ${layout.padding()};
for (int iX = 0; iX < ${geometry.size_x}; ++iX) {
for (int iY = 0; iY < ${geometry.size_y}; ++iY) {
for (int iZ = 0; iZ < ${geometry.size_z}; ++iZ) {
- if (iX == 0 || iY == 0 || iZ == 0 || iX == ${geometry.size_x-1} || iY == ${geometry.size_y-1} || iZ == ${geometry.size_z-1}) {
+ if (iX == 0 || iY == 0 || iZ == 0 ||
+ iX == ${geometry.size_x-1} || iY == ${geometry.size_y-1} || iZ == ${geometry.size_z-1}) {
material[iX*${geometry.size_y*geometry.size_z} + iY*${geometry.size_z} + iZ] = 0;
} else {
material[iX*${geometry.size_y*geometry.size_z} + iY*${geometry.size_z} + iZ] = 1;