aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--boltzgen.py5
-rw-r--r--boltzgen/kernel/generator.py3
-rw-r--r--boltzgen/kernel/template/basic.cpp.mako116
3 files changed, 105 insertions, 19 deletions
diff --git a/boltzgen.py b/boltzgen.py
index 27a34fa..14aa292 100644
--- a/boltzgen.py
+++ b/boltzgen.py
@@ -5,6 +5,7 @@ from boltzgen import *
argparser = argparse.ArgumentParser(description='Generate LBM kernels in various languages using a symbolic description.')
argparser.add_argument('language', help = 'Target language (currently either "opencl" or "cpp")')
+argparser.add_argument('layout', help = 'Memory layout ("aos" or "soa" for C++, ignored for OpenCL')
args = argparser.parse_args()
@@ -14,7 +15,7 @@ generator = Generator(
moments = lbm.moments(),
collision = lbm.bgk(f_eq = lbm.equilibrium(), tau = 0.6))
-geometry = Geometry(32,32)
+geometry = Geometry(1024,1024)
-src = generator.kernel(args.language, 'float', geometry)
+src = generator.kernel(args.language, 'double', args.layout, geometry)
print(src)
diff --git a/boltzgen/kernel/generator.py b/boltzgen/kernel/generator.py
index 42d0430..c9fdb27 100644
--- a/boltzgen/kernel/generator.py
+++ b/boltzgen/kernel/generator.py
@@ -11,7 +11,7 @@ class Generator:
self.collision = collision
self.boundary = boundary
- def kernel(self, target, precision, geometry):
+ def kernel(self, target, precision, layout, geometry):
template_path = Path(__file__).parent/('template/basic.' + target + '.mako')
if not template_path.exists():
raise Exception("Target '%s' not supported" % target)
@@ -19,6 +19,7 @@ class Generator:
return Template(filename = str(template_path)).render(
descriptor = self.descriptor,
geometry = geometry,
+ layout = layout,
moments_subexpr = self.moments[0],
moments_assignment = self.moments[1],
diff --git a/boltzgen/kernel/template/basic.cpp.mako b/boltzgen/kernel/template/basic.cpp.mako
index d284a1c..d969d60 100644
--- a/boltzgen/kernel/template/basic.cpp.mako
+++ b/boltzgen/kernel/template/basic.cpp.mako
@@ -1,26 +1,41 @@
<%
+def gid_offset():
+ return {
+ 'soa': 1,
+ 'aos': descriptor.q
+ }.get(layout);
+
def pop_offset(i):
- return i * geometry.volume
+ return {
+ 'soa': i * geometry.volume,
+ 'aos': i
+ }.get(layout);
def neighbor_offset(c_i):
return {
- 2: lambda: c_i[1]*geometry.size_x + c_i[0],
- 3: lambda: c_i[2]*geometry.size_x*geometry.size_y + c_i[1]*geometry.size_x + c_i[0]
- }.get(descriptor.d)()
+ 2: lambda: c_i[0]*geometry.size_y + c_i[1],
+ 3: lambda: c_i[0]*geometry.size_y*geometry.size_z + c_i[1]*geometry.size_z + c_i[2]
+ }.get(descriptor.d)() * {
+ 'soa': 1,
+ 'aos': descriptor.q
+ }.get(layout);
def padding():
return {
- 2: lambda: 1*geometry.size_x + 1,
- 3: lambda: 1*geometry.size_x*geometry.size_y + 1*geometry.size_x + 1
- }.get(descriptor.d)()
+ 2: lambda: 1*geometry.size_y + 1,
+ 3: lambda: 1*geometry.size_y*geometry.size_z + 1*geometry.size_z + 1
+ }.get(descriptor.d)() * {
+ 'soa': 1,
+ 'aos': descriptor.q
+ }.get(layout);
%>
void equilibrilize(${float_type}* f_next,
${float_type}* f_prev,
std::size_t gid)
{
- ${float_type}* preshifted_f_next = f_next + gid;
- ${float_type}* preshifted_f_prev = f_prev + gid;
+ ${float_type}* preshifted_f_next = f_next + gid*${gid_offset()};
+ ${float_type}* preshifted_f_prev = f_prev + gid*${gid_offset()};
% for i, w_i in enumerate(descriptor.w):
preshifted_f_next[${pop_offset(i)}] = ${w_i.evalf()};
@@ -30,13 +45,10 @@ void equilibrilize(${float_type}* f_next,
void collide_and_stream( ${float_type}* f_next,
const ${float_type}* f_prev,
- const int* material,
std::size_t gid)
{
- const int m = material[gid];
-
- ${float_type}* preshifted_f_next = f_next + gid;
- const ${float_type}* preshifted_f_prev = f_prev + gid;
+ ${float_type}* preshifted_f_next = f_next + gid*${gid_offset()};
+ const ${float_type}* preshifted_f_prev = f_prev + gid*${gid_offset()};
% for i, c_i in enumerate(descriptor.c):
const ${float_type} f_curr_${i} = preshifted_f_prev[${pop_offset(i) + neighbor_offset(-c_i)}];
@@ -59,7 +71,7 @@ void collide_and_stream( ${float_type}* f_next,
% endfor
% for i, expr in enumerate(collision_assignment):
- preshifted_f_next[${pop_offset(i)}] = m*f_next_${i} + (1.0-m)*${descriptor.w[i].evalf()};
+ preshifted_f_next[${pop_offset(i)}] = f_next_${i};
% endfor
}
@@ -68,7 +80,7 @@ void collect_moments(const ${float_type}* f,
${float_type}& rho,
${float_type} u[${descriptor.d}])
{
- const ${float_type}* preshifted_f = f + gid;
+ const ${float_type}* preshifted_f = f + gid*${gid_offset()};
% for i in range(0,descriptor.q):
const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}];
@@ -86,3 +98,75 @@ void collect_moments(const ${float_type}* f,
% endif
% endfor
}
+
+void test(std::size_t nStep)
+{
+ auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*padding()});
+ auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*padding()});
+ auto material = std::make_unique<int[]>(${geometry.volume});
+
+ // buffers are padded by maximum neighbor overreach to prevent invalid memory access
+ ${float_type}* f_prev = f_a.get() + ${padding()};
+ ${float_type}* f_next = f_b.get() + ${padding()};
+
+ for (int iX = 0; iX < ${geometry.size_x}; ++iX) {
+ for (int iY = 0; iY < ${geometry.size_y}; ++iY) {
+ for (int iZ = 0; iZ < ${geometry.size_z}; ++iZ) {
+ if (iX == 0 || iY == 0 || iZ == 0 || iX == ${geometry.size_x-1} || iY == ${geometry.size_y-1} || iZ == ${geometry.size_z-1}) {
+ material[iX*${geometry.size_y*geometry.size_z} + iY*${geometry.size_z} + iZ] = 0;
+ } else {
+ material[iX*${geometry.size_y*geometry.size_z} + iY*${geometry.size_z} + iZ] = 1;
+ }
+ }
+ }
+ }
+
+ std::vector<std::size_t> bulk;
+ std::vector<std::size_t> bc;
+
+ for (std::size_t iCell = 0; iCell < ${geometry.volume}; ++iCell) {
+ if (material[iCell] == 0) {
+ bc.emplace_back(iCell);
+ }
+ if (material[iCell] == 1) {
+ bulk.emplace_back(iCell);
+ }
+ }
+
+ for (std::size_t iCell = 0; iCell < ${geometry.volume}; ++iCell) {
+ equilibrilize(f_prev, f_next, iCell);
+ }
+
+ const auto start = std::chrono::high_resolution_clock::now();
+
+ for (std::size_t iStep = 0; iStep < nStep; ++iStep) {
+ if (iStep % 2 == 0) {
+ f_next = f_a.get();
+ f_prev = f_b.get();
+ } else {
+ f_next = f_b.get();
+ f_prev = f_a.get();
+ }
+
+ for (std::size_t i = 0; i < bulk.size(); ++i) {
+ collide_and_stream(f_next, f_prev, bulk[i]);
+ }
+ for (std::size_t i = 0; i < bc.size(); ++i) {
+ equilibrilize(f_next, f_prev, bc[i]);
+ }
+ }
+
+ auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(
+ std::chrono::high_resolution_clock::now() - start);
+
+ std::cout << "MLUPS: " << nStep*${geometry.volume}/(1e6*duration.count()) << std::endl;
+
+ // calculate average rho as a basic quality check
+ double rho_sum = 0.0;
+
+ for (std::size_t i = 0; i < ${geometry.volume*descriptor.q}; ++i) {
+ rho_sum += f_next[i];
+ }
+
+ std::cout << "avg rho: " << rho_sum/${geometry.volume} << std::endl;
+}