From f1c5164b327054fc540527e973ff7843d4c01996 Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Thu, 24 Oct 2019 14:33:59 +0200 Subject: Add test template for C++, enable switching between AOS and SOA --- boltzgen.py | 5 +- boltzgen/kernel/generator.py | 3 +- boltzgen/kernel/template/basic.cpp.mako | 116 +++++++++++++++++++++++++++----- 3 files changed, 105 insertions(+), 19 deletions(-) diff --git a/boltzgen.py b/boltzgen.py index 27a34fa..14aa292 100644 --- a/boltzgen.py +++ b/boltzgen.py @@ -5,6 +5,7 @@ from boltzgen import * argparser = argparse.ArgumentParser(description='Generate LBM kernels in various languages using a symbolic description.') argparser.add_argument('language', help = 'Target language (currently either "opencl" or "cpp")') +argparser.add_argument('layout', help = 'Memory layout ("aos" or "soa" for C++, ignored for OpenCL') args = argparser.parse_args() @@ -14,7 +15,7 @@ generator = Generator( moments = lbm.moments(), collision = lbm.bgk(f_eq = lbm.equilibrium(), tau = 0.6)) -geometry = Geometry(32,32) +geometry = Geometry(1024,1024) -src = generator.kernel(args.language, 'float', geometry) +src = generator.kernel(args.language, 'double', args.layout, geometry) print(src) diff --git a/boltzgen/kernel/generator.py b/boltzgen/kernel/generator.py index 42d0430..c9fdb27 100644 --- a/boltzgen/kernel/generator.py +++ b/boltzgen/kernel/generator.py @@ -11,7 +11,7 @@ class Generator: self.collision = collision self.boundary = boundary - def kernel(self, target, precision, geometry): + def kernel(self, target, precision, layout, geometry): template_path = Path(__file__).parent/('template/basic.' + target + '.mako') if not template_path.exists(): raise Exception("Target '%s' not supported" % target) @@ -19,6 +19,7 @@ class Generator: return Template(filename = str(template_path)).render( descriptor = self.descriptor, geometry = geometry, + layout = layout, moments_subexpr = self.moments[0], moments_assignment = self.moments[1], diff --git a/boltzgen/kernel/template/basic.cpp.mako b/boltzgen/kernel/template/basic.cpp.mako index d284a1c..d969d60 100644 --- a/boltzgen/kernel/template/basic.cpp.mako +++ b/boltzgen/kernel/template/basic.cpp.mako @@ -1,26 +1,41 @@ <% +def gid_offset(): + return { + 'soa': 1, + 'aos': descriptor.q + }.get(layout); + def pop_offset(i): - return i * geometry.volume + return { + 'soa': i * geometry.volume, + 'aos': i + }.get(layout); def neighbor_offset(c_i): return { - 2: lambda: c_i[1]*geometry.size_x + c_i[0], - 3: lambda: c_i[2]*geometry.size_x*geometry.size_y + c_i[1]*geometry.size_x + c_i[0] - }.get(descriptor.d)() + 2: lambda: c_i[0]*geometry.size_y + c_i[1], + 3: lambda: c_i[0]*geometry.size_y*geometry.size_z + c_i[1]*geometry.size_z + c_i[2] + }.get(descriptor.d)() * { + 'soa': 1, + 'aos': descriptor.q + }.get(layout); def padding(): return { - 2: lambda: 1*geometry.size_x + 1, - 3: lambda: 1*geometry.size_x*geometry.size_y + 1*geometry.size_x + 1 - }.get(descriptor.d)() + 2: lambda: 1*geometry.size_y + 1, + 3: lambda: 1*geometry.size_y*geometry.size_z + 1*geometry.size_z + 1 + }.get(descriptor.d)() * { + 'soa': 1, + 'aos': descriptor.q + }.get(layout); %> void equilibrilize(${float_type}* f_next, ${float_type}* f_prev, std::size_t gid) { - ${float_type}* preshifted_f_next = f_next + gid; - ${float_type}* preshifted_f_prev = f_prev + gid; + ${float_type}* preshifted_f_next = f_next + gid*${gid_offset()}; + ${float_type}* preshifted_f_prev = f_prev + gid*${gid_offset()}; % for i, w_i in enumerate(descriptor.w): preshifted_f_next[${pop_offset(i)}] = ${w_i.evalf()}; @@ -30,13 +45,10 @@ void equilibrilize(${float_type}* f_next, void collide_and_stream( ${float_type}* f_next, const ${float_type}* f_prev, - const int* material, std::size_t gid) { - const int m = material[gid]; - - ${float_type}* preshifted_f_next = f_next + gid; - const ${float_type}* preshifted_f_prev = f_prev + gid; + ${float_type}* preshifted_f_next = f_next + gid*${gid_offset()}; + const ${float_type}* preshifted_f_prev = f_prev + gid*${gid_offset()}; % for i, c_i in enumerate(descriptor.c): const ${float_type} f_curr_${i} = preshifted_f_prev[${pop_offset(i) + neighbor_offset(-c_i)}]; @@ -59,7 +71,7 @@ void collide_and_stream( ${float_type}* f_next, % endfor % for i, expr in enumerate(collision_assignment): - preshifted_f_next[${pop_offset(i)}] = m*f_next_${i} + (1.0-m)*${descriptor.w[i].evalf()}; + preshifted_f_next[${pop_offset(i)}] = f_next_${i}; % endfor } @@ -68,7 +80,7 @@ void collect_moments(const ${float_type}* f, ${float_type}& rho, ${float_type} u[${descriptor.d}]) { - const ${float_type}* preshifted_f = f + gid; + const ${float_type}* preshifted_f = f + gid*${gid_offset()}; % for i in range(0,descriptor.q): const ${float_type} f_curr_${i} = preshifted_f[${pop_offset(i)}]; @@ -86,3 +98,75 @@ void collect_moments(const ${float_type}* f, % endif % endfor } + +void test(std::size_t nStep) +{ + auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*padding()}); + auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*padding()}); + auto material = std::make_unique(${geometry.volume}); + + // buffers are padded by maximum neighbor overreach to prevent invalid memory access + ${float_type}* f_prev = f_a.get() + ${padding()}; + ${float_type}* f_next = f_b.get() + ${padding()}; + + for (int iX = 0; iX < ${geometry.size_x}; ++iX) { + for (int iY = 0; iY < ${geometry.size_y}; ++iY) { + for (int iZ = 0; iZ < ${geometry.size_z}; ++iZ) { + if (iX == 0 || iY == 0 || iZ == 0 || iX == ${geometry.size_x-1} || iY == ${geometry.size_y-1} || iZ == ${geometry.size_z-1}) { + material[iX*${geometry.size_y*geometry.size_z} + iY*${geometry.size_z} + iZ] = 0; + } else { + material[iX*${geometry.size_y*geometry.size_z} + iY*${geometry.size_z} + iZ] = 1; + } + } + } + } + + std::vector bulk; + std::vector bc; + + for (std::size_t iCell = 0; iCell < ${geometry.volume}; ++iCell) { + if (material[iCell] == 0) { + bc.emplace_back(iCell); + } + if (material[iCell] == 1) { + bulk.emplace_back(iCell); + } + } + + for (std::size_t iCell = 0; iCell < ${geometry.volume}; ++iCell) { + equilibrilize(f_prev, f_next, iCell); + } + + const auto start = std::chrono::high_resolution_clock::now(); + + for (std::size_t iStep = 0; iStep < nStep; ++iStep) { + if (iStep % 2 == 0) { + f_next = f_a.get(); + f_prev = f_b.get(); + } else { + f_next = f_b.get(); + f_prev = f_a.get(); + } + + for (std::size_t i = 0; i < bulk.size(); ++i) { + collide_and_stream(f_next, f_prev, bulk[i]); + } + for (std::size_t i = 0; i < bc.size(); ++i) { + equilibrilize(f_next, f_prev, bc[i]); + } + } + + auto duration = std::chrono::duration_cast>( + std::chrono::high_resolution_clock::now() - start); + + std::cout << "MLUPS: " << nStep*${geometry.volume}/(1e6*duration.count()) << std::endl; + + // calculate average rho as a basic quality check + double rho_sum = 0.0; + + for (std::size_t i = 0; i < ${geometry.volume*descriptor.q}; ++i) { + rho_sum += f_next[i]; + } + + std::cout << "avg rho: " << rho_sum/${geometry.volume} << std::endl; +} -- cgit v1.2.3