From 4e16a6a9df4a6412c48f735fab6984a005bfd38a Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Thu, 25 Jul 2019 20:21:15 +0200 Subject: Fix handling of outer boundary cells As we can only use a multiplicative mask to distinguish between cell types and streaming memory offsets are statically resolved the buffers have to provide a well-defined padding in both directions. Otherwise undefined data is accessed which may distort results. --- standalone_cpp_codegen.py | 6 ++++-- template/standalone.mako | 41 ++++++++++++++++++++++++++++++----------- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/standalone_cpp_codegen.py b/standalone_cpp_codegen.py index 24a97af..e4dd0b7 100644 --- a/standalone_cpp_codegen.py +++ b/standalone_cpp_codegen.py @@ -12,7 +12,7 @@ lbm = LBM(D3Q19) moments = lbm.moments(optimize = True) collide = lbm.bgk(f_eq = lbm.equilibrium(), tau = 0.6, optimize = True) -geometry = Geometry(64, 64, 64) +geometry = Geometry(32, 32, 32) program_src = Template(filename = str(Path(__file__).parent/'template/standalone.mako')).render( descriptor = lbm.descriptor, @@ -26,7 +26,9 @@ program_src = Template(filename = str(Path(__file__).parent/'template/standalone collide_assignment = collide[1], float_type = 'double', - ccode = sympy.ccode + ccode = sympy.ccode, + + enable_omp_simd = True ) print(program_src) diff --git a/template/standalone.mako b/template/standalone.mako index 61aded3..39f0da0 100644 --- a/template/standalone.mako +++ b/template/standalone.mako @@ -17,8 +17,8 @@ void equilibrilize(${float_type}* f_next, ${float_type}* preshifted_f_prev = f_prev + gid; % for i, w_i in enumerate(descriptor.w): - preshifted_f_next[${pop_offset(i)}] = ${w_i}.f; - preshifted_f_prev[${pop_offset(i)}] = ${w_i}.f; + preshifted_f_next[${pop_offset(i)}] = ${w_i.evalf()}; + preshifted_f_prev[${pop_offset(i)}] = ${w_i.evalf()}; % endfor } @@ -28,6 +28,12 @@ def neighbor_offset(c_i): 2: lambda: c_i[1]*geometry.size_x + c_i[0], 3: lambda: c_i[2]*geometry.size_x*geometry.size_y + c_i[1]*geometry.size_x + c_i[0] }.get(descriptor.d)() + +def padding(): + return { + 2: lambda: 1*geometry.size_x + 1, + 3: lambda: 1*geometry.size_x*geometry.size_y + 1*geometry.size_x + 1 + }.get(descriptor.d)() %> void collide_and_stream( ${float_type}* f_next, @@ -37,10 +43,6 @@ void collide_and_stream( ${float_type}* f_next, { const int m = material[gid]; - if ( m == 0 ) { - return; - } - ${float_type}* preshifted_f_next = f_next + gid; const ${float_type}* preshifted_f_prev = f_prev + gid; @@ -61,18 +63,23 @@ void collide_and_stream( ${float_type}* f_next, % endfor % for i, expr in enumerate(collide_assignment): - preshifted_f_next[${pop_offset(i)}] = ${ccode(expr.rhs)}; + const ${float_type} ${ccode(expr)} +% endfor + +% for i, expr in enumerate(collide_assignment): + preshifted_f_next[${pop_offset(i)}] = m*f_next_${i} + (1.0-m)*${descriptor.w[i].evalf()}; % endfor } int main() { - auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q}); - auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q}); + auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*padding()}); + auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*padding()}); auto material = std::make_unique(${geometry.volume}); - ${float_type}* f_prev = f_a.get(); - ${float_type}* f_next = f_b.get(); + // buffers are padded by maximum neighbor overreach to prevent invalid memory access + ${float_type}* f_prev = f_a.get() + ${padding()}; + ${float_type}* f_next = f_b.get() + ${padding()}; for (int iX = 0; iX < ${geometry.size_x}; ++iX) { for (int iY = 0; iY < ${geometry.size_y}; ++iY) { @@ -101,6 +108,9 @@ int main() f_prev = f_a.get(); } +% if enable_omp_simd: +#pragma omp simd +% endif for (std::size_t iCell = 0; iCell < ${geometry.volume}; ++iCell) { collide_and_stream(f_next, f_prev, material.get(), iCell); } @@ -111,5 +121,14 @@ int main() std::cout << "MLUPS: " << ${steps*geometry.volume}/(1e6*duration.count()) << std::endl; + // calculate average rho as a basic quality check + double rho_sum = 0.0; + + for (std::size_t i = 0; i < ${geometry.volume*descriptor.q}; ++i) { + rho_sum += f_next[i]; + } + + std::cout << "avg rho: " << rho_sum/${geometry.volume} << std::endl; + return 0; } -- cgit v1.2.3