aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Kummerlaender2019-07-25 20:21:15 +0200
committerAdrian Kummerlaender2019-07-25 20:21:15 +0200
commit4e16a6a9df4a6412c48f735fab6984a005bfd38a (patch)
tree9aafca48e4bd38c05d855c1d10c0ff524f89c7bf
parentf8206aae565894a5d4374600cebaa0fae8b8c19c (diff)
downloadsymlbm_playground-standalone.tar
symlbm_playground-standalone.tar.gz
symlbm_playground-standalone.tar.bz2
symlbm_playground-standalone.tar.lz
symlbm_playground-standalone.tar.xz
symlbm_playground-standalone.tar.zst
symlbm_playground-standalone.zip
Fix handling of outer boundary cellsstandalone
As we can only use a multiplicative mask to distinguish between cell types and streaming memory offsets are statically resolved the buffers have to provide a well-defined padding in both directions. Otherwise undefined data is accessed which may distort results.
-rw-r--r--standalone_cpp_codegen.py6
-rw-r--r--template/standalone.mako41
2 files changed, 34 insertions, 13 deletions
diff --git a/standalone_cpp_codegen.py b/standalone_cpp_codegen.py
index 24a97af..e4dd0b7 100644
--- a/standalone_cpp_codegen.py
+++ b/standalone_cpp_codegen.py
@@ -12,7 +12,7 @@ lbm = LBM(D3Q19)
moments = lbm.moments(optimize = True)
collide = lbm.bgk(f_eq = lbm.equilibrium(), tau = 0.6, optimize = True)
-geometry = Geometry(64, 64, 64)
+geometry = Geometry(32, 32, 32)
program_src = Template(filename = str(Path(__file__).parent/'template/standalone.mako')).render(
descriptor = lbm.descriptor,
@@ -26,7 +26,9 @@ program_src = Template(filename = str(Path(__file__).parent/'template/standalone
collide_assignment = collide[1],
float_type = 'double',
- ccode = sympy.ccode
+ ccode = sympy.ccode,
+
+ enable_omp_simd = True
)
print(program_src)
diff --git a/template/standalone.mako b/template/standalone.mako
index 61aded3..39f0da0 100644
--- a/template/standalone.mako
+++ b/template/standalone.mako
@@ -17,8 +17,8 @@ void equilibrilize(${float_type}* f_next,
${float_type}* preshifted_f_prev = f_prev + gid;
% for i, w_i in enumerate(descriptor.w):
- preshifted_f_next[${pop_offset(i)}] = ${w_i}.f;
- preshifted_f_prev[${pop_offset(i)}] = ${w_i}.f;
+ preshifted_f_next[${pop_offset(i)}] = ${w_i.evalf()};
+ preshifted_f_prev[${pop_offset(i)}] = ${w_i.evalf()};
% endfor
}
@@ -28,6 +28,12 @@ def neighbor_offset(c_i):
2: lambda: c_i[1]*geometry.size_x + c_i[0],
3: lambda: c_i[2]*geometry.size_x*geometry.size_y + c_i[1]*geometry.size_x + c_i[0]
}.get(descriptor.d)()
+
+def padding():
+ return {
+ 2: lambda: 1*geometry.size_x + 1,
+ 3: lambda: 1*geometry.size_x*geometry.size_y + 1*geometry.size_x + 1
+ }.get(descriptor.d)()
%>
void collide_and_stream( ${float_type}* f_next,
@@ -37,10 +43,6 @@ void collide_and_stream( ${float_type}* f_next,
{
const int m = material[gid];
- if ( m == 0 ) {
- return;
- }
-
${float_type}* preshifted_f_next = f_next + gid;
const ${float_type}* preshifted_f_prev = f_prev + gid;
@@ -61,18 +63,23 @@ void collide_and_stream( ${float_type}* f_next,
% endfor
% for i, expr in enumerate(collide_assignment):
- preshifted_f_next[${pop_offset(i)}] = ${ccode(expr.rhs)};
+ const ${float_type} ${ccode(expr)}
+% endfor
+
+% for i, expr in enumerate(collide_assignment):
+ preshifted_f_next[${pop_offset(i)}] = m*f_next_${i} + (1.0-m)*${descriptor.w[i].evalf()};
% endfor
}
int main()
{
- auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q});
- auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q});
+ auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*padding()});
+ auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*padding()});
auto material = std::make_unique<int[]>(${geometry.volume});
- ${float_type}* f_prev = f_a.get();
- ${float_type}* f_next = f_b.get();
+ // buffers are padded by maximum neighbor overreach to prevent invalid memory access
+ ${float_type}* f_prev = f_a.get() + ${padding()};
+ ${float_type}* f_next = f_b.get() + ${padding()};
for (int iX = 0; iX < ${geometry.size_x}; ++iX) {
for (int iY = 0; iY < ${geometry.size_y}; ++iY) {
@@ -101,6 +108,9 @@ int main()
f_prev = f_a.get();
}
+% if enable_omp_simd:
+#pragma omp simd
+% endif
for (std::size_t iCell = 0; iCell < ${geometry.volume}; ++iCell) {
collide_and_stream(f_next, f_prev, material.get(), iCell);
}
@@ -111,5 +121,14 @@ int main()
std::cout << "MLUPS: " << ${steps*geometry.volume}/(1e6*duration.count()) << std::endl;
+ // calculate average rho as a basic quality check
+ double rho_sum = 0.0;
+
+ for (std::size_t i = 0; i < ${geometry.volume*descriptor.q}; ++i) {
+ rho_sum += f_next[i];
+ }
+
+ std::cout << "avg rho: " << rho_sum/${geometry.volume} << std::endl;
+
return 0;
}