diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/LLBM/bounce_back.h | 30 | ||||
-rw-r--r-- | src/LLBM/bounce_back_moving_wall.h | 30 | ||||
-rw-r--r-- | src/LLBM/collect_moments.h | 25 | ||||
-rw-r--r-- | src/LLBM/collide.h | 89 | ||||
-rw-r--r-- | src/LLBM/equilibrium_density_wall.h | 144 | ||||
-rw-r--r-- | src/LLBM/equilibrium_velocity_wall.h | 144 | ||||
-rw-r--r-- | src/LLBM/initialize.h | 53 | ||||
-rw-r--r-- | src/LLBM/smagorinsky_collide.h | 129 | ||||
-rw-r--r-- | src/LLBM/wall.h | 4 | ||||
-rw-r--r-- | src/concepts.h | 22 | ||||
-rw-r--r-- | src/cuboid.h | 37 | ||||
-rw-r--r-- | src/export.h | 59 | ||||
-rw-r--r-- | src/lattice.h | 267 | ||||
-rw-r--r-- | src/operator.h | 41 | ||||
-rw-r--r-- | src/pattern/all.h | 5 | ||||
-rw-r--r-- | src/pattern/none.h | 48 | ||||
-rw-r--r-- | src/pattern/ps.h | 123 | ||||
-rw-r--r-- | src/pattern/sss.h | 63 | ||||
-rw-r--r-- | src/population.h | 41 | ||||
-rw-r--r-- | src/propagation.h | 28 | ||||
-rw-r--r-- | src/simd/256.h | 379 | ||||
-rw-r--r-- | src/simd/512.h | 339 |
22 files changed, 2100 insertions, 0 deletions
diff --git a/src/LLBM/bounce_back.h b/src/LLBM/bounce_back.h new file mode 100644 index 0000000..b710ee4 --- /dev/null +++ b/src/LLBM/bounce_back.h @@ -0,0 +1,30 @@ +#pragma once + +#include "concepts.h" + +struct BounceBackO { + +template <concepts::Arithmetic V> +static void apply(V f_curr[19], V f_next[19]) { + f_next[0] = f_curr[18]; + f_next[1] = f_curr[17]; + f_next[2] = f_curr[16]; + f_next[3] = f_curr[15]; + f_next[4] = f_curr[14]; + f_next[5] = f_curr[13]; + f_next[6] = f_curr[12]; + f_next[7] = f_curr[11]; + f_next[8] = f_curr[10]; + f_next[9] = f_curr[9]; + f_next[10] = f_curr[8]; + f_next[11] = f_curr[7]; + f_next[12] = f_curr[6]; + f_next[13] = f_curr[5]; + f_next[14] = f_curr[4]; + f_next[15] = f_curr[3]; + f_next[16] = f_curr[2]; + f_next[17] = f_curr[1]; + f_next[18] = f_curr[0]; +} + +}; diff --git a/src/LLBM/bounce_back_moving_wall.h b/src/LLBM/bounce_back_moving_wall.h new file mode 100644 index 0000000..c00a17e --- /dev/null +++ b/src/LLBM/bounce_back_moving_wall.h @@ -0,0 +1,30 @@ +#pragma once + +#include "concepts.h" + +struct BounceBackMovingWallO { + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T u[3]) { + f_next[0] = f_curr[18] + T{0.166666666666667}*u[1] + T{0.166666666666667}*u[2]; + f_next[1] = f_curr[17] - T{0.166666666666667}*u[0] + T{0.166666666666667}*u[2]; + f_next[2] = f_curr[16] + T{0.333333333333333}*u[2]; + f_next[3] = f_curr[15] + T{0.166666666666667}*u[0] + T{0.166666666666667}*u[2]; + f_next[4] = f_curr[14] - T{0.166666666666667}*u[1] + T{0.166666666666667}*u[2]; + f_next[5] = f_curr[13] - T{0.166666666666667}*u[0] + T{0.166666666666667}*u[1]; + f_next[6] = f_curr[12] + T{0.333333333333333}*u[1]; + f_next[7] = f_curr[11] + T{0.166666666666667}*u[0] + T{0.166666666666667}*u[1]; + f_next[8] = f_curr[10] - T{0.333333333333333}*u[0]; + f_next[9] = f_curr[9]; + f_next[10] = f_curr[8] + T{0.333333333333333}*u[0]; + f_next[11] = f_curr[7] - T{0.166666666666667}*u[0] - T{0.166666666666667}*u[1]; + f_next[12] = f_curr[6] - T{0.333333333333333}*u[1]; + f_next[13] = f_curr[5] + T{0.166666666666667}*u[0] - T{0.166666666666667}*u[1]; + f_next[14] = f_curr[4] + T{0.166666666666667}*u[1] - T{0.166666666666667}*u[2]; + f_next[15] = f_curr[3] - T{0.166666666666667}*u[0] - T{0.166666666666667}*u[2]; + f_next[16] = f_curr[2] - T{0.333333333333333}*u[2]; + f_next[17] = f_curr[1] + T{0.166666666666667}*u[0] - T{0.166666666666667}*u[2]; + f_next[18] = f_curr[0] - T{0.166666666666667}*u[1] - T{0.166666666666667}*u[2]; +} + +}; diff --git a/src/LLBM/collect_moments.h b/src/LLBM/collect_moments.h new file mode 100644 index 0000000..afd5d59 --- /dev/null +++ b/src/LLBM/collect_moments.h @@ -0,0 +1,25 @@ +#pragma once + +#include "concepts.h" + +struct CollectMomentsF { + +static constexpr unsigned size = 4; + +template <concepts::Arithmetic V> +static void observe(V f_curr[19], V* momenta) { + V m0 = f_curr[10] + f_curr[13] + f_curr[17]; + V m1 = f_curr[14] + f_curr[5] + f_curr[6]; + V m2 = f_curr[1] + f_curr[2] + f_curr[4]; + V m3 = m0 + m1 + m2 + f_curr[0] + f_curr[11] + f_curr[12] + f_curr[15] + f_curr[16] + f_curr[18] + f_curr[3] + f_curr[7] + f_curr[8] + f_curr[9]; + V m4 = -f_curr[11] + f_curr[7]; + V m5 = -f_curr[15] + f_curr[3]; + V m6 = V{1}/m3; + V m7 = f_curr[0] - f_curr[18]; + momenta[0] = m3; + momenta[1] = m6*(m0 + m4 + m5 - f_curr[1] - f_curr[5] - f_curr[8]); + momenta[2] = m6*(m1 + m4 + m7 - f_curr[12] - f_curr[13] - f_curr[4]); + momenta[3] = m6*(m2 + m5 + m7 - f_curr[14] - f_curr[16] - f_curr[17]); +} + +}; diff --git a/src/LLBM/collide.h b/src/LLBM/collide.h new file mode 100644 index 0000000..c08aade --- /dev/null +++ b/src/LLBM/collide.h @@ -0,0 +1,89 @@ +#pragma once + +#include "concepts.h" + +struct BgkCollideO { + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T tau) { + V rho; + V u[3]; + V m0 = f_curr[10] + f_curr[13] + f_curr[17]; + V m1 = f_curr[14] + f_curr[5] + f_curr[6]; + V m2 = f_curr[1] + f_curr[2] + f_curr[4]; + V m3 = m0 + m1 + m2 + f_curr[0] + f_curr[11] + f_curr[12] + f_curr[15] + f_curr[16] + f_curr[18] + f_curr[3] + f_curr[7] + f_curr[8] + f_curr[9]; + V m4 = -f_curr[11] + f_curr[7]; + V m5 = -f_curr[15] + f_curr[3]; + V m6 = T{1}/m3; + V m7 = f_curr[0] - f_curr[18]; + rho = m3; + u[0] = m6*(m0 + m4 + m5 - f_curr[1] - f_curr[5] - f_curr[8]); + u[1] = m6*(m1 + m4 + m7 - f_curr[12] - f_curr[13] - f_curr[4]); + u[2] = m6*(m2 + m5 + m7 - f_curr[14] - f_curr[16] - f_curr[17]); + V x0 = T{1}/tau; + V x1 = T{0.0138888888888889}*x0; + V x2 = u[1] + u[2]; + V x3 = T{9.00000000000000}*(x2*x2); + V x4 = T{6.00000000000000}*u[2]; + V x5 = u[1]*u[1]; + V x6 = T{3.00000000000000}*x5; + V x7 = -x6; + V x8 = u[0]*u[0]; + V x9 = T{3.00000000000000}*x8; + V x10 = T{2.00000000000000} - x9; + V x11 = x10 + x7; + V x12 = x11 + x4; + V x13 = u[2]*u[2]; + V x14 = T{3.00000000000000}*x13; + V x15 = -x14; + V x16 = T{6.00000000000000}*u[1]; + V x17 = x15 + x16; + V x18 = T{6.00000000000000}*u[0]; + V x19 = -u[0]; + V x20 = x19 + u[2]; + V x21 = x14 + x6 + T{-2.00000000000000}; + V x22 = x21 + x9; + V x23 = x22 - x4; + V x24 = T{0.0277777777777778}*x0; + V x25 = T{6.00000000000000}*x13; + V x26 = u[0] + u[2]; + V x27 = T{9.00000000000000}*(x26*x26); + V x28 = x15 + x18; + V x29 = -u[1]; + V x30 = x29 + u[2]; + V x31 = x19 + u[1]; + V x32 = -x16; + V x33 = x18 + x22; + V x34 = T{6.00000000000000}*x5; + V x35 = u[0] + u[1]; + V x36 = T{9.00000000000000}*(x35*x35); + V x37 = T{6.00000000000000}*x8; + V x38 = x9 + T{-2.00000000000000}; + V x39 = x29 + u[0]; + V x40 = -x18 + x22; + V x41 = -u[2]; + V x42 = x41 + u[1]; + V x43 = x22 + x4; + V x44 = x41 + u[0]; + f_next[0] = x1*(rho*(x12 + x17 + x3) - T{72.0000000000000}*f_curr[0]) + f_curr[0]; + f_next[1] = -x1*(rho*(x18 + x23 - T{9.00000000000000}*x20*x20) + T{72.0000000000000}*f_curr[1]) + f_curr[1]; + f_next[2] = x24*(rho*(x12 + x25) - T{36.0000000000000}*f_curr[2]) + f_curr[2]; + f_next[3] = x1*(rho*(x12 + x27 + x28) - T{72.0000000000000}*f_curr[3]) + f_curr[3]; + f_next[4] = -x1*(rho*(x16 + x23 - T{9.00000000000000}*x30*x30) + T{72.0000000000000}*f_curr[4]) + f_curr[4]; + f_next[5] = -x1*(rho*(x32 + x33 - T{9.00000000000000}*x31*x31) + T{72.0000000000000}*f_curr[5]) + f_curr[5]; + f_next[6] = x24*(rho*(x10 + x17 + x34) - T{36.0000000000000}*f_curr[6]) + f_curr[6]; + f_next[7] = x1*(rho*(x11 + x17 + x18 + x36) - T{72.0000000000000}*f_curr[7]) + f_curr[7]; + f_next[8] = -x24*(rho*(x18 + x21 - x37) + T{36.0000000000000}*f_curr[8]) + f_curr[8]; + f_next[9] = -T{0.166666666666667}*x0*(rho*x22 + T{6.00000000000000}*f_curr[9]) + f_curr[9]; + f_next[10] = x24*(rho*(x28 + x37 + x7 + T{2.00000000000000}) - T{36.0000000000000}*f_curr[10]) + f_curr[10]; + f_next[11] = -x1*(rho*(x16 + x33 - x36) + T{72.0000000000000}*f_curr[11]) + f_curr[11]; + f_next[12] = -x24*(rho*(x14 + x16 - x34 + x38) + T{36.0000000000000}*f_curr[12]) + f_curr[12]; + f_next[13] = -x1*(rho*(x16 + x40 - T{9.00000000000000}*x39*x39) + T{72.0000000000000}*f_curr[13]) + f_curr[13]; + f_next[14] = -x1*(rho*(x32 + x43 - T{9.00000000000000}*x42*x42) + T{72.0000000000000}*f_curr[14]) + f_curr[14]; + f_next[15] = -x1*(rho*(-x27 + x33 + x4) + T{72.0000000000000}*f_curr[15]) + f_curr[15]; + f_next[16] = -x24*(rho*(-x25 + x38 + x4 + x6) + T{36.0000000000000}*f_curr[16]) + f_curr[16]; + f_next[17] = -x1*(rho*(x4 + x40 - T{9.00000000000000}*x44*x44) + T{72.0000000000000}*f_curr[17]) + f_curr[17]; + f_next[18] = -x1*(rho*(x16 - x3 + x43) + T{72.0000000000000}*f_curr[18]) + f_curr[18]; +} + +}; diff --git a/src/LLBM/equilibrium_density_wall.h b/src/LLBM/equilibrium_density_wall.h new file mode 100644 index 0000000..598d59d --- /dev/null +++ b/src/LLBM/equilibrium_density_wall.h @@ -0,0 +1,144 @@ +#pragma once + +#include "wall.h" +#include "concepts.h" + +struct EquilibriumDensityWallO { + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T rho_w, WallNormal<1,0,0>) { + V u_w = (rho_w - f_curr[0] - T{2.00000000000000}*f_curr[11] - f_curr[12] - f_curr[14] - T{2.00000000000000}*f_curr[15] - f_curr[16] - f_curr[18] - T{2.00000000000000}*f_curr[1] - f_curr[2] - f_curr[4] - T{2.00000000000000}*f_curr[5] - f_curr[6] - T{2.00000000000000}*f_curr[8] - f_curr[9])/rho_w; + V rho = rho_w; + V u[3] { u_w, 0., 0. }; + V e0 = T{0.0277777777777778}*rho; + V e1 = u[1] + u[2]; + V e2 = T{4.50000000000000}*(e1*e1); + V e3 = T{3.00000000000000}*u[2]; + V e4 = u[1]*u[1]; + V e5 = T{1.50000000000000}*e4; + V e6 = -e5; + V e7 = u[0]*u[0]; + V e8 = T{1.50000000000000}*e7; + V e9 = T{1.00000000000000} - e8; + V e10 = e6 + e9; + V e11 = e10 + e3; + V e12 = T{3.00000000000000}*u[1]; + V e13 = u[2]*u[2]; + V e14 = T{1.50000000000000}*e13; + V e15 = -e14; + V e16 = e12 + e15; + V e17 = T{3.00000000000000}*u[0]; + V e18 = -u[2]; + V e19 = e18 + u[0]; + V e20 = -T{4.50000000000000}*e19*e19; + V e21 = e14 + e5 + T{-1.00000000000000}; + V e22 = e21 + e8; + V e23 = e22 - e3; + V e24 = T{0.0555555555555556}*rho; + V e25 = T{3.00000000000000}*e13; + V e26 = u[0] + u[2]; + V e27 = T{4.50000000000000}*(e26*e26); + V e28 = e15 + e17; + V e29 = e18 + u[1]; + V e30 = -T{4.50000000000000}*e29*e29; + V e31 = -e12; + V e32 = u[0] - u[1]; + V e33 = -T{4.50000000000000}*e32*e32; + V e34 = e17 + e22; + V e35 = T{3.00000000000000}*e4; + V e36 = u[0] + u[1]; + V e37 = T{4.50000000000000}*(e36*e36); + V e38 = T{3.00000000000000}*e7; + V e39 = e8 + T{-1.00000000000000}; + V e40 = -e17 + e22; + V e41 = e22 + e3; + f_next[0] = e0*(e11 + e16 + e2); + f_next[1] = -e0*(e17 + e20 + e23); + f_next[2] = e24*(e11 + e25); + f_next[3] = e0*(e11 + e27 + e28); + f_next[4] = -e0*(e12 + e23 + e30); + f_next[5] = -e0*(e31 + e33 + e34); + f_next[6] = e24*(e16 + e35 + e9); + f_next[7] = e0*(e10 + e16 + e17 + e37); + f_next[8] = -e24*(e17 + e21 - e38); + f_next[9] = -T{0.333333333333333}*e22*rho; + f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); + f_next[11] = -e0*(e12 + e34 - e37); + f_next[12] = -e24*(e12 + e14 - e35 + e39); + f_next[13] = -e0*(e12 + e33 + e40); + f_next[14] = -e0*(e30 + e31 + e41); + f_next[15] = -e0*(-e27 + e3 + e34); + f_next[16] = -e24*(-e25 + e3 + e39 + e5); + f_next[17] = -e0*(e20 + e3 + e40); + f_next[18] = -e0*(e12 - e2 + e41); +} + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T rho_w, WallNormal<-1,0,0>) { + V u_w = (-rho_w + f_curr[0] + T{2.00000000000000}*f_curr[10] + f_curr[12] + T{2.00000000000000}*f_curr[13] + f_curr[14] + f_curr[16] + T{2.00000000000000}*f_curr[17] + f_curr[18] + f_curr[2] + T{2.00000000000000}*f_curr[3] + f_curr[4] + f_curr[6] + T{2.00000000000000}*f_curr[7] + f_curr[9])/rho_w; + V rho = rho_w; + V u[3] { u_w, 0., 0. }; + V e0 = T{0.0277777777777778}*rho; + V e1 = u[1] + u[2]; + V e2 = T{4.50000000000000}*(e1*e1); + V e3 = T{3.00000000000000}*u[2]; + V e4 = u[1]*u[1]; + V e5 = T{1.50000000000000}*e4; + V e6 = -e5; + V e7 = u[0]*u[0]; + V e8 = T{1.50000000000000}*e7; + V e9 = T{1.00000000000000} - e8; + V e10 = e6 + e9; + V e11 = e10 + e3; + V e12 = T{3.00000000000000}*u[1]; + V e13 = u[2]*u[2]; + V e14 = T{1.50000000000000}*e13; + V e15 = -e14; + V e16 = e12 + e15; + V e17 = T{3.00000000000000}*u[0]; + V e18 = -u[2]; + V e19 = e18 + u[0]; + V e20 = -T{4.50000000000000}*e19*e19; + V e21 = e14 + e5 + T{-1.00000000000000}; + V e22 = e21 + e8; + V e23 = e22 - e3; + V e24 = T{0.0555555555555556}*rho; + V e25 = T{3.00000000000000}*e13; + V e26 = u[0] + u[2]; + V e27 = T{4.50000000000000}*(e26*e26); + V e28 = e15 + e17; + V e29 = e18 + u[1]; + V e30 = -T{4.50000000000000}*e29*e29; + V e31 = -e12; + V e32 = u[0] - u[1]; + V e33 = -T{4.50000000000000}*e32*e32; + V e34 = e17 + e22; + V e35 = T{3.00000000000000}*e4; + V e36 = u[0] + u[1]; + V e37 = T{4.50000000000000}*(e36*e36); + V e38 = T{3.00000000000000}*e7; + V e39 = e8 + T{-1.00000000000000}; + V e40 = -e17 + e22; + V e41 = e22 + e3; + f_next[0] = e0*(e11 + e16 + e2); + f_next[1] = -e0*(e17 + e20 + e23); + f_next[2] = e24*(e11 + e25); + f_next[3] = e0*(e11 + e27 + e28); + f_next[4] = -e0*(e12 + e23 + e30); + f_next[5] = -e0*(e31 + e33 + e34); + f_next[6] = e24*(e16 + e35 + e9); + f_next[7] = e0*(e10 + e16 + e17 + e37); + f_next[8] = -e24*(e17 + e21 - e38); + f_next[9] = -T{0.333333333333333}*e22*rho; + f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); + f_next[11] = -e0*(e12 + e34 - e37); + f_next[12] = -e24*(e12 + e14 - e35 + e39); + f_next[13] = -e0*(e12 + e33 + e40); + f_next[14] = -e0*(e30 + e31 + e41); + f_next[15] = -e0*(-e27 + e3 + e34); + f_next[16] = -e24*(-e25 + e3 + e39 + e5); + f_next[17] = -e0*(e20 + e3 + e40); + f_next[18] = -e0*(e12 - e2 + e41); +} + +}; diff --git a/src/LLBM/equilibrium_velocity_wall.h b/src/LLBM/equilibrium_velocity_wall.h new file mode 100644 index 0000000..2d68e34 --- /dev/null +++ b/src/LLBM/equilibrium_velocity_wall.h @@ -0,0 +1,144 @@ +#pragma once + +#include "wall.h" +#include "concepts.h" + +struct EquilibriumVelocityWallO { + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T u_w, WallNormal<1,0,0>) { + V rho; + rho = -(f_curr[0] + T{2.00000000000000}*f_curr[11] + f_curr[12] + f_curr[14] + T{2.00000000000000}*f_curr[15] + f_curr[16] + f_curr[18] + T{2.00000000000000}*f_curr[1] + f_curr[2] + f_curr[4] + T{2.00000000000000}*f_curr[5] + f_curr[6] + T{2.00000000000000}*f_curr[8] + f_curr[9])/(u_w + T{-1.00000000000000}); + V u[3] { u_w, 0., 0. }; + V e0 = T{0.0277777777777778}*rho; + V e1 = u[1] + u[2]; + V e2 = T{4.50000000000000}*(e1*e1); + V e3 = T{3.00000000000000}*u[2]; + V e4 = u[1]*u[1]; + V e5 = T{1.50000000000000}*e4; + V e6 = -e5; + V e7 = u[0]*u[0]; + V e8 = T{1.50000000000000}*e7; + V e9 = T{1.00000000000000} - e8; + V e10 = e6 + e9; + V e11 = e10 + e3; + V e12 = T{3.00000000000000}*u[1]; + V e13 = u[2]*u[2]; + V e14 = T{1.50000000000000}*e13; + V e15 = -e14; + V e16 = e12 + e15; + V e17 = T{3.00000000000000}*u[0]; + V e18 = -u[2]; + V e19 = e18 + u[0]; + V e20 = -T{4.50000000000000}*e19*e19; + V e21 = e14 + e5 + T{-1.00000000000000}; + V e22 = e21 + e8; + V e23 = e22 - e3; + V e24 = T{0.0555555555555556}*rho; + V e25 = T{3.00000000000000}*e13; + V e26 = u[0] + u[2]; + V e27 = T{4.50000000000000}*(e26*e26); + V e28 = e15 + e17; + V e29 = e18 + u[1]; + V e30 = -T{4.50000000000000}*e29*e29; + V e31 = -e12; + V e32 = u[0] - u[1]; + V e33 = -T{4.50000000000000}*e32*e32; + V e34 = e17 + e22; + V e35 = T{3.00000000000000}*e4; + V e36 = u[0] + u[1]; + V e37 = T{4.50000000000000}*(e36*e36); + V e38 = T{3.00000000000000}*e7; + V e39 = e8 + T{-1.00000000000000}; + V e40 = -e17 + e22; + V e41 = e22 + e3; + f_next[0] = e0*(e11 + e16 + e2); + f_next[1] = -e0*(e17 + e20 + e23); + f_next[2] = e24*(e11 + e25); + f_next[3] = e0*(e11 + e27 + e28); + f_next[4] = -e0*(e12 + e23 + e30); + f_next[5] = -e0*(e31 + e33 + e34); + f_next[6] = e24*(e16 + e35 + e9); + f_next[7] = e0*(e10 + e16 + e17 + e37); + f_next[8] = -e24*(e17 + e21 - e38); + f_next[9] = -T{0.333333333333333}*e22*rho; + f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); + f_next[11] = -e0*(e12 + e34 - e37); + f_next[12] = -e24*(e12 + e14 - e35 + e39); + f_next[13] = -e0*(e12 + e33 + e40); + f_next[14] = -e0*(e30 + e31 + e41); + f_next[15] = -e0*(-e27 + e3 + e34); + f_next[16] = -e24*(-e25 + e3 + e39 + e5); + f_next[17] = -e0*(e20 + e3 + e40); + f_next[18] = -e0*(e12 - e2 + e41); +} + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T u_w, WallNormal<-1,0,0>) { + V rho; + rho = (f_curr[0] + T{2.00000000000000}*f_curr[10] + f_curr[12] + T{2.00000000000000}*f_curr[13] + f_curr[14] + f_curr[16] + T{2.00000000000000}*f_curr[17] + f_curr[18] + f_curr[2] + T{2.00000000000000}*f_curr[3] + f_curr[4] + f_curr[6] + T{2.00000000000000}*f_curr[7] + f_curr[9])/(u_w + T{1.00000000000000}); + V u[3] { u_w, 0., 0. }; + V e0 = T{0.0277777777777778}*rho; + V e1 = u[1] + u[2]; + V e2 = T{4.50000000000000}*(e1*e1); + V e3 = T{3.00000000000000}*u[2]; + V e4 = u[1]*u[1]; + V e5 = T{1.50000000000000}*e4; + V e6 = -e5; + V e7 = u[0]*u[0]; + V e8 = T{1.50000000000000}*e7; + V e9 = T{1.00000000000000} - e8; + V e10 = e6 + e9; + V e11 = e10 + e3; + V e12 = T{3.00000000000000}*u[1]; + V e13 = u[2]*u[2]; + V e14 = T{1.50000000000000}*e13; + V e15 = -e14; + V e16 = e12 + e15; + V e17 = T{3.00000000000000}*u[0]; + V e18 = -u[2]; + V e19 = e18 + u[0]; + V e20 = -T{4.50000000000000}*e19*e19; + V e21 = e14 + e5 + T{-1.00000000000000}; + V e22 = e21 + e8; + V e23 = e22 - e3; + V e24 = T{0.0555555555555556}*rho; + V e25 = T{3.00000000000000}*e13; + V e26 = u[0] + u[2]; + V e27 = T{4.50000000000000}*(e26*e26); + V e28 = e15 + e17; + V e29 = e18 + u[1]; + V e30 = -T{4.50000000000000}*e29*e29; + V e31 = -e12; + V e32 = u[0] - u[1]; + V e33 = -T{4.50000000000000}*e32*e32; + V e34 = e17 + e22; + V e35 = T{3.00000000000000}*e4; + V e36 = u[0] + u[1]; + V e37 = T{4.50000000000000}*(e36*e36); + V e38 = T{3.00000000000000}*e7; + V e39 = e8 + T{-1.00000000000000}; + V e40 = -e17 + e22; + V e41 = e22 + e3; + f_next[0] = e0*(e11 + e16 + e2); + f_next[1] = -e0*(e17 + e20 + e23); + f_next[2] = e24*(e11 + e25); + f_next[3] = e0*(e11 + e27 + e28); + f_next[4] = -e0*(e12 + e23 + e30); + f_next[5] = -e0*(e31 + e33 + e34); + f_next[6] = e24*(e16 + e35 + e9); + f_next[7] = e0*(e10 + e16 + e17 + e37); + f_next[8] = -e24*(e17 + e21 - e38); + f_next[9] = -T{0.333333333333333}*e22*rho; + f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); + f_next[11] = -e0*(e12 + e34 - e37); + f_next[12] = -e24*(e12 + e14 - e35 + e39); + f_next[13] = -e0*(e12 + e33 + e40); + f_next[14] = -e0*(e30 + e31 + e41); + f_next[15] = -e0*(-e27 + e3 + e34); + f_next[16] = -e24*(-e25 + e3 + e39 + e5); + f_next[17] = -e0*(e20 + e3 + e40); + f_next[18] = -e0*(e12 - e2 + e41); +} + +}; diff --git a/src/LLBM/initialize.h b/src/LLBM/initialize.h new file mode 100644 index 0000000..b361217 --- /dev/null +++ b/src/LLBM/initialize.h @@ -0,0 +1,53 @@ +#pragma once + +#include "concepts.h" + +struct InitializeO { + +template <concepts::Arithmetic V, concepts::Arithmetic T = V> +static void apply(V f_curr[19], V f_next[19]) { + f_next[0] = T{0.0277777777777778}; + f_next[1] = T{0.0277777777777778}; + f_next[2] = T{0.0555555555555556}; + f_next[3] = T{0.0277777777777778}; + f_next[4] = T{0.0277777777777778}; + f_next[5] = T{0.0277777777777778}; + f_next[6] = T{0.0555555555555556}; + f_next[7] = T{0.0277777777777778}; + f_next[8] = T{0.0555555555555556}; + f_next[9] = T{0.333333333333333}; + f_next[10] = T{0.0555555555555556}; + f_next[11] = T{0.0277777777777778}; + f_next[12] = T{0.0555555555555556}; + f_next[13] = T{0.0277777777777778}; + f_next[14] = T{0.0277777777777778}; + f_next[15] = T{0.0277777777777778}; + f_next[16] = T{0.0555555555555556}; + f_next[17] = T{0.0277777777777778}; + f_next[18] = T{0.0277777777777778}; +} + +template <concepts::Arithmetic V, concepts::Arithmetic T = V> +static void apply(V f_curr[19], V f_next[19], T scale) { + f_next[0] = T{scale * 0.0277777777777778}; + f_next[1] = T{scale * 0.0277777777777778}; + f_next[2] = T{scale * 0.0555555555555556}; + f_next[3] = T{scale * 0.0277777777777778}; + f_next[4] = T{scale * 0.0277777777777778}; + f_next[5] = T{scale * 0.0277777777777778}; + f_next[6] = T{scale * 0.0555555555555556}; + f_next[7] = T{scale * 0.0277777777777778}; + f_next[8] = T{scale * 0.0555555555555556}; + f_next[9] = T{scale * 0.333333333333333}; + f_next[10] = T{scale * 0.0555555555555556}; + f_next[11] = T{scale * 0.0277777777777778}; + f_next[12] = T{scale * 0.0555555555555556}; + f_next[13] = T{scale * 0.0277777777777778}; + f_next[14] = T{scale * 0.0277777777777778}; + f_next[15] = T{scale * 0.0277777777777778}; + f_next[16] = T{scale * 0.0555555555555556}; + f_next[17] = T{scale * 0.0277777777777778}; + f_next[18] = T{scale * 0.0277777777777778}; +} + +}; diff --git a/src/LLBM/smagorinsky_collide.h b/src/LLBM/smagorinsky_collide.h new file mode 100644 index 0000000..1a90598 --- /dev/null +++ b/src/LLBM/smagorinsky_collide.h @@ -0,0 +1,129 @@ +#pragma once + +#include "concepts.h" + +struct SmagorinskyBgkCollideO { + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T tau, T smagorinsky) { + V rho; + V u[3]; + V x0 = f_curr[10] + f_curr[13] + f_curr[17]; + V x1 = f_curr[14] + f_curr[5] + f_curr[6]; + V x2 = f_curr[1] + f_curr[2] + f_curr[4]; + V x3 = x0 + x1 + x2 + f_curr[0] + f_curr[11] + f_curr[12] + f_curr[15] + f_curr[16] + f_curr[18] + f_curr[3] + f_curr[7] + f_curr[8] + f_curr[9]; + V x4 = -f_curr[11] + f_curr[7]; + V x5 = -f_curr[15] + f_curr[3]; + V x6 = T{1}/x3; + V x7 = f_curr[0] - f_curr[18]; + V x8 = T{2.00000000000000}*tau + T{-1.00000000000000}; + V x9 = T{72.0000000000000}*f_curr[5]; + V x26 = T{72.0000000000000}*f_curr[13]; + V x33 = T{72.0000000000000}*f_curr[11]; + V x44 = T{72.0000000000000}*f_curr[1]; + V x50 = T{72.0000000000000}*f_curr[17]; + V x56 = T{72.0000000000000}*f_curr[15]; + V x64 = T{72.0000000000000}*f_curr[4]; + V x68 = T{72.0000000000000}*f_curr[14]; + V x72 = T{72.0000000000000}*f_curr[18]; + rho = x3; + V x78 = T{2.00000000000000}*rho; + u[0] = x6*(x0 + x4 + x5 - f_curr[1] - f_curr[5] - f_curr[8]); + V x12 = -u[0]; + V x14 = T{6.00000000000000}*u[0]; + V x17 = u[0]*u[0]; + V x18 = T{3.00000000000000}*x17; + V x27 = -x14; + V x38 = T{2.00000000000000} - x18; + V x79 = T{6.00000000000000}*x17; + u[1] = x6*(x1 + x4 + x7 - f_curr[12] - f_curr[13] - f_curr[4]); + V x10 = T{6.00000000000000}*u[1]; + V x11 = -x10; + V x13 = x12 + u[1]; + V x19 = u[1]*u[1]; + V x20 = T{3.00000000000000}*x19; + V x21 = x18 + x20 + T{-2.00000000000000}; + V x28 = -u[1]; + V x29 = x28 + u[0]; + V x34 = u[0] + u[1]; + V x35 = T{9.00000000000000}*(x34*x34); + V x37 = -x20; + V x39 = x37 + x38; + V x85 = T{6.00000000000000}*x19; + u[2] = x6*(x2 + x5 + x7 - f_curr[14] - f_curr[16] - f_curr[17]); + V x15 = u[2]*u[2]; + V x16 = T{3.00000000000000}*x15; + V x22 = x16 + x21; + V x23 = x14 + x22; + V x24 = rho*(x11 + x23 - T{9.00000000000000}*x13*x13); + V x25 = x24 + x9; + V x30 = x10 + x22; + V x31 = rho*(x27 + x30 - T{9.00000000000000}*x29*x29); + V x32 = x26 + x31; + V x36 = rho*(x14 + x30 - x35); + V x40 = -x16; + V x41 = x10 + x40; + V x42 = rho*(x14 + x35 + x39 + x41) - T{72.0000000000000}*f_curr[7]; + V x43 = -x33 - x36 + x42; + V x45 = T{6.00000000000000}*u[2]; + V x46 = -x45; + V x47 = x12 + u[2]; + V x48 = rho*(x23 + x46 - T{9.00000000000000}*x47*x47); + V x49 = x44 + x48; + V x51 = -u[2]; + V x52 = x51 + u[0]; + V x53 = x22 + x45; + V x54 = rho*(x27 + x53 - T{9.00000000000000}*x52*x52); + V x55 = x50 + x54; + V x57 = u[0] + u[2]; + V x58 = T{9.00000000000000}*(x57*x57); + V x59 = rho*(x14 + x53 - x58); + V x60 = x39 + x45; + V x61 = x14 + x40; + V x62 = rho*(x58 + x60 + x61) - T{72.0000000000000}*f_curr[3]; + V x63 = -x56 - x59 + x62; + V x65 = x28 + u[2]; + V x66 = rho*(x30 + x46 - T{9.00000000000000}*x65*x65); + V x67 = x64 + x66; + V x69 = x51 + u[1]; + V x70 = rho*(x11 + x53 - T{9.00000000000000}*x69*x69); + V x71 = x68 + x70; + V x73 = u[1] + u[2]; + V x74 = T{9.00000000000000}*(x73*x73); + V x75 = rho*(x30 + x45 - x74); + V x76 = rho*(x41 + x60 + x74) - T{72.0000000000000}*f_curr[0]; + V x77 = -x72 - x75 + x76; + V x80 = x16 + T{-2.00000000000000}; + V x81 = x14 + x20 - x79 + x80; + V x82 = x37 + x61 + x79 + T{2.00000000000000}; + V x83 = -x44 - x48 - x50 - x54 + x63; + V x84 = -x24 - x26 - x31 + x43 - x9; + V x86 = x10 + x18 + x80 - x85; + V x87 = x38 + x41 + x85; + V x88 = -x64 - x66 - x68 - x70 + x77; + V x89 = T{6.00000000000000}*x15; + V x90 = x21 + x45 - x89; + V x91 = x60 + x89; + V x92 = T{1}/(T{20.0000000000000}*tau + T{2.00000000000000} + T{5.04537849152229}*simd::sqrt<T>((smagorinsky*smagorinsky)*simd::sqrt<T>((x25 + x32 + x43)*(x25 + x32 + x43) + (x49 + x55 + x63)*(x49 + x55 + x63) + (x67 + x71 + x77)*(x67 + x71 + x77) + T{0.500000000000000}*((-x78*x81 + x78*x82 + x83 + x84 - T{72}*f_curr[10] - T{72}*f_curr[8])*(-x78*x81 + x78*x82 + x83 + x84 - T{72}*f_curr[10] - T{72}*f_curr[8])) + T{0.500000000000000}*((-x78*x86 + x78*x87 + x84 + x88 - T{72}*f_curr[12] - T{72}*f_curr[6])*(-x78*x86 + x78*x87 + x84 + x88 - T{72}*f_curr[12] - T{72}*f_curr[6])) + T{0.500000000000000}*((-x78*x90 + x78*x91 + x83 + x88 - T{72}*f_curr[16] - T{72}*f_curr[2])*(-x78*x90 + x78*x91 + x83 + x88 - T{72}*f_curr[16] - T{72}*f_curr[2]))) + T{0.157134840263677}*(x8*x8))); + f_next[0] = T{0.333333333333333}*x76*x92 + f_curr[0]; + f_next[1] = -T{0.333333333333333}*x49*x92 + f_curr[1]; + f_next[2] = T{0.666666666666667}*x92*(rho*x91 - T{36.0000000000000}*f_curr[2]) + f_curr[2]; + f_next[3] = T{0.333333333333333}*x62*x92 + f_curr[3]; + f_next[4] = -T{0.333333333333333}*x67*x92 + f_curr[4]; + f_next[5] = -T{0.333333333333333}*x25*x92 + f_curr[5]; + f_next[6] = T{0.666666666666667}*x92*(rho*x87 - T{36.0000000000000}*f_curr[6]) + f_curr[6]; + f_next[7] = T{0.333333333333333}*x42*x92 + f_curr[7]; + f_next[8] = -T{0.666666666666667}*x92*(rho*x81 + T{36.0000000000000}*f_curr[8]) + f_curr[8]; + f_next[9] = -T{4.00000000000000}*x92*(rho*x22 + T{6.00000000000000}*f_curr[9]) + f_curr[9]; + f_next[10] = T{0.666666666666667}*x92*(rho*x82 - T{36.0000000000000}*f_curr[10]) + f_curr[10]; + f_next[11] = -T{0.333333333333333}*x92*(x33 + x36) + f_curr[11]; + f_next[12] = -T{0.666666666666667}*x92*(rho*x86 + T{36.0000000000000}*f_curr[12]) + f_curr[12]; + f_next[13] = -T{0.333333333333333}*x32*x92 + f_curr[13]; + f_next[14] = -T{0.333333333333333}*x71*x92 + f_curr[14]; + f_next[15] = -T{0.333333333333333}*x92*(x56 + x59) + f_curr[15]; + f_next[16] = -T{0.666666666666667}*x92*(rho*x90 + T{36.0000000000000}*f_curr[16]) + f_curr[16]; + f_next[17] = -T{0.333333333333333}*x55*x92 + f_curr[17]; + f_next[18] = -T{0.333333333333333}*x92*(x72 + x75) + f_curr[18]; +} + +}; diff --git a/src/LLBM/wall.h b/src/LLBM/wall.h new file mode 100644 index 0000000..26534e4 --- /dev/null +++ b/src/LLBM/wall.h @@ -0,0 +1,4 @@ +#pragma once + +template<int N_0, int N_1, int N_2=0> +struct WallNormal { }; diff --git a/src/concepts.h b/src/concepts.h new file mode 100644 index 0000000..61f7cbf --- /dev/null +++ b/src/concepts.h @@ -0,0 +1,22 @@ +#pragma once + +#include <concepts> + +namespace concepts { + +template <typename V> +concept Arithmetic = requires(V a, V b) { + a + b; + a += b; + + a - b; + a -= b; + + a * b; + a *= b; + + a / b; + a /= b; +}; + +} diff --git a/src/cuboid.h b/src/cuboid.h new file mode 100644 index 0000000..c4ff803 --- /dev/null +++ b/src/cuboid.h @@ -0,0 +1,37 @@ +#pragma once + +#include <array> + +class Cuboid { +private: + const std::array<std::size_t,3> _size; + const std::size_t _volume; + +public: + Cuboid(std::size_t nX, std::size_t nY, std::size_t nZ): + _size{nX, nY, nZ}, + _volume(nX*nY*nZ) { } + + std::size_t volume() const { + return _volume; + } + + std::size_t index(unsigned iX, unsigned iY, unsigned iZ) const { + return iX*_size[1]*_size[2] + iY*_size[2] + iZ; + } + + int operator[](unsigned i) const { + return _size[i]; + } + + template <typename F> + void traverse(F f) { + for (int iX = 0; iX < _size[0]; ++iX) { + for (int iY = 0; iY < _size[1]; ++iY) { + for (int iZ = 0; iZ < _size[2]; ++iZ) { + f(iX, iY, iZ, index(iX,iY,iZ)); + } + } + } + } +}; diff --git a/src/export.h b/src/export.h new file mode 100644 index 0000000..a3d38ec --- /dev/null +++ b/src/export.h @@ -0,0 +1,59 @@ +#pragma once + +#include <span> +#include <fstream> + +namespace export_format { + +template <std::floating_point T> +void vtk(Cuboid c, std::span<T> rho, std::span<T> u, std::string path) { + std::ofstream fout; + fout.open(path.c_str()); + + fout << "# vtk DataFile Version 3.0\n"; + fout << "lbm_output\n"; + fout << "ASCII\n"; + fout << "DATASET RECTILINEAR_GRID\n"; + fout << "DIMENSIONS " << c[0] << " " << c[1] << " " << c[2] << "\n"; + + fout << "X_COORDINATES " << c[0] << " float\n"; + for (std::size_t iX=0; iX < c[0]; ++iX) { + fout << iX << " "; + } + + fout << "\nY_COORDINATES " << c[1] << " float\n"; + for (std::size_t iY=0; iY < c[1]; ++iY) { + fout << iY << " "; + } + + fout << "\nZ_COORDINATES " << c[2] << " float\n"; + for (std::size_t iZ=0; iZ < c[2]; ++iZ) { + fout << iZ << " "; + } + fout << "\nPOINT_DATA " << c.volume() << "\n"; + + fout << "VECTORS velocity float\n"; + for (std::size_t iZ=0; iZ < c[2]; ++iZ) { + for (std::size_t iY=0; iY < c[1]; ++iY) { + for (std::size_t iX=0; iX < c[0]; ++iX) { + const std::size_t iCell = c.index(iX,iY,iZ); + fout << u[iCell] << " " << u[c.volume() + iCell] << " " << u[2*c.volume() + iCell] << "\n"; + } + } + } + + fout << "SCALARS density float 1\n"; + fout << "LOOKUP_TABLE default\n"; + for (std::size_t iZ=0; iZ < c[2]; ++iZ) { + for (std::size_t iY=0; iY < c[1]; ++iY) { + for (std::size_t iX=0; iX < c[0]; ++iX) { + const std::size_t iCell = c.index(iX,iY,iZ); + fout << rho[iCell] << "\n"; + } + } + } + + fout.close(); +} + +} diff --git a/src/lattice.h b/src/lattice.h new file mode 100644 index 0000000..bbebab4 --- /dev/null +++ b/src/lattice.h @@ -0,0 +1,267 @@ +#pragma once + +#include <memory> +#include <vector> + +#ifdef ENABLE_AVX512 +#include "simd/512.h" +#else +#include "simd/256.h" +#endif + +#include <omp.h> + +#include "cuboid.h" +#include "population.h" +#include "propagation.h" +#include "operator.h" +#include "export.h" + +#include "LLBM/initialize.h" +#include "LLBM/collect_moments.h" + +template <typename T> +class LatticeMask { +private: + const std::size_t _size; + + std::unique_ptr<typename simd::Mask<T>::storage_t[]> _simd_mask; + std::unique_ptr<bool[]> _base_mask; + +public: + LatticeMask(std::size_t nCells): + _size(((nCells + simd::Pack<T>::size - 1) / simd::Mask<T>::storage_size + 1) * simd::Mask<T>::storage_size), + _simd_mask(new typename simd::Mask<T>::storage_t[_size / simd::Mask<T>::storage_size] { }), + _base_mask(new bool[_size] { }) + { } + + void set(std::size_t iCell, bool state) { + _base_mask[iCell] = state; + } + + bool get(std::size_t iCell) const { + return _base_mask[iCell]; + } + + void serialize() { + for (std::size_t i=0; i < _size / simd::Mask<T>::storage_size; ++i) { + _simd_mask[i] = simd::Mask<T>::encode(_base_mask.get() + i*simd::Mask<T>::storage_size); + } + } + + typename simd::Mask<T>::storage_t* data() { + return _simd_mask.get(); + } + +}; + +template <concepts::PropagatablePopulationBuffer Buffer> +class Lattice { +private: + using value_t = typename Buffer::value_t; + using pack_t = simd::Pack<value_t>; + using mask_t = simd::Mask<value_t>; + + Cuboid _cuboid; + Buffer _population; + + template <typename COP> + void apply(COP cop, + std::size_t iCell, + pack_t f_curr[population::q], + pack_t f_next[population::q]) { + if (mask_t m = {cop.mask.data(), iCell}) { + cop(f_curr, f_next); + + #pragma GCC unroll population::q + for (unsigned iPop=0; iPop < population::q; ++iPop) { + simd::maskstore(get(iPop, stage::post_collision()) + iCell, m, f_next[iPop]); + } + } + } + +public: + Lattice(Cuboid cuboid): + _cuboid(cuboid), + _population(cuboid) + { + apply<InitializeO>(); + } + + template <t |