summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Kummerlaender2021-05-17 00:30:13 +0200
committerAdrian Kummerlaender2021-05-17 00:30:13 +0200
commita92271176a19e06611099c0eccc4e6a6887f4915 (patch)
tree54067b334bfae7d99c79cfb00da5891334f9514c
downloadSweepLB-a92271176a19e06611099c0eccc4e6a6887f4915.tar
SweepLB-a92271176a19e06611099c0eccc4e6a6887f4915.tar.gz
SweepLB-a92271176a19e06611099c0eccc4e6a6887f4915.tar.bz2
SweepLB-a92271176a19e06611099c0eccc4e6a6887f4915.tar.lz
SweepLB-a92271176a19e06611099c0eccc4e6a6887f4915.tar.xz
SweepLB-a92271176a19e06611099c0eccc4e6a6887f4915.tar.zst
SweepLB-a92271176a19e06611099c0eccc4e6a6887f4915.zip
Extract public version of SweepLB
-rw-r--r--.editorconfig2
-rw-r--r--CMakeLists.txt72
-rw-r--r--box.cpp73
-rw-r--r--channel.cpp87
-rw-r--r--ldc.cpp76
-rw-r--r--shell.nix15
-rw-r--r--src/LLBM/bounce_back.h30
-rw-r--r--src/LLBM/bounce_back_moving_wall.h30
-rw-r--r--src/LLBM/collect_moments.h25
-rw-r--r--src/LLBM/collide.h89
-rw-r--r--src/LLBM/equilibrium_density_wall.h144
-rw-r--r--src/LLBM/equilibrium_velocity_wall.h144
-rw-r--r--src/LLBM/initialize.h53
-rw-r--r--src/LLBM/smagorinsky_collide.h129
-rw-r--r--src/LLBM/wall.h4
-rw-r--r--src/concepts.h22
-rw-r--r--src/cuboid.h37
-rw-r--r--src/export.h59
-rw-r--r--src/lattice.h267
-rw-r--r--src/operator.h41
-rw-r--r--src/pattern/all.h5
-rw-r--r--src/pattern/none.h48
-rw-r--r--src/pattern/ps.h123
-rw-r--r--src/pattern/sss.h63
-rw-r--r--src/population.h41
-rw-r--r--src/propagation.h28
-rw-r--r--src/simd/256.h379
-rw-r--r--src/simd/512.h339
28 files changed, 2425 insertions, 0 deletions
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..0b225af
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,2 @@
+[*]
+indent_style = tab
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..c5b383b
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,72 @@
+cmake_minimum_required(VERSION 3.10)
+project(SweepLB LANGUAGES CXX)
+
+if(NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+
+set(SIMD_MODE "AVX256" CACHE STRING "Generate either AVX256 or AVX512 instructions")
+option(USE_SIMD_CELL_LIST "Generate gather / scatter instructions for cell list processing" OFF)
+
+if(NOT SIMD_MODE)
+ set(SIMD_MODE "AVX256")
+endif()
+
+set(CXX_FLAGS "-O3 -march=native -mtune=native")
+
+if(${SIMD_MODE} MATCHES "AVX256")
+ message("Selecting AVX256")
+ set(SIMD_MODE_FLAGS "-mavx2")
+elseif(${SIMD_MODE} MATCHES "AVX512")
+ message("Selecting AVX512")
+ add_compile_definitions(ENABLE_AVX512)
+ set(SIMD_MODE_FLAGS "-mavx512f -mavx512dq")
+else()
+ message(FATAL_ERROR "Invalid SIMD mode, choose either AVX256 or AVX512")
+endif()
+
+if(USE_SIMD_CELL_LIST)
+ message("Enabling SIMD cell list processing.")
+ add_compile_definitions(SIMD_CELL_LIST)
+endif()
+
+set(CMAKE_CXX_FLAGS "${CXX_FLAGS} ${SIMD_MODE_FLAGS}")
+
+message("Using flags \"${CMAKE_CXX_FLAGS}\"")
+
+include_directories(
+ src/
+)
+
+find_package(OpenMP)
+
+set(EXAMPLES ldc box channel)
+set(PRECISIONS float double)
+set(PATTERNS sss ps)
+
+foreach(example IN LISTS EXAMPLES)
+ foreach(precision IN LISTS PRECISIONS)
+ foreach(pattern IN LISTS PATTERNS)
+ set(generated_name ${example}_${precision}_${pattern})
+ string(TOUPPER ${pattern} pattern)
+ add_executable(${generated_name} ${example}.cpp)
+ target_compile_definitions(${generated_name}
+ PRIVATE
+ SWEEPLB_PRECISION=${precision}
+ SWEEPLB_PATTERN=${pattern})
+ target_compile_features(${generated_name}
+ PRIVATE cxx_std_20)
+ target_link_libraries(
+ ${generated_name}
+ PUBLIC
+ rt)
+ if(OpenMP_CXX_FOUND)
+ target_link_libraries(
+ ${generated_name}
+ PUBLIC
+ OpenMP::OpenMP_CXX
+ )
+ endif()
+ endforeach()
+ endforeach()
+endforeach()
diff --git a/box.cpp b/box.cpp
new file mode 100644
index 0000000..ecb3296
--- /dev/null
+++ b/box.cpp
@@ -0,0 +1,73 @@
+#include "lattice.h"
+
+#include "LLBM/collide.h"
+#include "LLBM/initialize.h"
+#include "LLBM/bounce_back.h"
+
+#include <chrono>
+#include <iostream>
+
+#include "pattern/all.h"
+
+using T = SWEEPLB_PRECISION;
+using PATTERN = pattern::SWEEPLB_PATTERN<T>;
+
+void simulate(Cuboid cuboid, std::size_t nStep) {
+ const int nThread = omp_get_max_threads();
+
+ Lattice<PATTERN> lattice(cuboid);
+
+ std::vector<simd::Pack<T>::index_t> discontinuity;
+
+ LatticeMask<T> bulk_mask(lattice.volume());
+ LatticeMask<T> wall_mask(lattice.volume());
+
+ cuboid.traverse([&](int iX, int iY, int iZ, std::size_t iCell) {
+ if (std::abs(iX - cuboid[0]/2) < cuboid[0]/16 &&
+ std::abs(iY - cuboid[1]/2) < cuboid[1]/16 &&
+ std::abs(iZ - cuboid[2]/2) < cuboid[2]/16) {
+ discontinuity.emplace_back(iCell);
+ }
+ if ( iX > 0 && iX < cuboid[0]-1
+ && iY > 0 && iY < cuboid[1]-1
+ && iZ > 0 && iZ < cuboid[2]-1) {
+ bulk_mask.set(iCell, true);
+ } else {
+ wall_mask.set(iCell, true);
+ }
+ });
+
+ bulk_mask.serialize();
+ wall_mask.serialize();
+
+ lattice.apply<InitializeO>(discontinuity, 2.0);
+
+ T tau = 0.56;
+
+ auto start = std::chrono::steady_clock::now();
+
+ for (std::size_t iStep = 0; iStep < nStep; ++iStep) {
+ lattice.apply(Operator(BgkCollideO(), bulk_mask, tau),
+ Operator(BounceBackO(), wall_mask));
+ lattice.stream();
+ }
+
+ auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(
+ std::chrono::steady_clock::now() - start);
+
+ std::cout << cuboid[0]
+ << ", " << nStep
+ << ", " << nThread
+ << ", " << (nStep * lattice.volume()) / (1e6 * duration.count())
+ << std::endl;
+
+ lattice.write_momenta(bulk_mask, "result.vtk");
+}
+
+int main(int argc, char* argv[]) {
+ const std::size_t nX = atoi(argv[1]);
+ const std::size_t nY = atoi(argv[2]);
+ const std::size_t nZ = atoi(argv[3]);
+ const std::size_t steps = atoi(argv[4]);
+ simulate({ nX, nY, nZ }, steps);
+}
diff --git a/channel.cpp b/channel.cpp
new file mode 100644
index 0000000..fddb9a1
--- /dev/null
+++ b/channel.cpp
@@ -0,0 +1,87 @@
+#include "lattice.h"
+
+#include "LLBM/collide.h"
+#include "LLBM/initialize.h"
+#include "LLBM/bounce_back.h"
+#include "LLBM/bounce_back_moving_wall.h"
+#include "LLBM/equilibrium_density_wall.h"
+#include "LLBM/equilibrium_velocity_wall.h"
+
+#include <chrono>
+#include <iostream>
+
+#include "pattern/all.h"
+
+using T = SWEEPLB_PRECISION;
+using PATTERN = pattern::SWEEPLB_PATTERN<T>;
+
+void simulate(Cuboid cuboid, std::size_t nStep) {
+ const int nThread = omp_get_max_threads();
+
+ Lattice<PATTERN> lattice(cuboid);
+
+ LatticeMask<T> bulk_mask(cuboid.volume());
+ LatticeMask<T> wall_mask(cuboid.volume());
+ LatticeMask<T> inflow_mask(cuboid.volume());
+ LatticeMask<T> outflow_mask(cuboid.volume());
+
+ cuboid.traverse([&](int iX, int iY, int iZ, std::size_t iCell) {
+ if ( iY == 0 || iY == cuboid[1]-1
+ || iZ == 0 || iZ == cuboid[2]-1) {
+ wall_mask.set(iCell, true);
+ } else if (iX == 0) {
+ inflow_mask.set(iCell, true);
+ } else if (iX == cuboid[0]-1) {
+ outflow_mask.set(iCell, true);
+ } else {
+ bulk_mask.set(iCell, true);
+ }
+ });
+
+ bulk_mask.serialize();
+ wall_mask.serialize();
+ inflow_mask.serialize();
+ outflow_mask.serialize();
+
+ T tau = 0.56;
+
+ T u_inflow = 0.05;
+ T d_outflow = 1.;
+
+ for (std::size_t iStep = 0; iStep < 100; ++iStep) {
+ lattice.apply(Operator(BgkCollideO(), bulk_mask, tau),
+ Operator(BounceBackO(), wall_mask),
+ Operator(EquilibriumVelocityWallO(), inflow_mask, u_inflow, WallNormal<1,0,0>()),
+ Operator(EquilibriumDensityWallO(), outflow_mask, d_outflow, WallNormal<-1,0,0>()));
+ lattice.stream();
+ }
+
+ auto start = std::chrono::steady_clock::now();
+
+ for (std::size_t iStep = 0; iStep < nStep; ++iStep) {
+ lattice.apply(Operator(BgkCollideO(), bulk_mask, tau),
+ Operator(BounceBackO(), wall_mask),
+ Operator(EquilibriumVelocityWallO(), inflow_mask, u_inflow, WallNormal<1,0,0>()),
+ Operator(EquilibriumDensityWallO(), outflow_mask, d_outflow, WallNormal<-1,0,0>()));
+ lattice.stream();
+ }
+
+ auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(
+ std::chrono::steady_clock::now() - start);
+
+ std::cout << cuboid[0] << ", " << cuboid[1] << ", " << cuboid[2]
+ << ", " << nStep
+ << ", " << nThread
+ << ", " << (nStep * lattice.volume()) / (1e6 * duration.count())
+ << std::endl;
+
+ lattice.write_momenta(bulk_mask, "result.vtk");
+}
+
+int main(int argc, char* argv[]) {
+ const std::size_t nX = atoi(argv[1]);
+ const std::size_t nY = atoi(argv[2]);
+ const std::size_t nZ = atoi(argv[3]);
+ const std::size_t steps = atoi(argv[4]);
+ simulate({ nX, nY, nZ }, steps);
+}
diff --git a/ldc.cpp b/ldc.cpp
new file mode 100644
index 0000000..0917939
--- /dev/null
+++ b/ldc.cpp
@@ -0,0 +1,76 @@
+#include "lattice.h"
+
+#include "LLBM/collide.h"
+#include "LLBM/initialize.h"
+#include "LLBM/bounce_back.h"
+#include "LLBM/bounce_back_moving_wall.h"
+
+#include <chrono>
+#include <iostream>
+
+#include "pattern/all.h"
+
+using T = SWEEPLB_PRECISION;
+using PATTERN = pattern::SWEEPLB_PATTERN<T>;
+
+void simulate(Cuboid cuboid, std::size_t nStep) {
+ const int nThread = omp_get_max_threads();
+
+ Lattice<PATTERN> lattice(cuboid);
+
+ LatticeMask<T> bulk_mask(cuboid.volume());
+ LatticeMask<T> box_mask(cuboid.volume());
+ LatticeMask<T> lid_mask(cuboid.volume());
+
+ cuboid.traverse([&](int iX, int iY, int iZ, std::size_t iCell) {
+ if (iZ == cuboid[2]-1) {
+ lid_mask.set(iCell, true);
+ } else if (iX == 0 || iX == cuboid[0]-1
+ || iY == 0 || iY == cuboid[1]-1
+ || iZ == 0) {
+ box_mask.set(iCell, true);
+ } else {
+ bulk_mask.set(iCell, true);
+ }
+ });
+
+ bulk_mask.serialize();
+ box_mask.serialize();
+ lid_mask.serialize();
+
+ T tau = 0.51;
+ T u_lid[] { 0.05, 0., 0. };
+
+ for (std::size_t iStep = 0; iStep < 100; ++iStep) {
+ lattice.apply(Operator(BgkCollideO(), bulk_mask, tau),
+ Operator(BounceBackO(), box_mask),
+ Operator(BounceBackMovingWallO(), lid_mask, u_lid));
+ lattice.stream();
+ }
+
+ auto start = std::chrono::steady_clock::now();
+
+ for (std::size_t iStep = 0; iStep < nStep; ++iStep) {
+ lattice.apply(Operator(BgkCollideO(), bulk_mask, tau),
+ Operator(BounceBackO(), box_mask),
+ Operator(BounceBackMovingWallO(), lid_mask, u_lid));
+ lattice.stream();
+ }
+
+ auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(
+ std::chrono::steady_clock::now() - start);
+
+ std::cout << cuboid[0]
+ << ", " << nStep
+ << ", " << nThread
+ << ", " << (nStep * lattice.volume()) / (1e6 * duration.count())
+ << std::endl;
+
+ lattice.write_momenta(bulk_mask, "result.vtk");
+}
+
+int main(int argc, char* argv[]) {
+ const std::size_t n = atoi(argv[1]);
+ const std::size_t steps = atoi(argv[2]);
+ simulate({ n, n, n}, steps);
+}
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000..d156272
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,15 @@
+{ pkgs ? import <nixpkgs> { }, ... }:
+
+pkgs.stdenvNoCC.mkDerivation rec {
+ name = "sweeplb-env";
+ env = pkgs.buildEnv { name = name; paths = buildInputs; };
+
+ buildInputs = with pkgs; [
+ gcc10
+ cmake
+ ];
+
+ shellHook = ''
+ export NIX_SHELL_NAME="${name}"
+ '';
+}
diff --git a/src/LLBM/bounce_back.h b/src/LLBM/bounce_back.h
new file mode 100644
index 0000000..b710ee4
--- /dev/null
+++ b/src/LLBM/bounce_back.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include "concepts.h"
+
+struct BounceBackO {
+
+template <concepts::Arithmetic V>
+static void apply(V f_curr[19], V f_next[19]) {
+ f_next[0] = f_curr[18];
+ f_next[1] = f_curr[17];
+ f_next[2] = f_curr[16];
+ f_next[3] = f_curr[15];
+ f_next[4] = f_curr[14];
+ f_next[5] = f_curr[13];
+ f_next[6] = f_curr[12];
+ f_next[7] = f_curr[11];
+ f_next[8] = f_curr[10];
+ f_next[9] = f_curr[9];
+ f_next[10] = f_curr[8];
+ f_next[11] = f_curr[7];
+ f_next[12] = f_curr[6];
+ f_next[13] = f_curr[5];
+ f_next[14] = f_curr[4];
+ f_next[15] = f_curr[3];
+ f_next[16] = f_curr[2];
+ f_next[17] = f_curr[1];
+ f_next[18] = f_curr[0];
+}
+
+};
diff --git a/src/LLBM/bounce_back_moving_wall.h b/src/LLBM/bounce_back_moving_wall.h
new file mode 100644
index 0000000..c00a17e
--- /dev/null
+++ b/src/LLBM/bounce_back_moving_wall.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include "concepts.h"
+
+struct BounceBackMovingWallO {
+
+template <concepts::Arithmetic V, concepts::Arithmetic T>
+static void apply(V f_curr[19], V f_next[19], T u[3]) {
+ f_next[0] = f_curr[18] + T{0.166666666666667}*u[1] + T{0.166666666666667}*u[2];
+ f_next[1] = f_curr[17] - T{0.166666666666667}*u[0] + T{0.166666666666667}*u[2];
+ f_next[2] = f_curr[16] + T{0.333333333333333}*u[2];
+ f_next[3] = f_curr[15] + T{0.166666666666667}*u[0] + T{0.166666666666667}*u[2];
+ f_next[4] = f_curr[14] - T{0.166666666666667}*u[1] + T{0.166666666666667}*u[2];
+ f_next[5] = f_curr[13] - T{0.166666666666667}*u[0] + T{0.166666666666667}*u[1];
+ f_next[6] = f_curr[12] + T{0.333333333333333}*u[1];
+ f_next[7] = f_curr[11] + T{0.166666666666667}*u[0] + T{0.166666666666667}*u[1];
+ f_next[8] = f_curr[10] - T{0.333333333333333}*u[0];
+ f_next[9] = f_curr[9];
+ f_next[10] = f_curr[8] + T{0.333333333333333}*u[0];
+ f_next[11] = f_curr[7] - T{0.166666666666667}*u[0] - T{0.166666666666667}*u[1];
+ f_next[12] = f_curr[6] - T{0.333333333333333}*u[1];
+ f_next[13] = f_curr[5] + T{0.166666666666667}*u[0] - T{0.166666666666667}*u[1];
+ f_next[14] = f_curr[4] + T{0.166666666666667}*u[1] - T{0.166666666666667}*u[2];
+ f_next[15] = f_curr[3] - T{0.166666666666667}*u[0] - T{0.166666666666667}*u[2];
+ f_next[16] = f_curr[2] - T{0.333333333333333}*u[2];
+ f_next[17] = f_curr[1] + T{0.166666666666667}*u[0] - T{0.166666666666667}*u[2];
+ f_next[18] = f_curr[0] - T{0.166666666666667}*u[1] - T{0.166666666666667}*u[2];
+}
+
+};
diff --git a/src/LLBM/collect_moments.h b/src/LLBM/collect_moments.h
new file mode 100644
index 0000000..afd5d59
--- /dev/null
+++ b/src/LLBM/collect_moments.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "concepts.h"
+
+struct CollectMomentsF {
+
+static constexpr unsigned size = 4;
+
+template <concepts::Arithmetic V>
+static void observe(V f_curr[19], V* momenta) {
+ V m0 = f_curr[10] + f_curr[13] + f_curr[17];
+ V m1 = f_curr[14] + f_curr[5] + f_curr[6];
+ V m2 = f_curr[1] + f_curr[2] + f_curr[4];
+ V m3 = m0 + m1 + m2 + f_curr[0] + f_curr[11] + f_curr[12] + f_curr[15] + f_curr[16] + f_curr[18] + f_curr[3] + f_curr[7] + f_curr[8] + f_curr[9];
+ V m4 = -f_curr[11] + f_curr[7];
+ V m5 = -f_curr[15] + f_curr[3];
+ V m6 = V{1}/m3;
+ V m7 = f_curr[0] - f_curr[18];
+ momenta[0] = m3;
+ momenta[1] = m6*(m0 + m4 + m5 - f_curr[1] - f_curr[5] - f_curr[8]);
+ momenta[2] = m6*(m1 + m4 + m7 - f_curr[12] - f_curr[13] - f_curr[4]);
+ momenta[3] = m6*(m2 + m5 + m7 - f_curr[14] - f_curr[16] - f_curr[17]);
+}
+
+};
diff --git a/src/LLBM/collide.h b/src/LLBM/collide.h
new file mode 100644
index 0000000..c08aade
--- /dev/null
+++ b/src/LLBM/collide.h
@@ -0,0 +1,89 @@
+#pragma once
+
+#include "concepts.h"
+
+struct BgkCollideO {
+
+template <concepts::Arithmetic V, concepts::Arithmetic T>
+static void apply(V f_curr[19], V f_next[19], T tau) {
+ V rho;
+ V u[3];
+ V m0 = f_curr[10] + f_curr[13] + f_curr[17];
+ V m1 = f_curr[14] + f_curr[5] + f_curr[6];
+ V m2 = f_curr[1] + f_curr[2] + f_curr[4];
+ V m3 = m0 + m1 + m2 + f_curr[0] + f_curr[11] + f_curr[12] + f_curr[15] + f_curr[16] + f_curr[18] + f_curr[3] + f_curr[7] + f_curr[8] + f_curr[9];
+ V m4 = -f_curr[11] + f_curr[7];
+ V m5 = -f_curr[15] + f_curr[3];
+ V m6 = T{1}/m3;
+ V m7 = f_curr[0] - f_curr[18];
+ rho = m3;
+ u[0] = m6*(m0 + m4 + m5 - f_curr[1] - f_curr[5] - f_curr[8]);
+ u[1] = m6*(m1 + m4 + m7 - f_curr[12] - f_curr[13] - f_curr[4]);
+ u[2] = m6*(m2 + m5 + m7 - f_curr[14] - f_curr[16] - f_curr[17]);
+ V x0 = T{1}/tau;
+ V x1 = T{0.0138888888888889}*x0;
+ V x2 = u[1] + u[2];
+ V x3 = T{9.00000000000000}*(x2*x2);
+ V x4 = T{6.00000000000000}*u[2];
+ V x5 = u[1]*u[1];
+ V x6 = T{3.00000000000000}*x5;
+ V x7 = -x6;
+ V x8 = u[0]*u[0];
+ V x9 = T{3.00000000000000}*x8;
+ V x10 = T{2.00000000000000} - x9;
+ V x11 = x10 + x7;
+ V x12 = x11 + x4;
+ V x13 = u[2]*u[2];
+ V x14 = T{3.00000000000000}*x13;
+ V x15 = -x14;
+ V x16 = T{6.00000000000000}*u[1];
+ V x17 = x15 + x16;
+ V x18 = T{6.00000000000000}*u[0];
+ V x19 = -u[0];
+ V x20 = x19 + u[2];
+ V x21 = x14 + x6 + T{-2.00000000000000};
+ V x22 = x21 + x9;
+ V x23 = x22 - x4;
+ V x24 = T{0.0277777777777778}*x0;
+ V x25 = T{6.00000000000000}*x13;
+ V x26 = u[0] + u[2];
+ V x27 = T{9.00000000000000}*(x26*x26);
+ V x28 = x15 + x18;
+ V x29 = -u[1];
+ V x30 = x29 + u[2];
+ V x31 = x19 + u[1];
+ V x32 = -x16;
+ V x33 = x18 + x22;
+ V x34 = T{6.00000000000000}*x5;
+ V x35 = u[0] + u[1];
+ V x36 = T{9.00000000000000}*(x35*x35);
+ V x37 = T{6.00000000000000}*x8;
+ V x38 = x9 + T{-2.00000000000000};
+ V x39 = x29 + u[0];
+ V x40 = -x18 + x22;
+ V x41 = -u[2];
+ V x42 = x41 + u[1];
+ V x43 = x22 + x4;
+ V x44 = x41 + u[0];
+ f_next[0] = x1*(rho*(x12 + x17 + x3) - T{72.0000000000000}*f_curr[0]) + f_curr[0];
+ f_next[1] = -x1*(rho*(x18 + x23 - T{9.00000000000000}*x20*x20) + T{72.0000000000000}*f_curr[1]) + f_curr[1];
+ f_next[2] = x24*(rho*(x12 + x25) - T{36.0000000000000}*f_curr[2]) + f_curr[2];
+ f_next[3] = x1*(rho*(x12 + x27 + x28) - T{72.0000000000000}*f_curr[3]) + f_curr[3];
+ f_next[4] = -x1*(rho*(x16 + x23 - T{9.00000000000000}*x30*x30) + T{72.0000000000000}*f_curr[4]) + f_curr[4];
+ f_next[5] = -x1*(rho*(x32 + x33 - T{9.00000000000000}*x31*x31) + T{72.0000000000000}*f_curr[5]) + f_curr[5];
+ f_next[6] = x24*(rho*(x10 + x17 + x34) - T{36.0000000000000}*f_curr[6]) + f_curr[6];
+ f_next[7] = x1*(rho*(x11 + x17 + x18 + x36) - T{72.0000000000000}*f_curr[7]) + f_curr[7];
+ f_next[8] = -x24*(rho*(x18 + x21 - x37) + T{36.0000000000000}*f_curr[8]) + f_curr[8];
+ f_next[9] = -T{0.166666666666667}*x0*(rho*x22 + T{6.00000000000000}*f_curr[9]) + f_curr[9];
+ f_next[10] = x24*(rho*(x28 + x37 + x7 + T{2.00000000000000}) - T{36.0000000000000}*f_curr[10]) + f_curr[10];
+ f_next[11] = -x1*(rho*(x16 + x33 - x36) + T{72.0000000000000}*f_curr[11]) + f_curr[11];
+ f_next[12] = -x24*(rho*(x14 + x16 - x34 + x38) + T{36.0000000000000}*f_curr[12]) + f_curr[12];
+ f_next[13] = -x1*(rho*(x16 + x40 - T{9.00000000000000}*x39*x39) + T{72.0000000000000}*f_curr[13]) + f_curr[13];
+ f_next[14] = -x1*(rho*(x32 + x43 - T{9.00000000000000}*x42*x42) + T{72.0000000000000}*f_curr[14]) + f_curr[14];
+ f_next[15] = -x1*(rho*(-x27 + x33 + x4) + T{72.0000000000000}*f_curr[15]) + f_curr[15];
+ f_next[16] = -x24*(rho*(-x25 + x38 + x4 + x6) + T{36.0000000000000}*f_curr[16]) + f_curr[16];
+ f_next[17] = -x1*(rho*(x4 + x40 - T{9.00000000000000}*x44*x44) + T{72.0000000000000}*f_curr[17]) + f_curr[17];
+ f_next[18] = -x1*(rho*(x16 - x3 + x43) + T{72.0000000000000}*f_curr[18]) + f_curr[18];
+}
+
+};
diff --git a/src/LLBM/equilibrium_density_wall.h b/src/LLBM/equilibrium_density_wall.h
new file mode 100644
index 0000000..598d59d
--- /dev/null
+++ b/src/LLBM/equilibrium_density_wall.h
@@ -0,0 +1,144 @@
+#pragma once
+
+#include "wall.h"
+#include "concepts.h"
+
+struct EquilibriumDensityWallO {
+
+template <concepts::Arithmetic V, concepts::Arithmetic T>
+static void apply(V f_curr[19], V f_next[19], T rho_w, WallNormal<1,0,0>) {
+ V u_w = (rho_w - f_curr[0] - T{2.00000000000000}*f_curr[11] - f_curr[12] - f_curr[14] - T{2.00000000000000}*f_curr[15] - f_curr[16] - f_curr[18] - T{2.00000000000000}*f_curr[1] - f_curr[2] - f_curr[4] - T{2.00000000000000}*f_curr[5] - f_curr[6] - T{2.00000000000000}*f_curr[8] - f_curr[9])/rho_w;
+ V rho = rho_w;
+ V u[3] { u_w, 0., 0. };
+ V e0 = T{0.0277777777777778}*rho;
+ V e1 = u[1] + u[2];
+ V e2 = T{4.50000000000000}*(e1*e1);
+ V e3 = T{3.00000000000000}*u[2];
+ V e4 = u[1]*u[1];
+ V e5 = T{1.50000000000000}*e4;
+ V e6 = -e5;
+ V e7 = u[0]*u[0];
+ V e8 = T{1.50000000000000}*e7;
+ V e9 = T{1.00000000000000} - e8;
+ V e10 = e6 + e9;
+ V e11 = e10 + e3;
+ V e12 = T{3.00000000000000}*u[1];
+ V e13 = u[2]*u[2];
+ V e14 = T{1.50000000000000}*e13;
+ V e15 = -e14;
+ V e16 = e12 + e15;
+ V e17 = T{3.00000000000000}*u[0];
+ V e18 = -u[2];
+ V e19 = e18 + u[0];
+ V e20 = -T{4.50000000000000}*e19*e19;
+ V e21 = e14 + e5 + T{-1.00000000000000};
+ V e22 = e21 + e8;
+ V e23 = e22 - e3;
+ V e24 = T{0.0555555555555556}*rho;
+ V e25 = T{3.00000000000000}*e13;
+ V e26 = u[0] + u[2];
+ V e27 = T{4.50000000000000}*(e26*e26);
+ V e28 = e15 + e17;
+ V e29 = e18 + u[1];
+ V e30 = -T{4.50000000000000}*e29*e29;
+ V e31 = -e12;
+ V e32 = u[0] - u[1];
+ V e33 = -T{4.50000000000000}*e32*e32;
+ V e34 = e17 + e22;
+ V e35 = T{3.00000000000000}*e4;
+ V e36 = u[0] + u[1];
+ V e37 = T{4.50000000000000}*(e36*e36);
+ V e38 = T{3.00000000000000}*e7;
+ V e39 = e8 + T{-1.00000000000000};
+ V e40 = -e17 + e22;
+ V e41 = e22 + e3;
+ f_next[0] = e0*(e11 + e16 + e2);
+ f_next[1] = -e0*(e17 + e20 + e23);
+ f_next[2] = e24*(e11 + e25);
+ f_next[3] = e0*(e11 + e27 + e28);
+ f_next[4] = -e0*(e12 + e23 + e30);
+ f_next[5] = -e0*(e31 + e33 + e34);
+ f_next[6] = e24*(e16 + e35 + e9);
+ f_next[7] = e0*(e10 + e16 + e17 + e37);
+ f_next[8] = -e24*(e17 + e21 - e38);
+ f_next[9] = -T{0.333333333333333}*e22*rho;
+ f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000});
+ f_next[11] = -e0*(e12 + e34 - e37);
+ f_next[12] = -e24*(e12 + e14 - e35 + e39);
+ f_next[13] = -e0*(e12 + e33 + e40);
+ f_next[14] = -e0*(e30 + e31 + e41);
+ f_next[15] = -e0*(-e27 + e3 + e34);
+ f_next[16] = -e24*(-e25 + e3 + e39 + e5);
+ f_next[17] = -e0*(e20 + e3 + e40);
+ f_next[18] = -e0*(e12 - e2 + e41);
+}
+
+template <concepts::Arithmetic V, concepts::Arithmetic T>
+static void apply(V f_curr[19], V f_next[19], T rho_w, WallNormal<-1,0,0>) {
+ V u_w = (-rho_w + f_curr[0] + T{2.00000000000000}*f_curr[10] + f_curr[12] + T{2.00000000000000}*f_curr[13] + f_curr[14] + f_curr[16] + T{2.00000000000000}*f_curr[17] + f_curr[18] + f_curr[2] + T{2.00000000000000}*f_curr[3] + f_curr[4] + f_curr[6] + T{2.00000000000000}*f_curr[7] + f_curr[9])/rho_w;
+ V rho = rho_w;
+ V u[3] { u_w, 0., 0. };
+ V e0 = T{0.0277777777777778}*rho;
+ V e1 = u[1] + u[2];
+ V e2 = T{4.50000000000000}*(e1*e1);
+ V e3 = T{3.00000000000000}*u[2];
+ V e4 = u[1]*u[1];
+ V e5 = T{1.50000000000000}*e4;
+ V e6 = -e5;
+ V e7 = u[0]*u[0];
+ V e8 = T{1.50000000000000}*e7;
+ V e9 = T{1.00000000000000} - e8;
+ V e10 = e6 + e9;
+ V e11 = e10 + e3;
+ V e12 = T{3.00000000000000}*u[1];
+ V e13 = u[2]*u[2];
+ V e14 = T{1.50000000000000}*e13;
+ V e15 = -e14;
+ V e16 = e12 + e15;
+ V e17 = T{3.00000000000000}*u[0];
+ V e18 = -u[2];
+ V e19 = e18 + u[0];
+ V e20 = -T{4.50000000000000}*e19*e19;
+ V e21 = e14 + e5 + T{-1.00000000000000};
+ V e22 = e21 + e8;
+ V e23 = e22 - e3;
+ V e24 = T{0.0555555555555556}*rho;
+ V e25 = T{3.00000000000000}*e13;
+ V e26 = u[0] + u[2];
+ V e27 = T{4.50000000000000}*(e26*e26);
+ V e28 = e15 + e17;
+ V e29 = e18 + u[1];
+ V e30 = -T{4.50000000000000}*e29*e29;
+ V e31 = -e12;
+ V e32 = u[0] - u[1];
+ V e33 = -T{4.50000000000000}*e32*e32;
+ V e34 = e17 + e22;
+ V e35 = T{3.00000000000000}*e4;
+ V e36 = u[0] + u[1];
+ V e37 = T{4.50000000000000}*(e36*e36);
+ V e38 = T{3.00000000000000}*e7;
+ V e39 = e8 + T{-1.00000000000000};
+ V e40 = -e17 + e22;
+ V e41 = e22 + e3;
+ f_next[0] = e0*(e11 + e16 + e2);
+ f_next[1] = -e0*(e17 + e20 + e23);
+ f_next[2] = e24*(e11 + e25);
+ f_next[3] = e0*(e11 + e27 + e28);
+ f_next[4] = -e0*(e12 + e23 + e30);
+ f_next[5] = -e0*(e31 + e33 + e34);
+ f_next[6] = e24*(e16 + e35 + e9);
+ f_next[7] = e0*(e10 + e16 + e17 + e37);
+ f_next[8] = -e24*(e17 + e21 - e38);
+ f_next[9] = -T{0.333333333333333}*e22*rho;
+ f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000});
+ f_next[11] = -e0*(e12 + e34 - e37);
+ f_next[12] = -e24*(e12 + e14 - e35 + e39);
+ f_next[13] = -e0*(e12 + e33 + e40);
+ f_next[14] = -e0*(e30 + e31 + e41);
+ f_next[15] = -e0*(-e27 + e3 + e34);
+ f_next[16] = -e24*(-e25 + e3 + e39 + e5);
+ f_next[17] = -e0*(e20 + e3 + e40);
+ f_next[18] = -e0*(e12 - e2 + e41);
+}
+
+};
diff --git a/src/LLBM/equilibrium_velocity_wall.h b/src/LLBM/equilibrium_velocity_wall.h
new file mode 100644
index 0000000..2d68e34
--- /dev/null
+++ b/src/LLBM/equilibrium_velocity_wall.h
@@ -0,0 +1,144 @@
+#pragma once
+
+#include "wall.h"
+#include "concepts.h"
+
+struct EquilibriumVelocityWallO {
+
+template <concepts::Arithmetic V, concepts::Arithmetic T>
+static void apply(V f_curr[19], V f_next[19], T u_w, WallNormal<1,0,0>) {
+ V rho;
+ rho = -(f_curr[0] + T{2.00000000000000}*f_curr[11] + f_curr[12] + f_curr[14] + T{2.00000000000000}*f_curr[15] + f_curr[16] + f_curr[18] + T{2.00000000000000}*f_curr[1] + f_curr[2] + f_curr[4] + T{2.00000000000000}*f_curr[5] + f_curr[6] + T{2.00000000000000}*f_curr[8] + f_curr[9])/(u_w + T{-1.00000000000000});
+ V u[3] { u_w, 0., 0. };
+ V e0 = T{0.0277777777777778}*rho;
+ V e1 = u[1] + u[2];
+ V e2 = T{4.50000000000000}*(e1*e1);
+ V e3 = T{3.00000000000000}*u[2];
+ V e4 = u[1]*u[1];
+ V e5 = T{1.50000000000000}*e4;
+ V e6 = -e5;
+ V e7 = u[0]*u[0];
+ V e8 = T{1.50000000000000}*e7;
+ V e9 = T{1.00000000000000} - e8;
+ V e10 = e6 + e9;
+ V e11 = e10 + e3;
+ V e12 = T{3.00000000000000}*u[1];
+ V e13 = u[2]*u[2];
+ V e14 = T{1.50000000000000}*e13;
+ V e15 = -e14;
+ V e16 = e12 + e15;
+ V e17 = T{3.00000000000000}*u[0];
+ V e18 = -u[2];
+ V e19 = e18 + u[0];
+ V e20 = -T{4.50000000000000}*e19*e19;
+ V e21 = e14 + e5 + T{-1.00000000000000};
+ V e22 = e21 + e8;
+ V e23 = e22 - e3;
+ V e24 = T{0.0555555555555556}*rho;
+ V e25 = T{3.00000000000000}*e13;
+ V e26 = u[0] + u[2];
+ V e27 = T{4.50000000000000}*(e26*e26);
+ V e28 = e15 + e17;
+ V e29 = e18 + u[1];
+ V e30 = -T{4.50000000000000}*e29*e29;
+ V e31 = -e12;
+ V e32 = u[0] - u[1];
+ V e33 = -T{4.50000000000000}*e32*e32;
+ V e34 = e17 + e22;
+ V e35 = T{3.00000000000000}*e4;
+ V e36 = u[0] + u[1];
+ V e37 = T{4.50000000000000}*(e36*e36);
+ V e38 = T{3.00000000000000}*e7;
+ V e39 = e8 + T{-1.00000000000000};
+ V e40 = -e17 + e22;
+ V e41 = e22 + e3;
+ f_next[0] = e0*(e11 + e16 + e2);
+ f_next[1] = -e0*(e17 + e20 + e23);
+ f_next[2] = e24*(e11 + e25);
+ f_next[3] = e0*(e11 + e27 + e28);
+ f_next[4] = -e0*(e12 + e23 + e30);
+ f_next[5] = -e0*(e31 + e33 + e34);
+ f_next[6] = e24*(e16 + e35 + e9);
+ f_next[7] = e0*(e10 + e16 + e17 + e37);
+ f_next[8] = -e24*(e17 + e21 - e38);
+ f_next[9] = -T{0.333333333333333}*e22*rho;
+ f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000});
+ f_next[11] = -e0*(e12 + e34 - e37);
+ f_next[12] = -e24*(e12 + e14 - e35 + e39);
+ f_next[13] = -e0*(e12 + e33 + e40);
+ f_next[14] = -e0*(e30 + e31 + e41);
+ f_next[15] = -e0*(-e27 + e3 + e34);
+ f_next[16] = -e24*(-e25 + e3 + e39 + e5);
+ f_next[17] = -e0*(e20 + e3 + e40);
+ f_next[18] = -e0*(e12 - e2 + e41);
+}
+
+template <concepts::Arithmetic V, concepts::Arithmetic T>
+static void apply(V f_curr[19], V f_next[19], T u_w, WallNormal<-1,0,0>) {
+ V rho;
+ rho = (f_curr[0] + T{2.00000000000000}*f_curr[10] + f_curr[12] + T{2.00000000000000}*f_curr[13] + f_curr[14] + f_curr[16] + T{2.00000000000000}*f_curr[17] + f_curr[18] + f_curr[2] + T{2.00000000000000}*f_curr[3] + f_curr[4] + f_curr[6] + T{2.00000000000000}*f_curr[7] + f_curr[9])/(u_w + T{1.00000000000000});
+ V u[3] { u_w, 0., 0. };
+ V e0 = T{0.0277777777777778}*rho;
+ V e1 = u[1] + u[2];
+ V e2 = T{4.50000000000000}*(e1*e1);
+ V e3 = T{3.00000000000000}*u[2];
+ V e4 = u[1]*u[1];
+ V e5 = T{1.50000000000000}*e4;
+ V e6 = -e5;
+ V e7 = u[0]*u[0];
+ V e8 = T{1.50000000000000}*e7;
+ V e9 = T{1.00000000000000} - e8;
+ V e10 = e6 + e9;
+ V e11 = e10 + e3;
+ V e12 = T{3.00000000000000}*u[1];
+ V e13 = u[2]*u[2];
+ V e14 = T{1.50000000000000}*e13;
+ V e15 = -e14;
+ V e16 = e12 + e15;
+ V e17 = T{3.00000000000000}*u[0];
+ V e18 = -u[2];
+ V e19 = e18 + u[0];
+ V e20 = -T{4.50000000000000}*e19*e19;
+ V e21 = e14 + e5 + T{-1.00000000000000};
+ V e22 = e21 + e8;
+ V e23 = e22 - e3;
+ V e24 = T{0.0555555555555556}*rho;
+ V e25 = T{3.00000000000000}*e13;
+ V e26 = u[0] + u[2];
+ V e27 = T{4.50000000000000}*(e26*e26);
+ V e28 = e15 + e17;
+ V e29 = e18 + u[1];
+ V e30 = -T{4.50000000000000}*e29*e29;
+ V e31 = -e12;
+ V e32 = u[0] - u[1];
+ V e33 = -T{4.50000000000000}*e32*e32;
+ V e34 = e17 + e22;
+ V e35 = T{3.00000000000000}*e4;
+ V e36 = u[0] + u[1];
+ V e37 = T{4.50000000000000}*(e36*e36);
+ V e38 = T{3.00000000000000}*e7;
+ V e39 = e8 + T{-1.00000000000000};
+ V e40 = -e17 + e22;
+ V e41 = e22 + e3;
+ f_next[0] = e0*(e11 + e16 + e2);
+ f_next[1] = -e0*(e17 + e20 + e23);
+ f_next[2] = e24*(e11 + e25);
+ f_next[3] = e0*(e11 + e27 + e28);
+ f_next[4] = -e0*(e12 + e23 + e30);
+ f_next[5] = -e0*(e31 + e33 + e34);
+ f_next[6] = e24*(e16 + e35 + e9);
+ f_next[7] = e0*(e10 + e16 + e17 + e37);
+ f_next[8] = -e24*(e17 + e21 - e38);
+ f_next[9] = -T{0.333333333333333}*e22*rho;
+ f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000});
+ f_next[11] = -e0*(e12 + e34 - e37);
+ f_next[12] = -e24*(e12 + e14 - e35 + e39);
+ f_next[13] = -e0*(e12 + e33 + e40);
+ f_next[14] = -e0*(e30 + e31 + e41);
+ f_next[15] = -e0*(-e27 + e3 + e34);
+ f_next[16] = -e24*(-e25 + e3 + e39 + e5);
+ f_next[17] = -e0*(e20 + e3 + e40);
+ f_next[18] = -e0*(e12 - e2 + e41);
+}
+
+};
diff --git a/src/LLBM/initialize.h b/src/LLBM/initialize.h
new file mode 100644
index 0000000..b361217
--- /dev/null
+++ b/src/LLBM/initialize.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include "concepts.h"
+
+struct InitializeO {
+
+template <concepts::Arithmetic V, concepts::Arithmetic T = V>
+static void apply(V f_curr[19], V f_next[19]) {
+ f_next[0] = T{0.0277777777777778};
+ f_next[1] = T{0.0277777777777778};
+ f_next[2] = T{0.0555555555555556};
+ f_next[3] = T{0.0277777777777778};
+ f_next[4] = T{0.0277777777777778};
+ f_next[5] = T{0.0277777777777778};
+ f_next[6] = T{0.0555555555555556};
+ f_next[7] = T{0.0277777777777778};
+ f_next[8] = T{0.0555555555555556};
+ f_next[9] = T{0.333333333333333};
+ f_next[10] = T{0.0555