diff options
author | Adrian Kummerlaender | 2021-05-17 00:30:13 +0200 |
---|---|---|
committer | Adrian Kummerlaender | 2021-05-17 00:30:13 +0200 |
commit | a92271176a19e06611099c0eccc4e6a6887f4915 (patch) | |
tree | 54067b334bfae7d99c79cfb00da5891334f9514c | |
download | SweepLB-a92271176a19e06611099c0eccc4e6a6887f4915.tar SweepLB-a92271176a19e06611099c0eccc4e6a6887f4915.tar.gz SweepLB-a92271176a19e06611099c0eccc4e6a6887f4915.tar.bz2 SweepLB-a92271176a19e06611099c0eccc4e6a6887f4915.tar.lz SweepLB-a92271176a19e06611099c0eccc4e6a6887f4915.tar.xz SweepLB-a92271176a19e06611099c0eccc4e6a6887f4915.tar.zst SweepLB-a92271176a19e06611099c0eccc4e6a6887f4915.zip |
Extract public version of SweepLB
-rw-r--r-- | .editorconfig | 2 | ||||
-rw-r--r-- | CMakeLists.txt | 72 | ||||
-rw-r--r-- | box.cpp | 73 | ||||
-rw-r--r-- | channel.cpp | 87 | ||||
-rw-r--r-- | ldc.cpp | 76 | ||||
-rw-r--r-- | shell.nix | 15 | ||||
-rw-r--r-- | src/LLBM/bounce_back.h | 30 | ||||
-rw-r--r-- | src/LLBM/bounce_back_moving_wall.h | 30 | ||||
-rw-r--r-- | src/LLBM/collect_moments.h | 25 | ||||
-rw-r--r-- | src/LLBM/collide.h | 89 | ||||
-rw-r--r-- | src/LLBM/equilibrium_density_wall.h | 144 | ||||
-rw-r--r-- | src/LLBM/equilibrium_velocity_wall.h | 144 | ||||
-rw-r--r-- | src/LLBM/initialize.h | 53 | ||||
-rw-r--r-- | src/LLBM/smagorinsky_collide.h | 129 | ||||
-rw-r--r-- | src/LLBM/wall.h | 4 | ||||
-rw-r--r-- | src/concepts.h | 22 | ||||
-rw-r--r-- | src/cuboid.h | 37 | ||||
-rw-r--r-- | src/export.h | 59 | ||||
-rw-r--r-- | src/lattice.h | 267 | ||||
-rw-r--r-- | src/operator.h | 41 | ||||
-rw-r--r-- | src/pattern/all.h | 5 | ||||
-rw-r--r-- | src/pattern/none.h | 48 | ||||
-rw-r--r-- | src/pattern/ps.h | 123 | ||||
-rw-r--r-- | src/pattern/sss.h | 63 | ||||
-rw-r--r-- | src/population.h | 41 | ||||
-rw-r--r-- | src/propagation.h | 28 | ||||
-rw-r--r-- | src/simd/256.h | 379 | ||||
-rw-r--r-- | src/simd/512.h | 339 |
28 files changed, 2425 insertions, 0 deletions
diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..0b225af --- /dev/null +++ b/.editorconfig @@ -0,0 +1,2 @@ +[*] +indent_style = tab diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..c5b383b --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,72 @@ +cmake_minimum_required(VERSION 3.10) +project(SweepLB LANGUAGES CXX) + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + +set(SIMD_MODE "AVX256" CACHE STRING "Generate either AVX256 or AVX512 instructions") +option(USE_SIMD_CELL_LIST "Generate gather / scatter instructions for cell list processing" OFF) + +if(NOT SIMD_MODE) + set(SIMD_MODE "AVX256") +endif() + +set(CXX_FLAGS "-O3 -march=native -mtune=native") + +if(${SIMD_MODE} MATCHES "AVX256") + message("Selecting AVX256") + set(SIMD_MODE_FLAGS "-mavx2") +elseif(${SIMD_MODE} MATCHES "AVX512") + message("Selecting AVX512") + add_compile_definitions(ENABLE_AVX512) + set(SIMD_MODE_FLAGS "-mavx512f -mavx512dq") +else() + message(FATAL_ERROR "Invalid SIMD mode, choose either AVX256 or AVX512") +endif() + +if(USE_SIMD_CELL_LIST) + message("Enabling SIMD cell list processing.") + add_compile_definitions(SIMD_CELL_LIST) +endif() + +set(CMAKE_CXX_FLAGS "${CXX_FLAGS} ${SIMD_MODE_FLAGS}") + +message("Using flags \"${CMAKE_CXX_FLAGS}\"") + +include_directories( + src/ +) + +find_package(OpenMP) + +set(EXAMPLES ldc box channel) +set(PRECISIONS float double) +set(PATTERNS sss ps) + +foreach(example IN LISTS EXAMPLES) + foreach(precision IN LISTS PRECISIONS) + foreach(pattern IN LISTS PATTERNS) + set(generated_name ${example}_${precision}_${pattern}) + string(TOUPPER ${pattern} pattern) + add_executable(${generated_name} ${example}.cpp) + target_compile_definitions(${generated_name} + PRIVATE + SWEEPLB_PRECISION=${precision} + SWEEPLB_PATTERN=${pattern}) + target_compile_features(${generated_name} + PRIVATE cxx_std_20) + target_link_libraries( + ${generated_name} + PUBLIC + rt) + if(OpenMP_CXX_FOUND) + target_link_libraries( + ${generated_name} + PUBLIC + OpenMP::OpenMP_CXX + ) + endif() + endforeach() + endforeach() +endforeach() @@ -0,0 +1,73 @@ +#include "lattice.h" + +#include "LLBM/collide.h" +#include "LLBM/initialize.h" +#include "LLBM/bounce_back.h" + +#include <chrono> +#include <iostream> + +#include "pattern/all.h" + +using T = SWEEPLB_PRECISION; +using PATTERN = pattern::SWEEPLB_PATTERN<T>; + +void simulate(Cuboid cuboid, std::size_t nStep) { + const int nThread = omp_get_max_threads(); + + Lattice<PATTERN> lattice(cuboid); + + std::vector<simd::Pack<T>::index_t> discontinuity; + + LatticeMask<T> bulk_mask(lattice.volume()); + LatticeMask<T> wall_mask(lattice.volume()); + + cuboid.traverse([&](int iX, int iY, int iZ, std::size_t iCell) { + if (std::abs(iX - cuboid[0]/2) < cuboid[0]/16 && + std::abs(iY - cuboid[1]/2) < cuboid[1]/16 && + std::abs(iZ - cuboid[2]/2) < cuboid[2]/16) { + discontinuity.emplace_back(iCell); + } + if ( iX > 0 && iX < cuboid[0]-1 + && iY > 0 && iY < cuboid[1]-1 + && iZ > 0 && iZ < cuboid[2]-1) { + bulk_mask.set(iCell, true); + } else { + wall_mask.set(iCell, true); + } + }); + + bulk_mask.serialize(); + wall_mask.serialize(); + + lattice.apply<InitializeO>(discontinuity, 2.0); + + T tau = 0.56; + + auto start = std::chrono::steady_clock::now(); + + for (std::size_t iStep = 0; iStep < nStep; ++iStep) { + lattice.apply(Operator(BgkCollideO(), bulk_mask, tau), + Operator(BounceBackO(), wall_mask)); + lattice.stream(); + } + + auto duration = std::chrono::duration_cast<std::chrono::duration<double>>( + std::chrono::steady_clock::now() - start); + + std::cout << cuboid[0] + << ", " << nStep + << ", " << nThread + << ", " << (nStep * lattice.volume()) / (1e6 * duration.count()) + << std::endl; + + lattice.write_momenta(bulk_mask, "result.vtk"); +} + +int main(int argc, char* argv[]) { + const std::size_t nX = atoi(argv[1]); + const std::size_t nY = atoi(argv[2]); + const std::size_t nZ = atoi(argv[3]); + const std::size_t steps = atoi(argv[4]); + simulate({ nX, nY, nZ }, steps); +} diff --git a/channel.cpp b/channel.cpp new file mode 100644 index 0000000..fddb9a1 --- /dev/null +++ b/channel.cpp @@ -0,0 +1,87 @@ +#include "lattice.h" + +#include "LLBM/collide.h" +#include "LLBM/initialize.h" +#include "LLBM/bounce_back.h" +#include "LLBM/bounce_back_moving_wall.h" +#include "LLBM/equilibrium_density_wall.h" +#include "LLBM/equilibrium_velocity_wall.h" + +#include <chrono> +#include <iostream> + +#include "pattern/all.h" + +using T = SWEEPLB_PRECISION; +using PATTERN = pattern::SWEEPLB_PATTERN<T>; + +void simulate(Cuboid cuboid, std::size_t nStep) { + const int nThread = omp_get_max_threads(); + + Lattice<PATTERN> lattice(cuboid); + + LatticeMask<T> bulk_mask(cuboid.volume()); + LatticeMask<T> wall_mask(cuboid.volume()); + LatticeMask<T> inflow_mask(cuboid.volume()); + LatticeMask<T> outflow_mask(cuboid.volume()); + + cuboid.traverse([&](int iX, int iY, int iZ, std::size_t iCell) { + if ( iY == 0 || iY == cuboid[1]-1 + || iZ == 0 || iZ == cuboid[2]-1) { + wall_mask.set(iCell, true); + } else if (iX == 0) { + inflow_mask.set(iCell, true); + } else if (iX == cuboid[0]-1) { + outflow_mask.set(iCell, true); + } else { + bulk_mask.set(iCell, true); + } + }); + + bulk_mask.serialize(); + wall_mask.serialize(); + inflow_mask.serialize(); + outflow_mask.serialize(); + + T tau = 0.56; + + T u_inflow = 0.05; + T d_outflow = 1.; + + for (std::size_t iStep = 0; iStep < 100; ++iStep) { + lattice.apply(Operator(BgkCollideO(), bulk_mask, tau), + Operator(BounceBackO(), wall_mask), + Operator(EquilibriumVelocityWallO(), inflow_mask, u_inflow, WallNormal<1,0,0>()), + Operator(EquilibriumDensityWallO(), outflow_mask, d_outflow, WallNormal<-1,0,0>())); + lattice.stream(); + } + + auto start = std::chrono::steady_clock::now(); + + for (std::size_t iStep = 0; iStep < nStep; ++iStep) { + lattice.apply(Operator(BgkCollideO(), bulk_mask, tau), + Operator(BounceBackO(), wall_mask), + Operator(EquilibriumVelocityWallO(), inflow_mask, u_inflow, WallNormal<1,0,0>()), + Operator(EquilibriumDensityWallO(), outflow_mask, d_outflow, WallNormal<-1,0,0>())); + lattice.stream(); + } + + auto duration = std::chrono::duration_cast<std::chrono::duration<double>>( + std::chrono::steady_clock::now() - start); + + std::cout << cuboid[0] << ", " << cuboid[1] << ", " << cuboid[2] + << ", " << nStep + << ", " << nThread + << ", " << (nStep * lattice.volume()) / (1e6 * duration.count()) + << std::endl; + + lattice.write_momenta(bulk_mask, "result.vtk"); +} + +int main(int argc, char* argv[]) { + const std::size_t nX = atoi(argv[1]); + const std::size_t nY = atoi(argv[2]); + const std::size_t nZ = atoi(argv[3]); + const std::size_t steps = atoi(argv[4]); + simulate({ nX, nY, nZ }, steps); +} @@ -0,0 +1,76 @@ +#include "lattice.h" + +#include "LLBM/collide.h" +#include "LLBM/initialize.h" +#include "LLBM/bounce_back.h" +#include "LLBM/bounce_back_moving_wall.h" + +#include <chrono> +#include <iostream> + +#include "pattern/all.h" + +using T = SWEEPLB_PRECISION; +using PATTERN = pattern::SWEEPLB_PATTERN<T>; + +void simulate(Cuboid cuboid, std::size_t nStep) { + const int nThread = omp_get_max_threads(); + + Lattice<PATTERN> lattice(cuboid); + + LatticeMask<T> bulk_mask(cuboid.volume()); + LatticeMask<T> box_mask(cuboid.volume()); + LatticeMask<T> lid_mask(cuboid.volume()); + + cuboid.traverse([&](int iX, int iY, int iZ, std::size_t iCell) { + if (iZ == cuboid[2]-1) { + lid_mask.set(iCell, true); + } else if (iX == 0 || iX == cuboid[0]-1 + || iY == 0 || iY == cuboid[1]-1 + || iZ == 0) { + box_mask.set(iCell, true); + } else { + bulk_mask.set(iCell, true); + } + }); + + bulk_mask.serialize(); + box_mask.serialize(); + lid_mask.serialize(); + + T tau = 0.51; + T u_lid[] { 0.05, 0., 0. }; + + for (std::size_t iStep = 0; iStep < 100; ++iStep) { + lattice.apply(Operator(BgkCollideO(), bulk_mask, tau), + Operator(BounceBackO(), box_mask), + Operator(BounceBackMovingWallO(), lid_mask, u_lid)); + lattice.stream(); + } + + auto start = std::chrono::steady_clock::now(); + + for (std::size_t iStep = 0; iStep < nStep; ++iStep) { + lattice.apply(Operator(BgkCollideO(), bulk_mask, tau), + Operator(BounceBackO(), box_mask), + Operator(BounceBackMovingWallO(), lid_mask, u_lid)); + lattice.stream(); + } + + auto duration = std::chrono::duration_cast<std::chrono::duration<double>>( + std::chrono::steady_clock::now() - start); + + std::cout << cuboid[0] + << ", " << nStep + << ", " << nThread + << ", " << (nStep * lattice.volume()) / (1e6 * duration.count()) + << std::endl; + + lattice.write_momenta(bulk_mask, "result.vtk"); +} + +int main(int argc, char* argv[]) { + const std::size_t n = atoi(argv[1]); + const std::size_t steps = atoi(argv[2]); + simulate({ n, n, n}, steps); +} diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..d156272 --- /dev/null +++ b/shell.nix @@ -0,0 +1,15 @@ +{ pkgs ? import <nixpkgs> { }, ... }: + +pkgs.stdenvNoCC.mkDerivation rec { + name = "sweeplb-env"; + env = pkgs.buildEnv { name = name; paths = buildInputs; }; + + buildInputs = with pkgs; [ + gcc10 + cmake + ]; + + shellHook = '' + export NIX_SHELL_NAME="${name}" + ''; +} diff --git a/src/LLBM/bounce_back.h b/src/LLBM/bounce_back.h new file mode 100644 index 0000000..b710ee4 --- /dev/null +++ b/src/LLBM/bounce_back.h @@ -0,0 +1,30 @@ +#pragma once + +#include "concepts.h" + +struct BounceBackO { + +template <concepts::Arithmetic V> +static void apply(V f_curr[19], V f_next[19]) { + f_next[0] = f_curr[18]; + f_next[1] = f_curr[17]; + f_next[2] = f_curr[16]; + f_next[3] = f_curr[15]; + f_next[4] = f_curr[14]; + f_next[5] = f_curr[13]; + f_next[6] = f_curr[12]; + f_next[7] = f_curr[11]; + f_next[8] = f_curr[10]; + f_next[9] = f_curr[9]; + f_next[10] = f_curr[8]; + f_next[11] = f_curr[7]; + f_next[12] = f_curr[6]; + f_next[13] = f_curr[5]; + f_next[14] = f_curr[4]; + f_next[15] = f_curr[3]; + f_next[16] = f_curr[2]; + f_next[17] = f_curr[1]; + f_next[18] = f_curr[0]; +} + +}; diff --git a/src/LLBM/bounce_back_moving_wall.h b/src/LLBM/bounce_back_moving_wall.h new file mode 100644 index 0000000..c00a17e --- /dev/null +++ b/src/LLBM/bounce_back_moving_wall.h @@ -0,0 +1,30 @@ +#pragma once + +#include "concepts.h" + +struct BounceBackMovingWallO { + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T u[3]) { + f_next[0] = f_curr[18] + T{0.166666666666667}*u[1] + T{0.166666666666667}*u[2]; + f_next[1] = f_curr[17] - T{0.166666666666667}*u[0] + T{0.166666666666667}*u[2]; + f_next[2] = f_curr[16] + T{0.333333333333333}*u[2]; + f_next[3] = f_curr[15] + T{0.166666666666667}*u[0] + T{0.166666666666667}*u[2]; + f_next[4] = f_curr[14] - T{0.166666666666667}*u[1] + T{0.166666666666667}*u[2]; + f_next[5] = f_curr[13] - T{0.166666666666667}*u[0] + T{0.166666666666667}*u[1]; + f_next[6] = f_curr[12] + T{0.333333333333333}*u[1]; + f_next[7] = f_curr[11] + T{0.166666666666667}*u[0] + T{0.166666666666667}*u[1]; + f_next[8] = f_curr[10] - T{0.333333333333333}*u[0]; + f_next[9] = f_curr[9]; + f_next[10] = f_curr[8] + T{0.333333333333333}*u[0]; + f_next[11] = f_curr[7] - T{0.166666666666667}*u[0] - T{0.166666666666667}*u[1]; + f_next[12] = f_curr[6] - T{0.333333333333333}*u[1]; + f_next[13] = f_curr[5] + T{0.166666666666667}*u[0] - T{0.166666666666667}*u[1]; + f_next[14] = f_curr[4] + T{0.166666666666667}*u[1] - T{0.166666666666667}*u[2]; + f_next[15] = f_curr[3] - T{0.166666666666667}*u[0] - T{0.166666666666667}*u[2]; + f_next[16] = f_curr[2] - T{0.333333333333333}*u[2]; + f_next[17] = f_curr[1] + T{0.166666666666667}*u[0] - T{0.166666666666667}*u[2]; + f_next[18] = f_curr[0] - T{0.166666666666667}*u[1] - T{0.166666666666667}*u[2]; +} + +}; diff --git a/src/LLBM/collect_moments.h b/src/LLBM/collect_moments.h new file mode 100644 index 0000000..afd5d59 --- /dev/null +++ b/src/LLBM/collect_moments.h @@ -0,0 +1,25 @@ +#pragma once + +#include "concepts.h" + +struct CollectMomentsF { + +static constexpr unsigned size = 4; + +template <concepts::Arithmetic V> +static void observe(V f_curr[19], V* momenta) { + V m0 = f_curr[10] + f_curr[13] + f_curr[17]; + V m1 = f_curr[14] + f_curr[5] + f_curr[6]; + V m2 = f_curr[1] + f_curr[2] + f_curr[4]; + V m3 = m0 + m1 + m2 + f_curr[0] + f_curr[11] + f_curr[12] + f_curr[15] + f_curr[16] + f_curr[18] + f_curr[3] + f_curr[7] + f_curr[8] + f_curr[9]; + V m4 = -f_curr[11] + f_curr[7]; + V m5 = -f_curr[15] + f_curr[3]; + V m6 = V{1}/m3; + V m7 = f_curr[0] - f_curr[18]; + momenta[0] = m3; + momenta[1] = m6*(m0 + m4 + m5 - f_curr[1] - f_curr[5] - f_curr[8]); + momenta[2] = m6*(m1 + m4 + m7 - f_curr[12] - f_curr[13] - f_curr[4]); + momenta[3] = m6*(m2 + m5 + m7 - f_curr[14] - f_curr[16] - f_curr[17]); +} + +}; diff --git a/src/LLBM/collide.h b/src/LLBM/collide.h new file mode 100644 index 0000000..c08aade --- /dev/null +++ b/src/LLBM/collide.h @@ -0,0 +1,89 @@ +#pragma once + +#include "concepts.h" + +struct BgkCollideO { + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T tau) { + V rho; + V u[3]; + V m0 = f_curr[10] + f_curr[13] + f_curr[17]; + V m1 = f_curr[14] + f_curr[5] + f_curr[6]; + V m2 = f_curr[1] + f_curr[2] + f_curr[4]; + V m3 = m0 + m1 + m2 + f_curr[0] + f_curr[11] + f_curr[12] + f_curr[15] + f_curr[16] + f_curr[18] + f_curr[3] + f_curr[7] + f_curr[8] + f_curr[9]; + V m4 = -f_curr[11] + f_curr[7]; + V m5 = -f_curr[15] + f_curr[3]; + V m6 = T{1}/m3; + V m7 = f_curr[0] - f_curr[18]; + rho = m3; + u[0] = m6*(m0 + m4 + m5 - f_curr[1] - f_curr[5] - f_curr[8]); + u[1] = m6*(m1 + m4 + m7 - f_curr[12] - f_curr[13] - f_curr[4]); + u[2] = m6*(m2 + m5 + m7 - f_curr[14] - f_curr[16] - f_curr[17]); + V x0 = T{1}/tau; + V x1 = T{0.0138888888888889}*x0; + V x2 = u[1] + u[2]; + V x3 = T{9.00000000000000}*(x2*x2); + V x4 = T{6.00000000000000}*u[2]; + V x5 = u[1]*u[1]; + V x6 = T{3.00000000000000}*x5; + V x7 = -x6; + V x8 = u[0]*u[0]; + V x9 = T{3.00000000000000}*x8; + V x10 = T{2.00000000000000} - x9; + V x11 = x10 + x7; + V x12 = x11 + x4; + V x13 = u[2]*u[2]; + V x14 = T{3.00000000000000}*x13; + V x15 = -x14; + V x16 = T{6.00000000000000}*u[1]; + V x17 = x15 + x16; + V x18 = T{6.00000000000000}*u[0]; + V x19 = -u[0]; + V x20 = x19 + u[2]; + V x21 = x14 + x6 + T{-2.00000000000000}; + V x22 = x21 + x9; + V x23 = x22 - x4; + V x24 = T{0.0277777777777778}*x0; + V x25 = T{6.00000000000000}*x13; + V x26 = u[0] + u[2]; + V x27 = T{9.00000000000000}*(x26*x26); + V x28 = x15 + x18; + V x29 = -u[1]; + V x30 = x29 + u[2]; + V x31 = x19 + u[1]; + V x32 = -x16; + V x33 = x18 + x22; + V x34 = T{6.00000000000000}*x5; + V x35 = u[0] + u[1]; + V x36 = T{9.00000000000000}*(x35*x35); + V x37 = T{6.00000000000000}*x8; + V x38 = x9 + T{-2.00000000000000}; + V x39 = x29 + u[0]; + V x40 = -x18 + x22; + V x41 = -u[2]; + V x42 = x41 + u[1]; + V x43 = x22 + x4; + V x44 = x41 + u[0]; + f_next[0] = x1*(rho*(x12 + x17 + x3) - T{72.0000000000000}*f_curr[0]) + f_curr[0]; + f_next[1] = -x1*(rho*(x18 + x23 - T{9.00000000000000}*x20*x20) + T{72.0000000000000}*f_curr[1]) + f_curr[1]; + f_next[2] = x24*(rho*(x12 + x25) - T{36.0000000000000}*f_curr[2]) + f_curr[2]; + f_next[3] = x1*(rho*(x12 + x27 + x28) - T{72.0000000000000}*f_curr[3]) + f_curr[3]; + f_next[4] = -x1*(rho*(x16 + x23 - T{9.00000000000000}*x30*x30) + T{72.0000000000000}*f_curr[4]) + f_curr[4]; + f_next[5] = -x1*(rho*(x32 + x33 - T{9.00000000000000}*x31*x31) + T{72.0000000000000}*f_curr[5]) + f_curr[5]; + f_next[6] = x24*(rho*(x10 + x17 + x34) - T{36.0000000000000}*f_curr[6]) + f_curr[6]; + f_next[7] = x1*(rho*(x11 + x17 + x18 + x36) - T{72.0000000000000}*f_curr[7]) + f_curr[7]; + f_next[8] = -x24*(rho*(x18 + x21 - x37) + T{36.0000000000000}*f_curr[8]) + f_curr[8]; + f_next[9] = -T{0.166666666666667}*x0*(rho*x22 + T{6.00000000000000}*f_curr[9]) + f_curr[9]; + f_next[10] = x24*(rho*(x28 + x37 + x7 + T{2.00000000000000}) - T{36.0000000000000}*f_curr[10]) + f_curr[10]; + f_next[11] = -x1*(rho*(x16 + x33 - x36) + T{72.0000000000000}*f_curr[11]) + f_curr[11]; + f_next[12] = -x24*(rho*(x14 + x16 - x34 + x38) + T{36.0000000000000}*f_curr[12]) + f_curr[12]; + f_next[13] = -x1*(rho*(x16 + x40 - T{9.00000000000000}*x39*x39) + T{72.0000000000000}*f_curr[13]) + f_curr[13]; + f_next[14] = -x1*(rho*(x32 + x43 - T{9.00000000000000}*x42*x42) + T{72.0000000000000}*f_curr[14]) + f_curr[14]; + f_next[15] = -x1*(rho*(-x27 + x33 + x4) + T{72.0000000000000}*f_curr[15]) + f_curr[15]; + f_next[16] = -x24*(rho*(-x25 + x38 + x4 + x6) + T{36.0000000000000}*f_curr[16]) + f_curr[16]; + f_next[17] = -x1*(rho*(x4 + x40 - T{9.00000000000000}*x44*x44) + T{72.0000000000000}*f_curr[17]) + f_curr[17]; + f_next[18] = -x1*(rho*(x16 - x3 + x43) + T{72.0000000000000}*f_curr[18]) + f_curr[18]; +} + +}; diff --git a/src/LLBM/equilibrium_density_wall.h b/src/LLBM/equilibrium_density_wall.h new file mode 100644 index 0000000..598d59d --- /dev/null +++ b/src/LLBM/equilibrium_density_wall.h @@ -0,0 +1,144 @@ +#pragma once + +#include "wall.h" +#include "concepts.h" + +struct EquilibriumDensityWallO { + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T rho_w, WallNormal<1,0,0>) { + V u_w = (rho_w - f_curr[0] - T{2.00000000000000}*f_curr[11] - f_curr[12] - f_curr[14] - T{2.00000000000000}*f_curr[15] - f_curr[16] - f_curr[18] - T{2.00000000000000}*f_curr[1] - f_curr[2] - f_curr[4] - T{2.00000000000000}*f_curr[5] - f_curr[6] - T{2.00000000000000}*f_curr[8] - f_curr[9])/rho_w; + V rho = rho_w; + V u[3] { u_w, 0., 0. }; + V e0 = T{0.0277777777777778}*rho; + V e1 = u[1] + u[2]; + V e2 = T{4.50000000000000}*(e1*e1); + V e3 = T{3.00000000000000}*u[2]; + V e4 = u[1]*u[1]; + V e5 = T{1.50000000000000}*e4; + V e6 = -e5; + V e7 = u[0]*u[0]; + V e8 = T{1.50000000000000}*e7; + V e9 = T{1.00000000000000} - e8; + V e10 = e6 + e9; + V e11 = e10 + e3; + V e12 = T{3.00000000000000}*u[1]; + V e13 = u[2]*u[2]; + V e14 = T{1.50000000000000}*e13; + V e15 = -e14; + V e16 = e12 + e15; + V e17 = T{3.00000000000000}*u[0]; + V e18 = -u[2]; + V e19 = e18 + u[0]; + V e20 = -T{4.50000000000000}*e19*e19; + V e21 = e14 + e5 + T{-1.00000000000000}; + V e22 = e21 + e8; + V e23 = e22 - e3; + V e24 = T{0.0555555555555556}*rho; + V e25 = T{3.00000000000000}*e13; + V e26 = u[0] + u[2]; + V e27 = T{4.50000000000000}*(e26*e26); + V e28 = e15 + e17; + V e29 = e18 + u[1]; + V e30 = -T{4.50000000000000}*e29*e29; + V e31 = -e12; + V e32 = u[0] - u[1]; + V e33 = -T{4.50000000000000}*e32*e32; + V e34 = e17 + e22; + V e35 = T{3.00000000000000}*e4; + V e36 = u[0] + u[1]; + V e37 = T{4.50000000000000}*(e36*e36); + V e38 = T{3.00000000000000}*e7; + V e39 = e8 + T{-1.00000000000000}; + V e40 = -e17 + e22; + V e41 = e22 + e3; + f_next[0] = e0*(e11 + e16 + e2); + f_next[1] = -e0*(e17 + e20 + e23); + f_next[2] = e24*(e11 + e25); + f_next[3] = e0*(e11 + e27 + e28); + f_next[4] = -e0*(e12 + e23 + e30); + f_next[5] = -e0*(e31 + e33 + e34); + f_next[6] = e24*(e16 + e35 + e9); + f_next[7] = e0*(e10 + e16 + e17 + e37); + f_next[8] = -e24*(e17 + e21 - e38); + f_next[9] = -T{0.333333333333333}*e22*rho; + f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); + f_next[11] = -e0*(e12 + e34 - e37); + f_next[12] = -e24*(e12 + e14 - e35 + e39); + f_next[13] = -e0*(e12 + e33 + e40); + f_next[14] = -e0*(e30 + e31 + e41); + f_next[15] = -e0*(-e27 + e3 + e34); + f_next[16] = -e24*(-e25 + e3 + e39 + e5); + f_next[17] = -e0*(e20 + e3 + e40); + f_next[18] = -e0*(e12 - e2 + e41); +} + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T rho_w, WallNormal<-1,0,0>) { + V u_w = (-rho_w + f_curr[0] + T{2.00000000000000}*f_curr[10] + f_curr[12] + T{2.00000000000000}*f_curr[13] + f_curr[14] + f_curr[16] + T{2.00000000000000}*f_curr[17] + f_curr[18] + f_curr[2] + T{2.00000000000000}*f_curr[3] + f_curr[4] + f_curr[6] + T{2.00000000000000}*f_curr[7] + f_curr[9])/rho_w; + V rho = rho_w; + V u[3] { u_w, 0., 0. }; + V e0 = T{0.0277777777777778}*rho; + V e1 = u[1] + u[2]; + V e2 = T{4.50000000000000}*(e1*e1); + V e3 = T{3.00000000000000}*u[2]; + V e4 = u[1]*u[1]; + V e5 = T{1.50000000000000}*e4; + V e6 = -e5; + V e7 = u[0]*u[0]; + V e8 = T{1.50000000000000}*e7; + V e9 = T{1.00000000000000} - e8; + V e10 = e6 + e9; + V e11 = e10 + e3; + V e12 = T{3.00000000000000}*u[1]; + V e13 = u[2]*u[2]; + V e14 = T{1.50000000000000}*e13; + V e15 = -e14; + V e16 = e12 + e15; + V e17 = T{3.00000000000000}*u[0]; + V e18 = -u[2]; + V e19 = e18 + u[0]; + V e20 = -T{4.50000000000000}*e19*e19; + V e21 = e14 + e5 + T{-1.00000000000000}; + V e22 = e21 + e8; + V e23 = e22 - e3; + V e24 = T{0.0555555555555556}*rho; + V e25 = T{3.00000000000000}*e13; + V e26 = u[0] + u[2]; + V e27 = T{4.50000000000000}*(e26*e26); + V e28 = e15 + e17; + V e29 = e18 + u[1]; + V e30 = -T{4.50000000000000}*e29*e29; + V e31 = -e12; + V e32 = u[0] - u[1]; + V e33 = -T{4.50000000000000}*e32*e32; + V e34 = e17 + e22; + V e35 = T{3.00000000000000}*e4; + V e36 = u[0] + u[1]; + V e37 = T{4.50000000000000}*(e36*e36); + V e38 = T{3.00000000000000}*e7; + V e39 = e8 + T{-1.00000000000000}; + V e40 = -e17 + e22; + V e41 = e22 + e3; + f_next[0] = e0*(e11 + e16 + e2); + f_next[1] = -e0*(e17 + e20 + e23); + f_next[2] = e24*(e11 + e25); + f_next[3] = e0*(e11 + e27 + e28); + f_next[4] = -e0*(e12 + e23 + e30); + f_next[5] = -e0*(e31 + e33 + e34); + f_next[6] = e24*(e16 + e35 + e9); + f_next[7] = e0*(e10 + e16 + e17 + e37); + f_next[8] = -e24*(e17 + e21 - e38); + f_next[9] = -T{0.333333333333333}*e22*rho; + f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); + f_next[11] = -e0*(e12 + e34 - e37); + f_next[12] = -e24*(e12 + e14 - e35 + e39); + f_next[13] = -e0*(e12 + e33 + e40); + f_next[14] = -e0*(e30 + e31 + e41); + f_next[15] = -e0*(-e27 + e3 + e34); + f_next[16] = -e24*(-e25 + e3 + e39 + e5); + f_next[17] = -e0*(e20 + e3 + e40); + f_next[18] = -e0*(e12 - e2 + e41); +} + +}; diff --git a/src/LLBM/equilibrium_velocity_wall.h b/src/LLBM/equilibrium_velocity_wall.h new file mode 100644 index 0000000..2d68e34 --- /dev/null +++ b/src/LLBM/equilibrium_velocity_wall.h @@ -0,0 +1,144 @@ +#pragma once + +#include "wall.h" +#include "concepts.h" + +struct EquilibriumVelocityWallO { + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T u_w, WallNormal<1,0,0>) { + V rho; + rho = -(f_curr[0] + T{2.00000000000000}*f_curr[11] + f_curr[12] + f_curr[14] + T{2.00000000000000}*f_curr[15] + f_curr[16] + f_curr[18] + T{2.00000000000000}*f_curr[1] + f_curr[2] + f_curr[4] + T{2.00000000000000}*f_curr[5] + f_curr[6] + T{2.00000000000000}*f_curr[8] + f_curr[9])/(u_w + T{-1.00000000000000}); + V u[3] { u_w, 0., 0. }; + V e0 = T{0.0277777777777778}*rho; + V e1 = u[1] + u[2]; + V e2 = T{4.50000000000000}*(e1*e1); + V e3 = T{3.00000000000000}*u[2]; + V e4 = u[1]*u[1]; + V e5 = T{1.50000000000000}*e4; + V e6 = -e5; + V e7 = u[0]*u[0]; + V e8 = T{1.50000000000000}*e7; + V e9 = T{1.00000000000000} - e8; + V e10 = e6 + e9; + V e11 = e10 + e3; + V e12 = T{3.00000000000000}*u[1]; + V e13 = u[2]*u[2]; + V e14 = T{1.50000000000000}*e13; + V e15 = -e14; + V e16 = e12 + e15; + V e17 = T{3.00000000000000}*u[0]; + V e18 = -u[2]; + V e19 = e18 + u[0]; + V e20 = -T{4.50000000000000}*e19*e19; + V e21 = e14 + e5 + T{-1.00000000000000}; + V e22 = e21 + e8; + V e23 = e22 - e3; + V e24 = T{0.0555555555555556}*rho; + V e25 = T{3.00000000000000}*e13; + V e26 = u[0] + u[2]; + V e27 = T{4.50000000000000}*(e26*e26); + V e28 = e15 + e17; + V e29 = e18 + u[1]; + V e30 = -T{4.50000000000000}*e29*e29; + V e31 = -e12; + V e32 = u[0] - u[1]; + V e33 = -T{4.50000000000000}*e32*e32; + V e34 = e17 + e22; + V e35 = T{3.00000000000000}*e4; + V e36 = u[0] + u[1]; + V e37 = T{4.50000000000000}*(e36*e36); + V e38 = T{3.00000000000000}*e7; + V e39 = e8 + T{-1.00000000000000}; + V e40 = -e17 + e22; + V e41 = e22 + e3; + f_next[0] = e0*(e11 + e16 + e2); + f_next[1] = -e0*(e17 + e20 + e23); + f_next[2] = e24*(e11 + e25); + f_next[3] = e0*(e11 + e27 + e28); + f_next[4] = -e0*(e12 + e23 + e30); + f_next[5] = -e0*(e31 + e33 + e34); + f_next[6] = e24*(e16 + e35 + e9); + f_next[7] = e0*(e10 + e16 + e17 + e37); + f_next[8] = -e24*(e17 + e21 - e38); + f_next[9] = -T{0.333333333333333}*e22*rho; + f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); + f_next[11] = -e0*(e12 + e34 - e37); + f_next[12] = -e24*(e12 + e14 - e35 + e39); + f_next[13] = -e0*(e12 + e33 + e40); + f_next[14] = -e0*(e30 + e31 + e41); + f_next[15] = -e0*(-e27 + e3 + e34); + f_next[16] = -e24*(-e25 + e3 + e39 + e5); + f_next[17] = -e0*(e20 + e3 + e40); + f_next[18] = -e0*(e12 - e2 + e41); +} + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T u_w, WallNormal<-1,0,0>) { + V rho; + rho = (f_curr[0] + T{2.00000000000000}*f_curr[10] + f_curr[12] + T{2.00000000000000}*f_curr[13] + f_curr[14] + f_curr[16] + T{2.00000000000000}*f_curr[17] + f_curr[18] + f_curr[2] + T{2.00000000000000}*f_curr[3] + f_curr[4] + f_curr[6] + T{2.00000000000000}*f_curr[7] + f_curr[9])/(u_w + T{1.00000000000000}); + V u[3] { u_w, 0., 0. }; + V e0 = T{0.0277777777777778}*rho; + V e1 = u[1] + u[2]; + V e2 = T{4.50000000000000}*(e1*e1); + V e3 = T{3.00000000000000}*u[2]; + V e4 = u[1]*u[1]; + V e5 = T{1.50000000000000}*e4; + V e6 = -e5; + V e7 = u[0]*u[0]; + V e8 = T{1.50000000000000}*e7; + V e9 = T{1.00000000000000} - e8; + V e10 = e6 + e9; + V e11 = e10 + e3; + V e12 = T{3.00000000000000}*u[1]; + V e13 = u[2]*u[2]; + V e14 = T{1.50000000000000}*e13; + V e15 = -e14; + V e16 = e12 + e15; + V e17 = T{3.00000000000000}*u[0]; + V e18 = -u[2]; + V e19 = e18 + u[0]; + V e20 = -T{4.50000000000000}*e19*e19; + V e21 = e14 + e5 + T{-1.00000000000000}; + V e22 = e21 + e8; + V e23 = e22 - e3; + V e24 = T{0.0555555555555556}*rho; + V e25 = T{3.00000000000000}*e13; + V e26 = u[0] + u[2]; + V e27 = T{4.50000000000000}*(e26*e26); + V e28 = e15 + e17; + V e29 = e18 + u[1]; + V e30 = -T{4.50000000000000}*e29*e29; + V e31 = -e12; + V e32 = u[0] - u[1]; + V e33 = -T{4.50000000000000}*e32*e32; + V e34 = e17 + e22; + V e35 = T{3.00000000000000}*e4; + V e36 = u[0] + u[1]; + V e37 = T{4.50000000000000}*(e36*e36); + V e38 = T{3.00000000000000}*e7; + V e39 = e8 + T{-1.00000000000000}; + V e40 = -e17 + e22; + V e41 = e22 + e3; + f_next[0] = e0*(e11 + e16 + e2); + f_next[1] = -e0*(e17 + e20 + e23); + f_next[2] = e24*(e11 + e25); + f_next[3] = e0*(e11 + e27 + e28); + f_next[4] = -e0*(e12 + e23 + e30); + f_next[5] = -e0*(e31 + e33 + e34); + f_next[6] = e24*(e16 + e35 + e9); + f_next[7] = e0*(e10 + e16 + e17 + e37); + f_next[8] = -e24*(e17 + e21 - e38); + f_next[9] = -T{0.333333333333333}*e22*rho; + f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); + f_next[11] = -e0*(e12 + e34 - e37); + f_next[12] = -e24*(e12 + e14 - e35 + e39); + f_next[13] = -e0*(e12 + e33 + e40); + f_next[14] = -e0*(e30 + e31 + e41); + f_next[15] = -e0*(-e27 + e3 + e34); + f_next[16] = -e24*(-e25 + e3 + e39 + e5); + f_next[17] = -e0*(e20 + e3 + e40); + f_next[18] = -e0*(e12 - e2 + e41); +} + +}; diff --git a/src/LLBM/initialize.h b/src/LLBM/initialize.h new file mode 100644 index 0000000..b361217 --- /dev/null +++ b/src/LLBM/initialize.h @@ -0,0 +1,53 @@ +#pragma once + +#include "concepts.h" + +struct InitializeO { + +template <concepts::Arithmetic V, concepts::Arithmetic T = V> +static void apply(V f_curr[19], V f_next[19]) { + f_next[0] = T{0.0277777777777778}; + f_next[1] = T{0.0277777777777778}; + f_next[2] = T{0.0555555555555556}; + f_next[3] = T{0.0277777777777778}; + f_next[4] = T{0.0277777777777778}; + f_next[5] = T{0.0277777777777778}; + f_next[6] = T{0.0555555555555556}; + f_next[7] = T{0.0277777777777778}; + f_next[8] = T{0.0555555555555556}; + f_next[9] = T{0.333333333333333}; + f_next[10] = T{0.0555 |