diff options
| -rw-r--r-- | .editorconfig | 2 | ||||
| -rw-r--r-- | CMakeLists.txt | 72 | ||||
| -rw-r--r-- | box.cpp | 73 | ||||
| -rw-r--r-- | channel.cpp | 87 | ||||
| -rw-r--r-- | ldc.cpp | 76 | ||||
| -rw-r--r-- | shell.nix | 15 | ||||
| -rw-r--r-- | src/LLBM/bounce_back.h | 30 | ||||
| -rw-r--r-- | src/LLBM/bounce_back_moving_wall.h | 30 | ||||
| -rw-r--r-- | src/LLBM/collect_moments.h | 25 | ||||
| -rw-r--r-- | src/LLBM/collide.h | 89 | ||||
| -rw-r--r-- | src/LLBM/equilibrium_density_wall.h | 144 | ||||
| -rw-r--r-- | src/LLBM/equilibrium_velocity_wall.h | 144 | ||||
| -rw-r--r-- | src/LLBM/initialize.h | 53 | ||||
| -rw-r--r-- | src/LLBM/smagorinsky_collide.h | 129 | ||||
| -rw-r--r-- | src/LLBM/wall.h | 4 | ||||
| -rw-r--r-- | src/concepts.h | 22 | ||||
| -rw-r--r-- | src/cuboid.h | 37 | ||||
| -rw-r--r-- | src/export.h | 59 | ||||
| -rw-r--r-- | src/lattice.h | 267 | ||||
| -rw-r--r-- | src/operator.h | 41 | ||||
| -rw-r--r-- | src/pattern/all.h | 5 | ||||
| -rw-r--r-- | src/pattern/none.h | 48 | ||||
| -rw-r--r-- | src/pattern/ps.h | 123 | ||||
| -rw-r--r-- | src/pattern/sss.h | 63 | ||||
| -rw-r--r-- | src/population.h | 41 | ||||
| -rw-r--r-- | src/propagation.h | 28 | ||||
| -rw-r--r-- | src/simd/256.h | 379 | ||||
| -rw-r--r-- | src/simd/512.h | 339 | 
28 files changed, 2425 insertions, 0 deletions
diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..0b225af --- /dev/null +++ b/.editorconfig @@ -0,0 +1,2 @@ +[*] +indent_style = tab diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..c5b383b --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,72 @@ +cmake_minimum_required(VERSION 3.10) +project(SweepLB LANGUAGES CXX) + +if(NOT CMAKE_BUILD_TYPE) +  set(CMAKE_BUILD_TYPE Release) +endif() + +set(SIMD_MODE "AVX256" CACHE STRING "Generate either AVX256 or AVX512 instructions") +option(USE_SIMD_CELL_LIST "Generate gather / scatter instructions for cell list processing" OFF) + +if(NOT SIMD_MODE) +	set(SIMD_MODE "AVX256") +endif() + +set(CXX_FLAGS "-O3 -march=native -mtune=native") + +if(${SIMD_MODE} MATCHES "AVX256") +	message("Selecting AVX256") +	set(SIMD_MODE_FLAGS "-mavx2") +elseif(${SIMD_MODE} MATCHES "AVX512") +	message("Selecting AVX512") +	add_compile_definitions(ENABLE_AVX512) +	set(SIMD_MODE_FLAGS "-mavx512f -mavx512dq") +else() +  message(FATAL_ERROR "Invalid SIMD mode, choose either AVX256 or AVX512") +endif() + +if(USE_SIMD_CELL_LIST) +  message("Enabling SIMD cell list processing.") +  add_compile_definitions(SIMD_CELL_LIST) +endif() + +set(CMAKE_CXX_FLAGS "${CXX_FLAGS} ${SIMD_MODE_FLAGS}") + +message("Using flags \"${CMAKE_CXX_FLAGS}\"") + +include_directories( +  src/ +) + +find_package(OpenMP) + +set(EXAMPLES ldc box channel) +set(PRECISIONS float double) +set(PATTERNS sss ps) + +foreach(example IN LISTS EXAMPLES) +	foreach(precision IN LISTS PRECISIONS) +		foreach(pattern IN LISTS PATTERNS) +			set(generated_name ${example}_${precision}_${pattern}) +			string(TOUPPER ${pattern} pattern) +			add_executable(${generated_name} ${example}.cpp) +			target_compile_definitions(${generated_name} +				PRIVATE +				SWEEPLB_PRECISION=${precision} +				SWEEPLB_PATTERN=${pattern}) +			target_compile_features(${generated_name} +				PRIVATE cxx_std_20) +			target_link_libraries( +				${generated_name} +				PUBLIC +				  rt) +			if(OpenMP_CXX_FOUND) +				target_link_libraries( +					${generated_name} +					PUBLIC +					  OpenMP::OpenMP_CXX +				) +			endif() +	  endforeach() +  endforeach() +endforeach() @@ -0,0 +1,73 @@ +#include "lattice.h" + +#include "LLBM/collide.h" +#include "LLBM/initialize.h" +#include "LLBM/bounce_back.h" + +#include <chrono> +#include <iostream> + +#include "pattern/all.h" + +using T = SWEEPLB_PRECISION; +using PATTERN = pattern::SWEEPLB_PATTERN<T>; + +void simulate(Cuboid cuboid, std::size_t nStep) { +	const int nThread = omp_get_max_threads(); + +	Lattice<PATTERN> lattice(cuboid); + +	std::vector<simd::Pack<T>::index_t> discontinuity; + +	LatticeMask<T> bulk_mask(lattice.volume()); +	LatticeMask<T> wall_mask(lattice.volume()); + +	cuboid.traverse([&](int iX, int iY, int iZ, std::size_t iCell) { +		if (std::abs(iX - cuboid[0]/2) < cuboid[0]/16 && +		    std::abs(iY - cuboid[1]/2) < cuboid[1]/16 && +		    std::abs(iZ - cuboid[2]/2) < cuboid[2]/16) { +			discontinuity.emplace_back(iCell); +		} +		if (   iX > 0 && iX < cuboid[0]-1 +		    && iY > 0 && iY < cuboid[1]-1 +		    && iZ > 0 && iZ < cuboid[2]-1) { +			bulk_mask.set(iCell, true); +		} else { +			wall_mask.set(iCell, true); +		} +	}); + +	bulk_mask.serialize(); +	wall_mask.serialize(); + +	lattice.apply<InitializeO>(discontinuity, 2.0); + +	T tau = 0.56; + +	auto start = std::chrono::steady_clock::now(); + +	for (std::size_t iStep = 0; iStep < nStep; ++iStep) { +		lattice.apply(Operator(BgkCollideO(), bulk_mask, tau), +		              Operator(BounceBackO(), wall_mask)); +		lattice.stream(); +	} + +	auto duration = std::chrono::duration_cast<std::chrono::duration<double>>( +		std::chrono::steady_clock::now() - start); + +	std::cout << cuboid[0] +	          << ", " << nStep +	          << ", " << nThread +	          << ", " << (nStep * lattice.volume()) / (1e6 * duration.count()) +	          << std::endl; + +	lattice.write_momenta(bulk_mask, "result.vtk"); +} + +int main(int argc, char* argv[]) { +	const std::size_t nX     = atoi(argv[1]); +	const std::size_t nY     = atoi(argv[2]); +	const std::size_t nZ     = atoi(argv[3]); +	const std::size_t steps = atoi(argv[4]); +	simulate({ nX, nY, nZ }, steps); +} diff --git a/channel.cpp b/channel.cpp new file mode 100644 index 0000000..fddb9a1 --- /dev/null +++ b/channel.cpp @@ -0,0 +1,87 @@ +#include "lattice.h" + +#include "LLBM/collide.h" +#include "LLBM/initialize.h" +#include "LLBM/bounce_back.h" +#include "LLBM/bounce_back_moving_wall.h" +#include "LLBM/equilibrium_density_wall.h" +#include "LLBM/equilibrium_velocity_wall.h" + +#include <chrono> +#include <iostream> + +#include "pattern/all.h" + +using T = SWEEPLB_PRECISION; +using PATTERN = pattern::SWEEPLB_PATTERN<T>; + +void simulate(Cuboid cuboid, std::size_t nStep) { +	const int nThread = omp_get_max_threads(); + +	Lattice<PATTERN> lattice(cuboid); + +	LatticeMask<T> bulk_mask(cuboid.volume()); +	LatticeMask<T> wall_mask(cuboid.volume()); +	LatticeMask<T> inflow_mask(cuboid.volume()); +	LatticeMask<T> outflow_mask(cuboid.volume()); + +	cuboid.traverse([&](int iX, int iY, int iZ, std::size_t iCell) { +		if (   iY == 0 || iY == cuboid[1]-1 +		    || iZ == 0 || iZ == cuboid[2]-1) { +			wall_mask.set(iCell, true); +		} else if (iX == 0) { +			inflow_mask.set(iCell, true); +		} else if (iX == cuboid[0]-1) { +			outflow_mask.set(iCell, true); +		} else { +			bulk_mask.set(iCell, true); +		} +	}); + +	bulk_mask.serialize(); +	wall_mask.serialize(); +	inflow_mask.serialize(); +	outflow_mask.serialize(); + +	T tau = 0.56; + +	T u_inflow  = 0.05; +	T d_outflow = 1.; + +	for (std::size_t iStep = 0; iStep < 100; ++iStep) { +		lattice.apply(Operator(BgkCollideO(), bulk_mask, tau), +		              Operator(BounceBackO(), wall_mask), +		              Operator(EquilibriumVelocityWallO(), inflow_mask, u_inflow, WallNormal<1,0,0>()), +		              Operator(EquilibriumDensityWallO(), outflow_mask, d_outflow, WallNormal<-1,0,0>())); +		lattice.stream(); +	} + +	auto start = std::chrono::steady_clock::now(); + +	for (std::size_t iStep = 0; iStep < nStep; ++iStep) { +		lattice.apply(Operator(BgkCollideO(), bulk_mask, tau), +		              Operator(BounceBackO(), wall_mask), +		              Operator(EquilibriumVelocityWallO(), inflow_mask, u_inflow, WallNormal<1,0,0>()), +		              Operator(EquilibriumDensityWallO(), outflow_mask, d_outflow, WallNormal<-1,0,0>())); +		lattice.stream(); +	} + +	auto duration = std::chrono::duration_cast<std::chrono::duration<double>>( +	  std::chrono::steady_clock::now() - start); + +	std::cout << cuboid[0] << ", " << cuboid[1] << ", " << cuboid[2] +	          << ", " << nStep +	          << ", " << nThread +	          << ", " << (nStep * lattice.volume()) / (1e6 * duration.count()) +	          << std::endl; + +	lattice.write_momenta(bulk_mask, "result.vtk"); +} + +int main(int argc, char* argv[]) { +	const std::size_t nX    = atoi(argv[1]); +	const std::size_t nY    = atoi(argv[2]); +	const std::size_t nZ    = atoi(argv[3]); +	const std::size_t steps = atoi(argv[4]); +	simulate({ nX, nY, nZ }, steps); +} @@ -0,0 +1,76 @@ +#include "lattice.h" + +#include "LLBM/collide.h" +#include "LLBM/initialize.h" +#include "LLBM/bounce_back.h" +#include "LLBM/bounce_back_moving_wall.h" + +#include <chrono> +#include <iostream> + +#include "pattern/all.h" + +using T = SWEEPLB_PRECISION; +using PATTERN = pattern::SWEEPLB_PATTERN<T>; + +void simulate(Cuboid cuboid, std::size_t nStep) { +	const int nThread = omp_get_max_threads(); + +	Lattice<PATTERN> lattice(cuboid); + +	LatticeMask<T> bulk_mask(cuboid.volume()); +	LatticeMask<T> box_mask(cuboid.volume()); +	LatticeMask<T> lid_mask(cuboid.volume()); + +	cuboid.traverse([&](int iX, int iY, int iZ, std::size_t iCell) { +		if (iZ == cuboid[2]-1) { +			lid_mask.set(iCell, true); +		} else if (iX == 0 || iX == cuboid[0]-1 +			      || iY == 0 || iY == cuboid[1]-1 +			      || iZ == 0) { +			box_mask.set(iCell, true); +		} else { +			bulk_mask.set(iCell, true); +		} +	}); + +	bulk_mask.serialize(); +	box_mask.serialize(); +	lid_mask.serialize(); + +	T tau = 0.51; +	T u_lid[] { 0.05, 0., 0. }; + +	for (std::size_t iStep = 0; iStep < 100; ++iStep) { +		lattice.apply(Operator(BgkCollideO(), bulk_mask, tau), +		              Operator(BounceBackO(), box_mask), +		              Operator(BounceBackMovingWallO(), lid_mask, u_lid)); +		lattice.stream(); +	} + +	auto start = std::chrono::steady_clock::now(); + +	for (std::size_t iStep = 0; iStep < nStep; ++iStep) { +		lattice.apply(Operator(BgkCollideO(), bulk_mask, tau), +		              Operator(BounceBackO(), box_mask), +		              Operator(BounceBackMovingWallO(), lid_mask, u_lid)); +		lattice.stream(); +	} + +	auto duration = std::chrono::duration_cast<std::chrono::duration<double>>( +		std::chrono::steady_clock::now() - start); + +	std::cout << cuboid[0] +	          << ", " << nStep +	          << ", " << nThread +	          << ", " << (nStep * lattice.volume()) / (1e6 * duration.count()) +	          << std::endl; + +	lattice.write_momenta(bulk_mask, "result.vtk"); +} + +int main(int argc, char* argv[]) { +	const std::size_t n     = atoi(argv[1]); +	const std::size_t steps = atoi(argv[2]); +	simulate({ n, n, n}, steps); +} diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..d156272 --- /dev/null +++ b/shell.nix @@ -0,0 +1,15 @@ +{ pkgs ? import <nixpkgs> { }, ... }: + +pkgs.stdenvNoCC.mkDerivation rec { +  name = "sweeplb-env"; +  env = pkgs.buildEnv { name = name; paths = buildInputs; }; + +  buildInputs = with pkgs; [ +    gcc10 +    cmake +  ]; + +  shellHook = '' +    export NIX_SHELL_NAME="${name}" +  ''; +} diff --git a/src/LLBM/bounce_back.h b/src/LLBM/bounce_back.h new file mode 100644 index 0000000..b710ee4 --- /dev/null +++ b/src/LLBM/bounce_back.h @@ -0,0 +1,30 @@ +#pragma once + +#include "concepts.h" + +struct BounceBackO { + +template <concepts::Arithmetic V> +static void apply(V f_curr[19], V f_next[19]) { +	f_next[0] = f_curr[18]; +	f_next[1] = f_curr[17]; +	f_next[2] = f_curr[16]; +	f_next[3] = f_curr[15]; +	f_next[4] = f_curr[14]; +	f_next[5] = f_curr[13]; +	f_next[6] = f_curr[12]; +	f_next[7] = f_curr[11]; +	f_next[8] = f_curr[10]; +	f_next[9] = f_curr[9]; +	f_next[10] = f_curr[8]; +	f_next[11] = f_curr[7]; +	f_next[12] = f_curr[6]; +	f_next[13] = f_curr[5]; +	f_next[14] = f_curr[4]; +	f_next[15] = f_curr[3]; +	f_next[16] = f_curr[2]; +	f_next[17] = f_curr[1]; +	f_next[18] = f_curr[0]; +} + +}; diff --git a/src/LLBM/bounce_back_moving_wall.h b/src/LLBM/bounce_back_moving_wall.h new file mode 100644 index 0000000..c00a17e --- /dev/null +++ b/src/LLBM/bounce_back_moving_wall.h @@ -0,0 +1,30 @@ +#pragma once + +#include "concepts.h" + +struct BounceBackMovingWallO { + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T u[3]) { +	f_next[0] = f_curr[18] + T{0.166666666666667}*u[1] + T{0.166666666666667}*u[2]; +	f_next[1] = f_curr[17] - T{0.166666666666667}*u[0] + T{0.166666666666667}*u[2]; +	f_next[2] = f_curr[16] + T{0.333333333333333}*u[2]; +	f_next[3] = f_curr[15] + T{0.166666666666667}*u[0] + T{0.166666666666667}*u[2]; +	f_next[4] = f_curr[14] - T{0.166666666666667}*u[1] + T{0.166666666666667}*u[2]; +	f_next[5] = f_curr[13] - T{0.166666666666667}*u[0] + T{0.166666666666667}*u[1]; +	f_next[6] = f_curr[12] + T{0.333333333333333}*u[1]; +	f_next[7] = f_curr[11] + T{0.166666666666667}*u[0] + T{0.166666666666667}*u[1]; +	f_next[8] = f_curr[10] - T{0.333333333333333}*u[0]; +	f_next[9] = f_curr[9]; +	f_next[10] = f_curr[8] + T{0.333333333333333}*u[0]; +	f_next[11] = f_curr[7] - T{0.166666666666667}*u[0] - T{0.166666666666667}*u[1]; +	f_next[12] = f_curr[6] - T{0.333333333333333}*u[1]; +	f_next[13] = f_curr[5] + T{0.166666666666667}*u[0] - T{0.166666666666667}*u[1]; +	f_next[14] = f_curr[4] + T{0.166666666666667}*u[1] - T{0.166666666666667}*u[2]; +	f_next[15] = f_curr[3] - T{0.166666666666667}*u[0] - T{0.166666666666667}*u[2]; +	f_next[16] = f_curr[2] - T{0.333333333333333}*u[2]; +	f_next[17] = f_curr[1] + T{0.166666666666667}*u[0] - T{0.166666666666667}*u[2]; +	f_next[18] = f_curr[0] - T{0.166666666666667}*u[1] - T{0.166666666666667}*u[2]; +} + +}; diff --git a/src/LLBM/collect_moments.h b/src/LLBM/collect_moments.h new file mode 100644 index 0000000..afd5d59 --- /dev/null +++ b/src/LLBM/collect_moments.h @@ -0,0 +1,25 @@ +#pragma once + +#include "concepts.h" + +struct CollectMomentsF { + +static constexpr unsigned size = 4; + +template <concepts::Arithmetic V> +static void observe(V f_curr[19], V* momenta) { +	V m0 = f_curr[10] + f_curr[13] + f_curr[17]; +	V m1 = f_curr[14] + f_curr[5] + f_curr[6]; +	V m2 = f_curr[1] + f_curr[2] + f_curr[4]; +	V m3 = m0 + m1 + m2 + f_curr[0] + f_curr[11] + f_curr[12] + f_curr[15] + f_curr[16] + f_curr[18] + f_curr[3] + f_curr[7] + f_curr[8] + f_curr[9]; +	V m4 = -f_curr[11] + f_curr[7]; +	V m5 = -f_curr[15] + f_curr[3]; +	V m6 = V{1}/m3; +	V m7 = f_curr[0] - f_curr[18]; +	momenta[0] = m3; +	momenta[1] = m6*(m0 + m4 + m5 - f_curr[1] - f_curr[5] - f_curr[8]); +	momenta[2] = m6*(m1 + m4 + m7 - f_curr[12] - f_curr[13] - f_curr[4]); +	momenta[3] = m6*(m2 + m5 + m7 - f_curr[14] - f_curr[16] - f_curr[17]); +} + +}; diff --git a/src/LLBM/collide.h b/src/LLBM/collide.h new file mode 100644 index 0000000..c08aade --- /dev/null +++ b/src/LLBM/collide.h @@ -0,0 +1,89 @@ +#pragma once + +#include "concepts.h" + +struct BgkCollideO { + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T tau) { +	V rho; +	V u[3]; +	V m0 = f_curr[10] + f_curr[13] + f_curr[17]; +	V m1 = f_curr[14] + f_curr[5] + f_curr[6]; +	V m2 = f_curr[1] + f_curr[2] + f_curr[4]; +	V m3 = m0 + m1 + m2 + f_curr[0] + f_curr[11] + f_curr[12] + f_curr[15] + f_curr[16] + f_curr[18] + f_curr[3] + f_curr[7] + f_curr[8] + f_curr[9]; +	V m4 = -f_curr[11] + f_curr[7]; +	V m5 = -f_curr[15] + f_curr[3]; +	V m6 = T{1}/m3; +	V m7 = f_curr[0] - f_curr[18]; +	rho = m3; +	u[0] = m6*(m0 + m4 + m5 - f_curr[1] - f_curr[5] - f_curr[8]); +	u[1] = m6*(m1 + m4 + m7 - f_curr[12] - f_curr[13] - f_curr[4]); +	u[2] = m6*(m2 + m5 + m7 - f_curr[14] - f_curr[16] - f_curr[17]); +	V x0 = T{1}/tau; +	V x1 = T{0.0138888888888889}*x0; +	V x2 = u[1] + u[2]; +	V x3 = T{9.00000000000000}*(x2*x2); +	V x4 = T{6.00000000000000}*u[2]; +	V x5 = u[1]*u[1]; +	V x6 = T{3.00000000000000}*x5; +	V x7 = -x6; +	V x8 = u[0]*u[0]; +	V x9 = T{3.00000000000000}*x8; +	V x10 = T{2.00000000000000} - x9; +	V x11 = x10 + x7; +	V x12 = x11 + x4; +	V x13 = u[2]*u[2]; +	V x14 = T{3.00000000000000}*x13; +	V x15 = -x14; +	V x16 = T{6.00000000000000}*u[1]; +	V x17 = x15 + x16; +	V x18 = T{6.00000000000000}*u[0]; +	V x19 = -u[0]; +	V x20 = x19 + u[2]; +	V x21 = x14 + x6 + T{-2.00000000000000}; +	V x22 = x21 + x9; +	V x23 = x22 - x4; +	V x24 = T{0.0277777777777778}*x0; +	V x25 = T{6.00000000000000}*x13; +	V x26 = u[0] + u[2]; +	V x27 = T{9.00000000000000}*(x26*x26); +	V x28 = x15 + x18; +	V x29 = -u[1]; +	V x30 = x29 + u[2]; +	V x31 = x19 + u[1]; +	V x32 = -x16; +	V x33 = x18 + x22; +	V x34 = T{6.00000000000000}*x5; +	V x35 = u[0] + u[1]; +	V x36 = T{9.00000000000000}*(x35*x35); +	V x37 = T{6.00000000000000}*x8; +	V x38 = x9 + T{-2.00000000000000}; +	V x39 = x29 + u[0]; +	V x40 = -x18 + x22; +	V x41 = -u[2]; +	V x42 = x41 + u[1]; +	V x43 = x22 + x4; +	V x44 = x41 + u[0]; +	f_next[0] = x1*(rho*(x12 + x17 + x3) - T{72.0000000000000}*f_curr[0]) + f_curr[0]; +	f_next[1] = -x1*(rho*(x18 + x23 - T{9.00000000000000}*x20*x20) + T{72.0000000000000}*f_curr[1]) + f_curr[1]; +	f_next[2] = x24*(rho*(x12 + x25) - T{36.0000000000000}*f_curr[2]) + f_curr[2]; +	f_next[3] = x1*(rho*(x12 + x27 + x28) - T{72.0000000000000}*f_curr[3]) + f_curr[3]; +	f_next[4] = -x1*(rho*(x16 + x23 - T{9.00000000000000}*x30*x30) + T{72.0000000000000}*f_curr[4]) + f_curr[4]; +	f_next[5] = -x1*(rho*(x32 + x33 - T{9.00000000000000}*x31*x31) + T{72.0000000000000}*f_curr[5]) + f_curr[5]; +	f_next[6] = x24*(rho*(x10 + x17 + x34) - T{36.0000000000000}*f_curr[6]) + f_curr[6]; +	f_next[7] = x1*(rho*(x11 + x17 + x18 + x36) - T{72.0000000000000}*f_curr[7]) + f_curr[7]; +	f_next[8] = -x24*(rho*(x18 + x21 - x37) + T{36.0000000000000}*f_curr[8]) + f_curr[8]; +	f_next[9] = -T{0.166666666666667}*x0*(rho*x22 + T{6.00000000000000}*f_curr[9]) + f_curr[9]; +	f_next[10] = x24*(rho*(x28 + x37 + x7 + T{2.00000000000000}) - T{36.0000000000000}*f_curr[10]) + f_curr[10]; +	f_next[11] = -x1*(rho*(x16 + x33 - x36) + T{72.0000000000000}*f_curr[11]) + f_curr[11]; +	f_next[12] = -x24*(rho*(x14 + x16 - x34 + x38) + T{36.0000000000000}*f_curr[12]) + f_curr[12]; +	f_next[13] = -x1*(rho*(x16 + x40 - T{9.00000000000000}*x39*x39) + T{72.0000000000000}*f_curr[13]) + f_curr[13]; +	f_next[14] = -x1*(rho*(x32 + x43 - T{9.00000000000000}*x42*x42) + T{72.0000000000000}*f_curr[14]) + f_curr[14]; +	f_next[15] = -x1*(rho*(-x27 + x33 + x4) + T{72.0000000000000}*f_curr[15]) + f_curr[15]; +	f_next[16] = -x24*(rho*(-x25 + x38 + x4 + x6) + T{36.0000000000000}*f_curr[16]) + f_curr[16]; +	f_next[17] = -x1*(rho*(x4 + x40 - T{9.00000000000000}*x44*x44) + T{72.0000000000000}*f_curr[17]) + f_curr[17]; +	f_next[18] = -x1*(rho*(x16 - x3 + x43) + T{72.0000000000000}*f_curr[18]) + f_curr[18]; +} + +}; diff --git a/src/LLBM/equilibrium_density_wall.h b/src/LLBM/equilibrium_density_wall.h new file mode 100644 index 0000000..598d59d --- /dev/null +++ b/src/LLBM/equilibrium_density_wall.h @@ -0,0 +1,144 @@ +#pragma once + +#include "wall.h" +#include "concepts.h" + +struct EquilibriumDensityWallO { + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T rho_w, WallNormal<1,0,0>) { +	V u_w = (rho_w - f_curr[0] - T{2.00000000000000}*f_curr[11] - f_curr[12] - f_curr[14] - T{2.00000000000000}*f_curr[15] - f_curr[16] - f_curr[18] - T{2.00000000000000}*f_curr[1] - f_curr[2] - f_curr[4] - T{2.00000000000000}*f_curr[5] - f_curr[6] - T{2.00000000000000}*f_curr[8] - f_curr[9])/rho_w; +	V rho = rho_w; +	V u[3] { u_w, 0., 0. }; +	V e0 = T{0.0277777777777778}*rho; +	V e1 = u[1] + u[2]; +	V e2 = T{4.50000000000000}*(e1*e1); +	V e3 = T{3.00000000000000}*u[2]; +	V e4 = u[1]*u[1]; +	V e5 = T{1.50000000000000}*e4; +	V e6 = -e5; +	V e7 = u[0]*u[0]; +	V e8 = T{1.50000000000000}*e7; +	V e9 = T{1.00000000000000} - e8; +	V e10 = e6 + e9; +	V e11 = e10 + e3; +	V e12 = T{3.00000000000000}*u[1]; +	V e13 = u[2]*u[2]; +	V e14 = T{1.50000000000000}*e13; +	V e15 = -e14; +	V e16 = e12 + e15; +	V e17 = T{3.00000000000000}*u[0]; +	V e18 = -u[2]; +	V e19 = e18 + u[0]; +	V e20 = -T{4.50000000000000}*e19*e19; +	V e21 = e14 + e5 + T{-1.00000000000000}; +	V e22 = e21 + e8; +	V e23 = e22 - e3; +	V e24 = T{0.0555555555555556}*rho; +	V e25 = T{3.00000000000000}*e13; +	V e26 = u[0] + u[2]; +	V e27 = T{4.50000000000000}*(e26*e26); +	V e28 = e15 + e17; +	V e29 = e18 + u[1]; +	V e30 = -T{4.50000000000000}*e29*e29; +	V e31 = -e12; +	V e32 = u[0] - u[1]; +	V e33 = -T{4.50000000000000}*e32*e32; +	V e34 = e17 + e22; +	V e35 = T{3.00000000000000}*e4; +	V e36 = u[0] + u[1]; +	V e37 = T{4.50000000000000}*(e36*e36); +	V e38 = T{3.00000000000000}*e7; +	V e39 = e8 + T{-1.00000000000000}; +	V e40 = -e17 + e22; +	V e41 = e22 + e3; +	f_next[0] = e0*(e11 + e16 + e2); +	f_next[1] = -e0*(e17 + e20 + e23); +	f_next[2] = e24*(e11 + e25); +	f_next[3] = e0*(e11 + e27 + e28); +	f_next[4] = -e0*(e12 + e23 + e30); +	f_next[5] = -e0*(e31 + e33 + e34); +	f_next[6] = e24*(e16 + e35 + e9); +	f_next[7] = e0*(e10 + e16 + e17 + e37); +	f_next[8] = -e24*(e17 + e21 - e38); +	f_next[9] = -T{0.333333333333333}*e22*rho; +	f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); +	f_next[11] = -e0*(e12 + e34 - e37); +	f_next[12] = -e24*(e12 + e14 - e35 + e39); +	f_next[13] = -e0*(e12 + e33 + e40); +	f_next[14] = -e0*(e30 + e31 + e41); +	f_next[15] = -e0*(-e27 + e3 + e34); +	f_next[16] = -e24*(-e25 + e3 + e39 + e5); +	f_next[17] = -e0*(e20 + e3 + e40); +	f_next[18] = -e0*(e12 - e2 + e41); +} + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T rho_w, WallNormal<-1,0,0>) { +	V u_w = (-rho_w + f_curr[0] + T{2.00000000000000}*f_curr[10] + f_curr[12] + T{2.00000000000000}*f_curr[13] + f_curr[14] + f_curr[16] + T{2.00000000000000}*f_curr[17] + f_curr[18] + f_curr[2] + T{2.00000000000000}*f_curr[3] + f_curr[4] + f_curr[6] + T{2.00000000000000}*f_curr[7] + f_curr[9])/rho_w; +	V rho = rho_w; +	V u[3] { u_w, 0., 0. }; +	V e0 = T{0.0277777777777778}*rho; +	V e1 = u[1] + u[2]; +	V e2 = T{4.50000000000000}*(e1*e1); +	V e3 = T{3.00000000000000}*u[2]; +	V e4 = u[1]*u[1]; +	V e5 = T{1.50000000000000}*e4; +	V e6 = -e5; +	V e7 = u[0]*u[0]; +	V e8 = T{1.50000000000000}*e7; +	V e9 = T{1.00000000000000} - e8; +	V e10 = e6 + e9; +	V e11 = e10 + e3; +	V e12 = T{3.00000000000000}*u[1]; +	V e13 = u[2]*u[2]; +	V e14 = T{1.50000000000000}*e13; +	V e15 = -e14; +	V e16 = e12 + e15; +	V e17 = T{3.00000000000000}*u[0]; +	V e18 = -u[2]; +	V e19 = e18 + u[0]; +	V e20 = -T{4.50000000000000}*e19*e19; +	V e21 = e14 + e5 + T{-1.00000000000000}; +	V e22 = e21 + e8; +	V e23 = e22 - e3; +	V e24 = T{0.0555555555555556}*rho; +	V e25 = T{3.00000000000000}*e13; +	V e26 = u[0] + u[2]; +	V e27 = T{4.50000000000000}*(e26*e26); +	V e28 = e15 + e17; +	V e29 = e18 + u[1]; +	V e30 = -T{4.50000000000000}*e29*e29; +	V e31 = -e12; +	V e32 = u[0] - u[1]; +	V e33 = -T{4.50000000000000}*e32*e32; +	V e34 = e17 + e22; +	V e35 = T{3.00000000000000}*e4; +	V e36 = u[0] + u[1]; +	V e37 = T{4.50000000000000}*(e36*e36); +	V e38 = T{3.00000000000000}*e7; +	V e39 = e8 + T{-1.00000000000000}; +	V e40 = -e17 + e22; +	V e41 = e22 + e3; +	f_next[0] = e0*(e11 + e16 + e2); +	f_next[1] = -e0*(e17 + e20 + e23); +	f_next[2] = e24*(e11 + e25); +	f_next[3] = e0*(e11 + e27 + e28); +	f_next[4] = -e0*(e12 + e23 + e30); +	f_next[5] = -e0*(e31 + e33 + e34); +	f_next[6] = e24*(e16 + e35 + e9); +	f_next[7] = e0*(e10 + e16 + e17 + e37); +	f_next[8] = -e24*(e17 + e21 - e38); +	f_next[9] = -T{0.333333333333333}*e22*rho; +	f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); +	f_next[11] = -e0*(e12 + e34 - e37); +	f_next[12] = -e24*(e12 + e14 - e35 + e39); +	f_next[13] = -e0*(e12 + e33 + e40); +	f_next[14] = -e0*(e30 + e31 + e41); +	f_next[15] = -e0*(-e27 + e3 + e34); +	f_next[16] = -e24*(-e25 + e3 + e39 + e5); +	f_next[17] = -e0*(e20 + e3 + e40); +	f_next[18] = -e0*(e12 - e2 + e41); +} + +}; diff --git a/src/LLBM/equilibrium_velocity_wall.h b/src/LLBM/equilibrium_velocity_wall.h new file mode 100644 index 0000000..2d68e34 --- /dev/null +++ b/src/LLBM/equilibrium_velocity_wall.h @@ -0,0 +1,144 @@ +#pragma once + +#include "wall.h" +#include "concepts.h" + +struct EquilibriumVelocityWallO { + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T u_w, WallNormal<1,0,0>) { +	V rho; +	rho = -(f_curr[0] + T{2.00000000000000}*f_curr[11] + f_curr[12] + f_curr[14] + T{2.00000000000000}*f_curr[15] + f_curr[16] + f_curr[18] + T{2.00000000000000}*f_curr[1] + f_curr[2] + f_curr[4] + T{2.00000000000000}*f_curr[5] + f_curr[6] + T{2.00000000000000}*f_curr[8] + f_curr[9])/(u_w + T{-1.00000000000000}); +	V u[3] { u_w, 0., 0. }; +	V e0 = T{0.0277777777777778}*rho; +	V e1 = u[1] + u[2]; +	V e2 = T{4.50000000000000}*(e1*e1); +	V e3 = T{3.00000000000000}*u[2]; +	V e4 = u[1]*u[1]; +	V e5 = T{1.50000000000000}*e4; +	V e6 = -e5; +	V e7 = u[0]*u[0]; +	V e8 = T{1.50000000000000}*e7; +	V e9 = T{1.00000000000000} - e8; +	V e10 = e6 + e9; +	V e11 = e10 + e3; +	V e12 = T{3.00000000000000}*u[1]; +	V e13 = u[2]*u[2]; +	V e14 = T{1.50000000000000}*e13; +	V e15 = -e14; +	V e16 = e12 + e15; +	V e17 = T{3.00000000000000}*u[0]; +	V e18 = -u[2]; +	V e19 = e18 + u[0]; +	V e20 = -T{4.50000000000000}*e19*e19; +	V e21 = e14 + e5 + T{-1.00000000000000}; +	V e22 = e21 + e8; +	V e23 = e22 - e3; +	V e24 = T{0.0555555555555556}*rho; +	V e25 = T{3.00000000000000}*e13; +	V e26 = u[0] + u[2]; +	V e27 = T{4.50000000000000}*(e26*e26); +	V e28 = e15 + e17; +	V e29 = e18 + u[1]; +	V e30 = -T{4.50000000000000}*e29*e29; +	V e31 = -e12; +	V e32 = u[0] - u[1]; +	V e33 = -T{4.50000000000000}*e32*e32; +	V e34 = e17 + e22; +	V e35 = T{3.00000000000000}*e4; +	V e36 = u[0] + u[1]; +	V e37 = T{4.50000000000000}*(e36*e36); +	V e38 = T{3.00000000000000}*e7; +	V e39 = e8 + T{-1.00000000000000}; +	V e40 = -e17 + e22; +	V e41 = e22 + e3; +	f_next[0] = e0*(e11 + e16 + e2); +	f_next[1] = -e0*(e17 + e20 + e23); +	f_next[2] = e24*(e11 + e25); +	f_next[3] = e0*(e11 + e27 + e28); +	f_next[4] = -e0*(e12 + e23 + e30); +	f_next[5] = -e0*(e31 + e33 + e34); +	f_next[6] = e24*(e16 + e35 + e9); +	f_next[7] = e0*(e10 + e16 + e17 + e37); +	f_next[8] = -e24*(e17 + e21 - e38); +	f_next[9] = -T{0.333333333333333}*e22*rho; +	f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); +	f_next[11] = -e0*(e12 + e34 - e37); +	f_next[12] = -e24*(e12 + e14 - e35 + e39); +	f_next[13] = -e0*(e12 + e33 + e40); +	f_next[14] = -e0*(e30 + e31 + e41); +	f_next[15] = -e0*(-e27 + e3 + e34); +	f_next[16] = -e24*(-e25 + e3 + e39 + e5); +	f_next[17] = -e0*(e20 + e3 + e40); +	f_next[18] = -e0*(e12 - e2 + e41); +} + +template <concepts::Arithmetic V, concepts::Arithmetic T> +static void apply(V f_curr[19], V f_next[19], T u_w, WallNormal<-1,0,0>) { +	V rho; +	rho = (f_curr[0] + T{2.00000000000000}*f_curr[10] + f_curr[12] + T{2.00000000000000}*f_curr[13] + f_curr[14] + f_curr[16] + T{2.00000000000000}*f_curr[17] + f_curr[18] + f_curr[2] + T{2.00000000000000}*f_curr[3] + f_curr[4] + f_curr[6] + T{2.00000000000000}*f_curr[7] + f_curr[9])/(u_w + T{1.00000000000000}); +	V u[3] { u_w, 0., 0. }; +	V e0 = T{0.0277777777777778}*rho; +	V e1 = u[1] + u[2]; +	V e2 = T{4.50000000000000}*(e1*e1); +	V e3 = T{3.00000000000000}*u[2]; +	V e4 = u[1]*u[1]; +	V e5 = T{1.50000000000000}*e4; +	V e6 = -e5; +	V e7 = u[0]*u[0]; +	V e8 = T{1.50000000000000}*e7; +	V e9 = T{1.00000000000000} - e8; +	V e10 = e6 + e9; +	V e11 = e10 + e3; +	V e12 = T{3.00000000000000}*u[1]; +	V e13 = u[2]*u[2]; +	V e14 = T{1.50000000000000}*e13; +	V e15 = -e14; +	V e16 = e12 + e15; +	V e17 = T{3.00000000000000}*u[0]; +	V e18 = -u[2]; +	V e19 = e18 + u[0]; +	V e20 = -T{4.50000000000000}*e19*e19; +	V e21 = e14 + e5 + T{-1.00000000000000}; +	V e22 = e21 + e8; +	V e23 = e22 - e3; +	V e24 = T{0.0555555555555556}*rho; +	V e25 = T{3.00000000000000}*e13; +	V e26 = u[0] + u[2]; +	V e27 = T{4.50000000000000}*(e26*e26); +	V e28 = e15 + e17; +	V e29 = e18 + u[1]; +	V e30 = -T{4.50000000000000}*e29*e29; +	V e31 = -e12; +	V e32 = u[0] - u[1]; +	V e33 = -T{4.50000000000000}*e32*e32; +	V e34 = e17 + e22; +	V e35 = T{3.00000000000000}*e4; +	V e36 = u[0] + u[1]; +	V e37 = T{4.50000000000000}*(e36*e36); +	V e38 = T{3.00000000000000}*e7; +	V e39 = e8 + T{-1.00000000000000}; +	V e40 = -e17 + e22; +	V e41 = e22 + e3; +	f_next[0] = e0*(e11 + e16 + e2); +	f_next[1] = -e0*(e17 + e20 + e23); +	f_next[2] = e24*(e11 + e25); +	f_next[3] = e0*(e11 + e27 + e28); +	f_next[4] = -e0*(e12 + e23 + e30); +	f_next[5] = -e0*(e31 + e33 + e34); +	f_next[6] = e24*(e16 + e35 + e9); +	f_next[7] = e0*(e10 + e16 + e17 + e37); +	f_next[8] = -e24*(e17 + e21 - e38); +	f_next[9] = -T{0.333333333333333}*e22*rho; +	f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); +	f_next[11] = -e0*(e12 + e34 - e37); +	f_next[12] = -e24*(e12 + e14 - e35 + e39); +	f_next[13] = -e0*(e12 + e33 + e40); +	f_next[14] = -e0*(e30 + e31 + e41); +	f_next[15] = -e0*(-e27 + e3 + e34); +	f_next[16] = -e24*(-e25 + e3 + e39 + e5); +	f_next[17] = -e0*(e20 + e3 + e40); +	f_next[18] = -e0*(e12 - e2 + e41); +} + +}; diff --git a/src/LLBM/initialize.h b/src/LLBM/initialize.h new file mode 100644 index 0000000..b361217 --- /dev/null +++ b/src/LLBM/initialize.h @@ -0,0 +1,53 @@ +#pragma once + +#include "concepts.h" + +struct InitializeO { + +template <concepts::Arithmetic V, concepts::Arithmetic T = V> +static void apply(V f_curr[19], V f_next[19]) { +	f_next[0] = T{0.0277777777777778}; +	f_next[1] = T{0.0277777777777778}; +	f_next[2] = T{0.0555555555555556}; +	f_next[3] = T{0.0277777777777778}; +	f_next[4] = T{0.0277777777777778}; +	f_next[5] = T{0.0277777777777778}; +	f_next[6] = T{0.0555555555555556}; +	f_next[7] = T{0.0277777777777778}; +	f_next[8] = T{0.0555555555555556}; +	f_next[9] = T{0.333333333333333}; +	f_next[10] = T{0.0555555555555556}; +	f_next[11] = T{0.0277777777777778}; +	f_next[12] = T{0.0555555555555556}; +	f_next[13] = T{0.0277777777777778}; +	f_next[14] = T{0.0277777777777778}; +	f_next[15] = T{0.0277777777777778}; +	f_next[16] = T{0.0555555555555556}; +	f_next[17] = T{0.0277777777777778}; +	f_next[18] = T{0.0277777777777778}; +} + +template <concepts::Arithmetic V, concepts::Arithmetic T = V> +static void apply(V f_curr[19], V f_next[19], T scale) { +	f_next[0]  = T{scale * 0.0277777777777778}; +	f_next[1]  = T{scale * 0.0277777777777778}; +	f_next[2]  = T{scale * 0.0555555555555556}; +	f_next[3]  = T{scale * 0.0277777777777778}; +	f_next[4]  = T{scale * 0.0277777777777778}; +	f_next[5]  = T{scale * 0.0277777777777778}; +	f_next[6]  = T{scale * 0.0555555555555556}; +	f_next[7]  = T{scale * 0.0277777777777778}; +	f_next[8]  = T{scale * 0.0555555555555556}; +	f_next[9]  = T{scale * 0.333333333333333}; +	f_next[10] = T{scale * 0.0555555555555556}; +	f_next[11] = T{scale * 0.0277777777777778}; +	f_next[12] = T{scale * 0.0555555555555556}; +	f_next[13] = T{scale * 0.0277777777777778}; +	f_next[14] = T{scale * 0.0277777777777778}; +	f_next[15] = T{scale * 0.0277777777777778}; +	f_next[16] = T{scale * 0.0555555555555556};  | 
