From 4ec94c97879aafef15f7663135745e4ba61e62cf Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Mon, 17 May 2021 00:15:33 +0200 Subject: Extract first public LiterateLB version --- tangle/LLBM/base.h | 6 + tangle/LLBM/boundary.h | 13 ++ tangle/LLBM/bulk.h | 3 + tangle/LLBM/call_tag.h | 12 + tangle/LLBM/descriptor.h | 298 ++++++++++++++++++++++++ tangle/LLBM/kernel/bounce_back.h | 44 ++++ tangle/LLBM/kernel/bounce_back_moving_wall.h | 44 ++++ tangle/LLBM/kernel/bouzidi.h | 39 ++++ tangle/LLBM/kernel/collect_curl.h | 44 ++++ tangle/LLBM/kernel/collect_moments.h | 45 ++++ tangle/LLBM/kernel/collect_q_criterion.h | 104 +++++++++ tangle/LLBM/kernel/collect_shear_layer_normal.h | 139 +++++++++++ tangle/LLBM/kernel/collect_streamlines.h | 48 ++++ tangle/LLBM/kernel/collect_velocity_norm.h | 61 +++++ tangle/LLBM/kernel/collide.h | 131 +++++++++++ tangle/LLBM/kernel/equilibrium_density_wall.h | 226 ++++++++++++++++++ tangle/LLBM/kernel/equilibrium_velocity_wall.h | 222 ++++++++++++++++++ tangle/LLBM/kernel/executor.h | 171 ++++++++++++++ tangle/LLBM/kernel/free_slip.h | 82 +++++++ tangle/LLBM/kernel/initialize.h | 44 ++++ tangle/LLBM/kernel/propagate.h | 20 ++ tangle/LLBM/kernel/smagorinsky_collide.h | 183 +++++++++++++++ tangle/LLBM/lattice.h | 131 +++++++++++ tangle/LLBM/materials.h | 108 +++++++++ tangle/LLBM/memory.h | 134 +++++++++++ tangle/LLBM/operator.h | 27 +++ tangle/LLBM/propagate.h | 111 +++++++++ tangle/LLBM/sdf.h | 75 ++++++ tangle/LLBM/sdf_boundary.h | 114 +++++++++ tangle/LLBM/volumetric.h | 134 +++++++++++ tangle/LLBM/wall.h | 4 + tangle/asset/noise/blue_0.png | Bin 0 -> 3763 bytes tangle/asset/noise/blue_1.png | Bin 0 -> 3775 bytes tangle/asset/noise/blue_2.png | Bin 0 -> 3752 bytes tangle/asset/noise/blue_3.png | Bin 0 -> 3789 bytes tangle/asset/noise/blue_4.png | Bin 0 -> 3754 bytes tangle/asset/palette/4wave_ROTB.png | Bin 0 -> 1572 bytes tangle/asset/palette/4wave_equal.png | Bin 0 -> 1485 bytes tangle/asset/palette/5wave_cool.png | Bin 0 -> 1601 bytes tangle/asset/palette/autumn.png | Bin 0 -> 827 bytes tangle/asset/palette/blue.png | Bin 0 -> 1223 bytes tangle/asset/palette/blue_orange.png | Bin 0 -> 1457 bytes tangle/asset/palette/green_brown.png | Bin 0 -> 1269 bytes tangle/asset/palette/orange.png | Bin 0 -> 1165 bytes tangle/asset/shader/blur.frag | 20 ++ tangle/benchmark-ldc.cu | 95 ++++++++ tangle/channel-with-sphere.cu | 85 +++++++ tangle/ldc-2d.cu | 82 +++++++ tangle/ldc-3d.cu | 58 +++++ tangle/magnus.cu | 116 +++++++++ tangle/nozzle.cu | 73 ++++++ tangle/sampler/curl_norm.h | 77 ++++++ tangle/sampler/q_criterion.h | 90 +++++++ tangle/sampler/sampler.h | 32 +++ tangle/sampler/shear_layer.h | 67 ++++++ tangle/sampler/velocity_norm.h | 79 +++++++ tangle/taylor-couette.cu | 75 ++++++ tangle/tmp/noise_overview.png | Bin 0 -> 14996 bytes tangle/tmp/test_noise.png | Bin 0 -> 26571 bytes tangle/util/camera.h | 86 +++++++ tangle/util/colormap.h | 38 +++ tangle/util/noise.h | 39 ++++ tangle/util/render_window.h | 95 ++++++++ tangle/util/texture.h | 25 ++ tangle/util/timer.h | 19 ++ tangle/util/volumetric_example.h | 121 ++++++++++ 66 files changed, 4189 insertions(+) create mode 100644 tangle/LLBM/base.h create mode 100644 tangle/LLBM/boundary.h create mode 100644 tangle/LLBM/bulk.h create mode 100644 tangle/LLBM/call_tag.h create mode 100644 tangle/LLBM/descriptor.h create mode 100644 tangle/LLBM/kernel/bounce_back.h create mode 100644 tangle/LLBM/kernel/bounce_back_moving_wall.h create mode 100644 tangle/LLBM/kernel/bouzidi.h create mode 100644 tangle/LLBM/kernel/collect_curl.h create mode 100644 tangle/LLBM/kernel/collect_moments.h create mode 100644 tangle/LLBM/kernel/collect_q_criterion.h create mode 100644 tangle/LLBM/kernel/collect_shear_layer_normal.h create mode 100644 tangle/LLBM/kernel/collect_streamlines.h create mode 100644 tangle/LLBM/kernel/collect_velocity_norm.h create mode 100644 tangle/LLBM/kernel/collide.h create mode 100644 tangle/LLBM/kernel/equilibrium_density_wall.h create mode 100644 tangle/LLBM/kernel/equilibrium_velocity_wall.h create mode 100644 tangle/LLBM/kernel/executor.h create mode 100644 tangle/LLBM/kernel/free_slip.h create mode 100644 tangle/LLBM/kernel/initialize.h create mode 100644 tangle/LLBM/kernel/propagate.h create mode 100644 tangle/LLBM/kernel/smagorinsky_collide.h create mode 100644 tangle/LLBM/lattice.h create mode 100644 tangle/LLBM/materials.h create mode 100644 tangle/LLBM/memory.h create mode 100644 tangle/LLBM/operator.h create mode 100644 tangle/LLBM/propagate.h create mode 100644 tangle/LLBM/sdf.h create mode 100644 tangle/LLBM/sdf_boundary.h create mode 100644 tangle/LLBM/volumetric.h create mode 100644 tangle/LLBM/wall.h create mode 100644 tangle/asset/noise/blue_0.png create mode 100644 tangle/asset/noise/blue_1.png create mode 100644 tangle/asset/noise/blue_2.png create mode 100644 tangle/asset/noise/blue_3.png create mode 100644 tangle/asset/noise/blue_4.png create mode 100644 tangle/asset/palette/4wave_ROTB.png create mode 100644 tangle/asset/palette/4wave_equal.png create mode 100644 tangle/asset/palette/5wave_cool.png create mode 100644 tangle/asset/palette/autumn.png create mode 100644 tangle/asset/palette/blue.png create mode 100644 tangle/asset/palette/blue_orange.png create mode 100644 tangle/asset/palette/green_brown.png create mode 100644 tangle/asset/palette/orange.png create mode 100644 tangle/asset/shader/blur.frag create mode 100644 tangle/benchmark-ldc.cu create mode 100644 tangle/channel-with-sphere.cu create mode 100644 tangle/ldc-2d.cu create mode 100644 tangle/ldc-3d.cu create mode 100644 tangle/magnus.cu create mode 100644 tangle/nozzle.cu create mode 100644 tangle/sampler/curl_norm.h create mode 100644 tangle/sampler/q_criterion.h create mode 100644 tangle/sampler/sampler.h create mode 100644 tangle/sampler/shear_layer.h create mode 100644 tangle/sampler/velocity_norm.h create mode 100644 tangle/taylor-couette.cu create mode 100644 tangle/tmp/noise_overview.png create mode 100644 tangle/tmp/test_noise.png create mode 100644 tangle/util/camera.h create mode 100644 tangle/util/colormap.h create mode 100644 tangle/util/noise.h create mode 100644 tangle/util/render_window.h create mode 100644 tangle/util/texture.h create mode 100644 tangle/util/timer.h create mode 100644 tangle/util/volumetric_example.h (limited to 'tangle') diff --git a/tangle/LLBM/base.h b/tangle/LLBM/base.h new file mode 100644 index 0000000..e356e02 --- /dev/null +++ b/tangle/LLBM/base.h @@ -0,0 +1,6 @@ +#pragma once + +#include "descriptor.h" +#include "memory.h" +#include "lattice.h" +#include "materials.h" diff --git a/tangle/LLBM/boundary.h b/tangle/LLBM/boundary.h new file mode 100644 index 0000000..08793a0 --- /dev/null +++ b/tangle/LLBM/boundary.h @@ -0,0 +1,13 @@ +#include "kernel/bounce_back.h" + +#include "kernel/bounce_back_moving_wall.h" + +#include "kernel/free_slip.h" + +#include "kernel/bouzidi.h" + +#include "kernel/equilibrium_velocity_wall.h" + +#include "kernel/equilibrium_density_wall.h" + +#include "sdf_boundary.h" diff --git a/tangle/LLBM/bulk.h b/tangle/LLBM/bulk.h new file mode 100644 index 0000000..62d704a --- /dev/null +++ b/tangle/LLBM/bulk.h @@ -0,0 +1,3 @@ +#include "kernel/collide.h" + +#include "kernel/smagorinsky_collide.h" diff --git a/tangle/LLBM/call_tag.h b/tangle/LLBM/call_tag.h new file mode 100644 index 0000000..fe559ef --- /dev/null +++ b/tangle/LLBM/call_tag.h @@ -0,0 +1,12 @@ +#pragma once + +namespace tag { + +struct call_by_cell_id { }; +struct call_by_list_index { }; +struct call_by_spatial_cell_mask { }; + +struct post_process_by_list_index { }; +struct post_process_by_spatial_cell_mask { }; + +} diff --git a/tangle/LLBM/descriptor.h b/tangle/LLBM/descriptor.h new file mode 100644 index 0000000..d4ca44d --- /dev/null +++ b/tangle/LLBM/descriptor.h @@ -0,0 +1,298 @@ +#pragma once + +#include +#include +#include +#include + +#ifdef __CUDA_ARCH__ + #define DATA device_data +#else + #define DATA host_data +#endif + +using pop_index_t = std::uint8_t; + +namespace descriptor { + +struct D2Q9 { + static constexpr unsigned d = 2; + static constexpr unsigned q = 9; +}; + +struct D3Q19 { + static constexpr unsigned d = 3; + static constexpr unsigned q = 19; +}; + +namespace device_data { + template + __constant__ pop_index_t opposite[DESCRIPTOR::q] { }; + + template + __constant__ int c[DESCRIPTOR::q][DESCRIPTOR::d] { }; + + template + __constant__ float c_length[DESCRIPTOR::q] { }; + + template + __constant__ float weight[DESCRIPTOR::q] { }; + + template <> + __constant__ pop_index_t opposite[9] = { + 8, 7, 6, 5, 4, 3, 2, 1, 0 + }; + + template <> + __constant__ int c[9][2] = { + {-1,-1}, {-1,0}, {-1,1}, {0,-1}, {0,0}, {0,1}, {1,-1}, {1,0}, {1,1} + }; + + template <> + __constant__ float c_length[9] = { + 1.41421356237310, 1.00000000000000, 1.41421356237310, 1.00000000000000, 0, 1.00000000000000, 1.41421356237310, 1.00000000000000, 1.41421356237310 + }; + + template <> + __constant__ float weight[9] = { + 0.0277777777777778, 0.111111111111111, 0.0277777777777778, 0.111111111111111, 0.444444444444444, 0.111111111111111, 0.0277777777777778, 0.111111111111111, 0.0277777777777778 + }; + + template <> + __constant__ pop_index_t opposite[19] = { + 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + }; + + template <> + __constant__ int c[19][3] = { + {0,1,1}, {-1,0,1}, {0,0,1}, {1,0,1}, {0,-1,1}, {-1,1,0}, {0,1,0}, {1,1,0}, {-1,0,0}, {0,0,0}, {1,0,0}, {-1,-1,0}, {0,-1,0}, {1,-1,0}, {0,1,-1}, {-1,0,-1}, {0,0,-1}, {1,0,-1}, {0,-1,-1} + }; + + template <> + __constant__ float c_length[19] = { + 1.41421356237310, 1.41421356237310, 1.00000000000000, 1.41421356237310, 1.41421356237310, 1.41421356237310, 1.00000000000000, 1.41421356237310, 1.00000000000000, 0, 1.00000000000000, 1.41421356237310, 1.00000000000000, 1.41421356237310, 1.41421356237310, 1.41421356237310, 1.00000000000000, 1.41421356237310, 1.41421356237310 + }; + + template <> + __constant__ float weight[19] = { + 0.0277777777777778, 0.0277777777777778, 0.0555555555555556, 0.0277777777777778, 0.0277777777777778, 0.0277777777777778, 0.0555555555555556, 0.0277777777777778, 0.0555555555555556, 0.333333333333333, 0.0555555555555556, 0.0277777777777778, 0.0555555555555556, 0.0277777777777778, 0.0277777777777778, 0.0277777777777778, 0.0555555555555556, 0.0277777777777778, 0.0277777777777778 + }; +} + +namespace host_data { + template + constexpr pop_index_t opposite[DESCRIPTOR::q] { }; + + template + constexpr int c[DESCRIPTOR::q][DESCRIPTOR::d] { }; + + template + constexpr float c_length[DESCRIPTOR::q] { }; + + template + constexpr float weight[DESCRIPTOR::q] { }; + + template <> + constexpr pop_index_t opposite[9] = { + 8, 7, 6, 5, 4, 3, 2, 1, 0 + }; + + template <> + constexpr int c[9][2] = { + {-1,-1}, {-1,0}, {-1,1}, {0,-1}, {0,0}, {0,1}, {1,-1}, {1,0}, {1,1} + }; + + template <> + constexpr float c_length[9] = { + 1.41421356237310, 1.00000000000000, 1.41421356237310, 1.00000000000000, 0, 1.00000000000000, 1.41421356237310, 1.00000000000000, 1.41421356237310 + }; + + template <> + constexpr float weight[9] = { + 0.0277777777777778, 0.111111111111111, 0.0277777777777778, 0.111111111111111, 0.444444444444444, 0.111111111111111, 0.0277777777777778, 0.111111111111111, 0.0277777777777778 + }; + + template <> + constexpr pop_index_t opposite[19] = { + 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + }; + + template <> + constexpr int c[19][3] = { + {0,1,1}, {-1,0,1}, {0,0,1}, {1,0,1}, {0,-1,1}, {-1,1,0}, {0,1,0}, {1,1,0}, {-1,0,0}, {0,0,0}, {1,0,0}, {-1,-1,0}, {0,-1,0}, {1,-1,0}, {0,1,-1}, {-1,0,-1}, {0,0,-1}, {1,0,-1}, {0,-1,-1} + }; + + template <> + constexpr float c_length[19] = { + 1.41421356237310, 1.41421356237310, 1.00000000000000, 1.41421356237310, 1.41421356237310, 1.41421356237310, 1.00000000000000, 1.41421356237310, 1.00000000000000, 0, 1.00000000000000, 1.41421356237310, 1.00000000000000, 1.41421356237310, 1.41421356237310, 1.41421356237310, 1.00000000000000, 1.41421356237310, 1.41421356237310 + }; + + template <> + constexpr float weight[19] = { + 0.0277777777777778, 0.0277777777777778, 0.0555555555555556, 0.0277777777777778, 0.0277777777777778, 0.0277777777777778, 0.0555555555555556, 0.0277777777777778, 0.0555555555555556, 0.333333333333333, 0.0555555555555556, 0.0277777777777778, 0.0555555555555556, 0.0277777777777778, 0.0277777777777778, 0.0277777777777778, 0.0555555555555556, 0.0277777777777778, 0.0277777777777778 + }; +} + +template +__host__ __device__ +pop_index_t opposite(pop_index_t iPop) { + return DESCRIPTOR::q - 1 - iPop; +} + +template +__host__ __device__ +int velocity(pop_index_t iPop, unsigned iDim) { + return DATA::template c[iPop][iDim]; +} + +template +__host__ __device__ +std::enable_if_t velocity(pop_index_t iPop) { + return make_float2(DATA::template c[iPop][0], + DATA::template c[iPop][1]); +} + +template +__host__ __device__ +std::enable_if_t velocity(pop_index_t iPop) { + return make_float3(DATA::template c[iPop][0], + DATA::template c[iPop][1], + DATA::template c[iPop][2]); +} + +template +__host__ __device__ +float velocity_length(pop_index_t iPop) { + return DATA::template c_length[iPop]; +} + +template +__host__ __device__ +float weight(pop_index_t iPop) { + return DATA::template weight[iPop]; +} + +template +struct CuboidD; + +template <> +struct CuboidD<2> { + const std::size_t nX; + const std::size_t nY; + const std::size_t nZ; + const std::size_t volume; + + CuboidD(std::size_t x, std::size_t y): + nX(x), nY(y), nZ(1), + volume(x*y) { }; +}; + +template <> +struct CuboidD<3> { + const std::size_t nX; + const std::size_t nY; + const std::size_t nZ; + const std::size_t volume; + const std::size_t plane; + + CuboidD(std::size_t x, std::size_t y, std::size_t z): + nX(x), nY(y), nZ(z), + volume(x*y*z), + plane(x*y) { }; +}; + +template +using Cuboid = CuboidD; + +__host__ __device__ +std::size_t gid(const CuboidD<2>& c, int iX, int iY, int iZ=0) { + return iY*c.nX + iX; +} + +__host__ __device__ +std::size_t gid(const CuboidD<3>& c, int iX, int iY, int iZ) { + return iZ*c.plane + iY*c.nX + iX; +} + +__host__ __device__ +int offset(const CuboidD<2>& c, int iX, int iY) { + return iY*c.nX + iX; +} + +template +__host__ __device__ +int offset(const CuboidD<2>& c, pop_index_t iPop) { + static_assert(DESCRIPTOR::d == 2, "Dimensions must match"); + return offset(c, + descriptor::velocity(iPop, 0), + descriptor::velocity(iPop, 1) + ); +} + +__host__ __device__ +int offset(const CuboidD<3>& c, int iX, int iY, int iZ) { + return iZ*c.plane + iY*c.nX + iX; +} + +template +__host__ __device__ +int offset(const CuboidD<3>& c, pop_index_t iPop) { + static_assert(DESCRIPTOR::d == 3, "Dimensions must match"); + return offset(c, + descriptor::velocity(iPop, 0), + descriptor::velocity(iPop, 1), + descriptor::velocity(iPop, 2) + ); +} + +template +__host__ __device__ +std::size_t neighbor(const CuboidD<2>& c, std::size_t iCell, pop_index_t iPop) { + return iCell + offset(c, iPop); +} + +template +__host__ __device__ +std::size_t neighbor(const CuboidD<3>& c, std::size_t iCell, pop_index_t iPop) { + return iCell + offset(c, iPop); +} + +__host__ __device__ +uint2 gidInverse(const CuboidD<2>& c, std::size_t gid) { + int iY = gid / c.nX; + int iX = gid % c.nX; + return make_uint2(iX, iY); +} + +__host__ __device__ +uint3 gidInverse(const CuboidD<3>& c, std::size_t gid) { + int iZ = gid / c.plane; + int iY = (gid % c.plane) / c.nX; + int iX = (gid % c.plane) % c.nX; + return make_uint3(iX,iY,iZ); +} + +__host__ __device__ +float2 gidInverseSmooth(const CuboidD<2>& c, std::size_t gid) { + int iY = gid / c.nX; + int iX = gid % c.nX; + return make_float2(iX, iY); +} + +__host__ __device__ +float3 gidInverseSmooth(const CuboidD<3>& c, std::size_t gid) { + int iZ = gid / c.plane; + int iY = (gid % c.plane) / c.nX; + int iX = (gid % c.plane) % c.nX; + return make_float3(iX,iY,iZ); +} + +bool isInside(const CuboidD<2>& c, std::size_t gid) { + return gid < c.volume; +} + +bool isInside(const CuboidD<3>& c, std::size_t gid) { + return gid < c.volume; +} + +} diff --git a/tangle/LLBM/kernel/bounce_back.h b/tangle/LLBM/kernel/bounce_back.h new file mode 100644 index 0000000..d7ec4a7 --- /dev/null +++ b/tangle/LLBM/kernel/bounce_back.h @@ -0,0 +1,44 @@ +#pragma once +#include + +struct BounceBackO { + +using call_tag = tag::call_by_cell_id; + +template +__device__ static void apply(descriptor::D2Q9, S f_curr[9], S f_next[9], std::size_t gid) { + f_next[0] = f_curr[8]; + f_next[1] = f_curr[7]; + f_next[2] = f_curr[6]; + f_next[3] = f_curr[5]; + f_next[4] = f_curr[4]; + f_next[5] = f_curr[3]; + f_next[6] = f_curr[2]; + f_next[7] = f_curr[1]; + f_next[8] = f_curr[0]; +} + +template +__device__ static void apply(descriptor::D3Q19, S f_curr[19], S f_next[19], std::size_t gid) { + f_next[0] = f_curr[18]; + f_next[1] = f_curr[17]; + f_next[2] = f_curr[16]; + f_next[3] = f_curr[15]; + f_next[4] = f_curr[14]; + f_next[5] = f_curr[13]; + f_next[6] = f_curr[12]; + f_next[7] = f_curr[11]; + f_next[8] = f_curr[10]; + f_next[9] = f_curr[9]; + f_next[10] = f_curr[8]; + f_next[11] = f_curr[7]; + f_next[12] = f_curr[6]; + f_next[13] = f_curr[5]; + f_next[14] = f_curr[4]; + f_next[15] = f_curr[3]; + f_next[16] = f_curr[2]; + f_next[17] = f_curr[1]; + f_next[18] = f_curr[0]; +} + +}; diff --git a/tangle/LLBM/kernel/bounce_back_moving_wall.h b/tangle/LLBM/kernel/bounce_back_moving_wall.h new file mode 100644 index 0000000..c4b40f9 --- /dev/null +++ b/tangle/LLBM/kernel/bounce_back_moving_wall.h @@ -0,0 +1,44 @@ +#pragma once +#include + +struct BounceBackMovingWallO { + +using call_tag = tag::call_by_cell_id; + +template +__device__ static void apply(descriptor::D2Q9, S f_curr[9], S f_next[9], std::size_t gid, T u_0, T u_1) { + f_next[0] = -T{0.166666666666667}*u_0 - T{0.166666666666667}*u_1 + f_curr[8]; + f_next[1] = -T{0.666666666666667}*u_0 + f_curr[7]; + f_next[2] = -T{0.166666666666667}*u_0 + T{0.166666666666667}*u_1 + f_curr[6]; + f_next[3] = -T{0.666666666666667}*u_1 + f_curr[5]; + f_next[4] = f_curr[4]; + f_next[5] = T{0.666666666666667}*u_1 + f_curr[3]; + f_next[6] = T{0.166666666666667}*u_0 - T{0.166666666666667}*u_1 + f_curr[2]; + f_next[7] = T{0.666666666666667}*u_0 + f_curr[1]; + f_next[8] = T{0.166666666666667}*u_0 + T{0.166666666666667}*u_1 + f_curr[0]; +} + +template +__device__ static void apply(descriptor::D3Q19, S f_curr[19], S f_next[19], std::size_t gid, T u_0, T u_1, T u_2) { + f_next[0] = T{0.166666666666667}*u_1 + T{0.166666666666667}*u_2 + f_curr[18]; + f_next[1] = -T{0.166666666666667}*u_0 + T{0.166666666666667}*u_2 + f_curr[17]; + f_next[2] = T{0.333333333333333}*u_2 + f_curr[16]; + f_next[3] = T{0.166666666666667}*u_0 + T{0.166666666666667}*u_2 + f_curr[15]; + f_next[4] = -T{0.166666666666667}*u_1 + T{0.166666666666667}*u_2 + f_curr[14]; + f_next[5] = -T{0.166666666666667}*u_0 + T{0.166666666666667}*u_1 + f_curr[13]; + f_next[6] = T{0.333333333333333}*u_1 + f_curr[12]; + f_next[7] = T{0.166666666666667}*u_0 + T{0.166666666666667}*u_1 + f_curr[11]; + f_next[8] = -T{0.333333333333333}*u_0 + f_curr[10]; + f_next[9] = f_curr[9]; + f_next[10] = T{0.333333333333333}*u_0 + f_curr[8]; + f_next[11] = -T{0.166666666666667}*u_0 - T{0.166666666666667}*u_1 + f_curr[7]; + f_next[12] = -T{0.333333333333333}*u_1 + f_curr[6]; + f_next[13] = T{0.166666666666667}*u_0 - T{0.166666666666667}*u_1 + f_curr[5]; + f_next[14] = T{0.166666666666667}*u_1 - T{0.166666666666667}*u_2 + f_curr[4]; + f_next[15] = -T{0.166666666666667}*u_0 - T{0.166666666666667}*u_2 + f_curr[3]; + f_next[16] = -T{0.333333333333333}*u_2 + f_curr[2]; + f_next[17] = T{0.166666666666667}*u_0 - T{0.166666666666667}*u_2 + f_curr[1]; + f_next[18] = -T{0.166666666666667}*u_1 - T{0.166666666666667}*u_2 + f_curr[0]; +} + +}; diff --git a/tangle/LLBM/kernel/bouzidi.h b/tangle/LLBM/kernel/bouzidi.h new file mode 100644 index 0000000..b1abd2c --- /dev/null +++ b/tangle/LLBM/kernel/bouzidi.h @@ -0,0 +1,39 @@ +#pragma once +#include +#include + +template +struct BouzidiConfig { + std::size_t* boundary; // boundary cell to be interpolated + std::size_t* solid; // adjacent solid cell + std::size_t* fluid; // adjacent fluid cell + S* distance; // precomputed distance factor q + S* correction; // correction for moving walls + pop_index_t* missing; // population to be reconstructed +}; + +struct BouzidiO { + +using call_tag = tag::call_by_list_index; + +template +__device__ static void apply( + LatticeView lattice + , std::size_t index + , std::size_t count + , BouzidiConfig config +) { + pop_index_t& iPop = config.missing[index]; + pop_index_t jPop = descriptor::opposite(iPop); + pop_index_t kPop = config.boundary[index] == config.fluid[index] ? iPop : jPop; + + S f_bound_j = *lattice.pop(jPop, config.boundary[index]); + S f_fluid_j = *lattice.pop(kPop, config.fluid[index]); + S* f_next_i = lattice.pop(iPop, config.solid[index]); + + *f_next_i = config.distance[index] * f_bound_j + + (1. - config.distance[index]) * f_fluid_j + + config.correction[index]; +} + +}; diff --git a/tangle/LLBM/kernel/collect_curl.h b/tangle/LLBM/kernel/collect_curl.h new file mode 100644 index 0000000..054b359 --- /dev/null +++ b/tangle/LLBM/kernel/collect_curl.h @@ -0,0 +1,44 @@ +#pragma once +#include + +struct CollectCurlF { + +using call_tag = tag::call_by_spatial_cell_mask; + +template +__device__ static void apply( + descriptor::D3Q19 + , S f_curr[19] + , descriptor::CuboidD<3> cuboid + , std::size_t gid + , std::size_t iX + , std::size_t iY + , std::size_t iZ + , S* moments_u + , cudaSurfaceObject_t surface + , S* curl_norm = nullptr +) { + auto u_x = [moments_u,cuboid,gid] __device__ (int x, int y, int z) -> T { + return moments_u[3*(gid + descriptor::offset(cuboid,x,y,z)) + 0]; + }; + auto u_y = [moments_u,cuboid,gid] __device__ (int x, int y, int z) -> T { + return moments_u[3*(gid + descriptor::offset(cuboid,x,y,z)) + 1]; + }; + auto u_z = [moments_u,cuboid,gid] __device__ (int x, int y, int z) -> T { + return moments_u[3*(gid + descriptor::offset(cuboid,x,y,z)) + 2]; + }; + + T curl_0 = T{0.500000000000000}*u_y(0, 0, -1) - T{0.500000000000000}*u_y(0, 0, 1) - T{0.500000000000000}*u_z(0, -1, 0) + T{0.500000000000000}*u_z(0, 1, 0); + T curl_1 = -T{0.500000000000000}*u_x(0, 0, -1) + T{0.500000000000000}*u_x(0, 0, 1) + T{0.500000000000000}*u_z(-1, 0, 0) - T{0.500000000000000}*u_z(1, 0, 0); + T curl_2 = T{0.500000000000000}*u_x(0, -1, 0) - T{0.500000000000000}*u_x(0, 1, 0) - T{0.500000000000000}*u_y(-1, 0, 0) + T{0.500000000000000}*u_y(1, 0, 0); + float3 curl = make_float3(curl_0, curl_1, curl_2); + float norm = length(curl); + + surf3Dwrite(norm, surface, iX*sizeof(float), iY, iZ); + + if (curl_norm != nullptr) { + curl_norm[gid] = norm; + } +} + +}; diff --git a/tangle/LLBM/kernel/collect_moments.h b/tangle/LLBM/kernel/collect_moments.h new file mode 100644 index 0000000..b2ef09b --- /dev/null +++ b/tangle/LLBM/kernel/collect_moments.h @@ -0,0 +1,45 @@ +#pragma once +#include + +struct CollectMomentsF { + +using call_tag = tag::call_by_cell_id; + +template +__device__ static void apply(descriptor::D2Q9, S f_curr[9], std::size_t gid, T* cell_rho, T* cell_u) { + T m0 = f_curr[1] + f_curr[2]; + T m1 = f_curr[3] + f_curr[6]; + T m2 = m0 + m1 + f_curr[0] + f_curr[4] + f_curr[5] + f_curr[7] + f_curr[8]; + T m3 = f_curr[0] - f_curr[8]; + T m4 = T{1} / (m2); + T rho = m2; + T u_0 = -m4*(m0 + m3 - f_curr[6] - f_curr[7]); + T u_1 = -m4*(m1 + m3 - f_curr[2] - f_curr[5]); + + cell_rho[gid] = rho; + cell_u[2*gid+0] = u_0; + cell_u[2*gid+1] = u_1; +} + +template +__device__ static void apply(descriptor::D3Q19, S f_curr[19], std::size_t gid, T* cell_rho, T* cell_u) { + T m0 = f_curr[10] + f_curr[13] + f_curr[17]; + T m1 = f_curr[14] + f_curr[5] + f_curr[6]; + T m2 = f_curr[1] + f_curr[2] + f_curr[4]; + T m3 = m0 + m1 + m2 + f_curr[0] + f_curr[11] + f_curr[12] + f_curr[15] + f_curr[16] + f_curr[18] + f_curr[3] + f_curr[7] + f_curr[8] + f_curr[9]; + T m4 = -f_curr[11] + f_curr[7]; + T m5 = -f_curr[15] + f_curr[3]; + T m6 = T{1} / (m3); + T m7 = f_curr[0] - f_curr[18]; + T rho = m3; + T u_0 = m6*(m0 + m4 + m5 - f_curr[1] - f_curr[5] - f_curr[8]); + T u_1 = m6*(m1 + m4 + m7 - f_curr[12] - f_curr[13] - f_curr[4]); + T u_2 = m6*(m2 + m5 + m7 - f_curr[14] - f_curr[16] - f_curr[17]); + + cell_rho[gid] = rho; + cell_u[3*gid+0] = u_0; + cell_u[3*gid+1] = u_1; + cell_u[3*gid+2] = u_2; +} + +}; diff --git a/tangle/LLBM/kernel/collect_q_criterion.h b/tangle/LLBM/kernel/collect_q_criterion.h new file mode 100644 index 0000000..19b7f68 --- /dev/null +++ b/tangle/LLBM/kernel/collect_q_criterion.h @@ -0,0 +1,104 @@ +#pragma once +#include + +struct CollectQCriterionF { + +using call_tag = tag::call_by_spatial_cell_mask; + +template +__device__ static void apply( + descriptor::D3Q19 + , S f_curr[19] + , descriptor::CuboidD<3> cuboid + , std::size_t gid + , std::size_t iX + , std::size_t iY + , std::size_t iZ + , T* cell_rho + , T* cell_u + , T* cell_curl_norm + , cudaSurfaceObject_t surface + , T* cell_q = nullptr +) { + const T rho = cell_rho[gid]; + const T u_0 = cell_u[3*gid + 0]; + const T u_1 = cell_u[3*gid + 1]; + const T u_2 = cell_u[3*gid + 2]; + + T x0 = T{72.0000000000000}*f_curr[5]; + T x1 = T{72.0000000000000}*f_curr[13]; + T x2 = T{6.00000000000000}*u_1; + T x3 = -x2; + T x4 = -u_0; + T x5 = x4 + u_1; + T x6 = u_1*u_1; + T x7 = T{3.00000000000000}*x6; + T x8 = u_0*u_0; + T x9 = T{3.00000000000000}*x8; + T x10 = x9 + T{-2.00000000000000}; + T x11 = x10 + x7; + T x12 = u_2*u_2; + T x13 = T{3.00000000000000}*x12; + T x14 = T{6.00000000000000}*u_0; + T x15 = x13 + x14; + T x16 = rho*(x11 + x15 + x3 - T{9.00000000000000}*x5*x5); + T x17 = -x14; + T x18 = -u_1; + T x19 = x18 + u_0; + T x20 = x13 + x2; + T x21 = x11 + x20; + T x22 = rho*(x17 + x21 - T{9.00000000000000}*x19*x19); + T x23 = u_0 + u_1; + T x24 = T{9.00000000000000}*(x23*x23); + T x25 = -x7; + T x26 = T{2.00000000000000} - x9; + T x27 = x25 + x26; + T x28 = -x13; + T x29 = x2 + x28; + T x30 = -rho*(x14 + x21 - x24) + rho*(x14 + x24 + x27 + x29) - T{72.0000000000000}*f_curr[11] - T{72.0000000000000}*f_curr[7]; + T x31 = T{72.0000000000000}*f_curr[1]; + T x32 = T{72.0000000000000}*f_curr[17]; + T x33 = x4 + u_2; + T x34 = T{6.00000000000000}*u_2; + T x35 = x11 - x34; + T x36 = rho*(x15 + x35 - T{9.00000000000000}*x33*x33); + T x37 = -u_2; + T x38 = x37 + u_0; + T x39 = x11 + x34; + T x40 = x13 + x39; + T x41 = rho*(x17 + x40 - T{9.00000000000000}*x38*x38); + T x42 = u_0 + u_2; + T x43 = T{9.00000000000000}*(x42*x42); + T x44 = x27 + x34; + T x45 = x14 + x28; + T x46 = -rho*(x15 + x39 - x43) + rho*(x43 + x44 + x45) - T{72.0000000000000}*f_curr[15] - T{72.0000000000000}*f_curr[3]; + T x47 = T{72.0000000000000}*f_curr[4]; + T x48 = T{72.0000000000000}*f_curr[14]; + T x49 = x18 + u_2; + T x50 = rho*(x20 + x35 - T{9.00000000000000}*x49*x49); + T x51 = x37 + u_1; + T x52 = rho*(x3 + x40 - T{9.00000000000000}*x51*x51); + T x53 = u_1 + u_2; + T x54 = T{9.00000000000000}*(x53*x53); + T x55 = -rho*(x20 + x39 - x54) + rho*(x29 + x44 + x54) - T{72.0000000000000}*f_curr[0] - T{72.0000000000000}*f_curr[18]; + T x56 = T{2.00000000000000}*rho; + T x57 = T{6.00000000000000}*x8; + T x58 = -x31 - x32 - x36 - x41 + x46; + T x59 = -x0 - x1 - x16 - x22 + x30; + T x60 = T{6.00000000000000}*x6; + T x61 = -x47 - x48 - x50 - x52 + x55; + T x62 = T{6.00000000000000}*x12; + T strain = T{0.0277777777777778}*sqrt((x0 + x1 + x16 + x22 + x30)*(x0 + x1 + x16 + x22 + x30) + (x31 + x32 + x36 + x41 + x46)*(x31 + x32 + x36 + x41 + x46) + (x47 + x48 + x50 + x52 + x55)*(x47 + x48 + x50 + x52 + x55) + T{0.500000000000000}*((-x56*(x39 - x62) + x56*(x44 + x62) + x58 + x61 - 72*f_curr[16] - 72*f_curr[2])*(-x56*(x39 - x62) + x56*(x44 + x62) + x58 + x61 - 72*f_curr[16] - 72*f_curr[2])) + T{0.500000000000000}*((-x56*(x10 + x20 - x60) + x56*(x26 + x29 + x60) + x59 + x61 - 72*f_curr[12] - 72*f_curr[6])*(-x56*(x10 + x20 - x60) + x56*(x26 + x29 + x60) + x59 + x61 - 72*f_curr[12] - 72*f_curr[6])) + T{0.500000000000000}*((-x56*(x15 - x57 + x7 - 2) + x56*(x25 + x45 + x57 + 2) + x58 + x59 - 72*f_curr[10] - 72*f_curr[8])*(-x56*(x15 - x57 + x7 - 2) + x56*(x25 + x45 + x57 + 2) + x58 + x59 - 72*f_curr[10] - 72*f_curr[8]))); + + float vorticity = cell_curl_norm[gid]; + float q = vorticity*vorticity - strain*strain; + q = q > 0 ? q : 0; + + surf3Dwrite(q, surface, iX*sizeof(float), iY, iZ); + + if (cell_q != nullptr) { + cell_q[gid] = q; + } +} + +}; diff --git a/tangle/LLBM/kernel/collect_shear_layer_normal.h b/tangle/LLBM/kernel/collect_shear_layer_normal.h new file mode 100644 index 0000000..7bf6eff --- /dev/null +++ b/tangle/LLBM/kernel/collect_shear_layer_normal.h @@ -0,0 +1,139 @@ +#pragma once +#include + +struct CollectShearLayerNormalsF { + +using call_tag = tag::call_by_cell_id; + +template +__device__ static void apply( + descriptor::D3Q19 + , S f_curr[19] + , std::size_t gid + , T* cell_rho + , T* cell_u + , T* cell_shear_normal +) { + T x0 = f_curr[10] + f_curr[13] + f_curr[17]; + T x1 = f_curr[14] + f_curr[5] + f_curr[6]; + T x2 = f_curr[1] + f_curr[2] + f_curr[4]; + T x3 = x0 + x1 + x2 + f_curr[0] + f_curr[11] + f_curr[12] + f_curr[15] + f_curr[16] + f_curr[18] + f_curr[3] + f_curr[7] + f_curr[8] + f_curr[9]; + T x4 = -f_curr[11] + f_curr[7]; + T x5 = -f_curr[15] + f_curr[3]; + T x6 = T{1} / (x3); + T x7 = f_curr[0] - f_curr[18]; + T x14 = T{72.0000000000000}*f_curr[5]; + T x15 = T{72.0000000000000}*f_curr[13]; + T x39 = T{72.0000000000000}*f_curr[1]; + T x40 = T{72.0000000000000}*f_curr[17]; + T x61 = T{72.0000000000000}*f_curr[4]; + T x62 = T{72.0000000000000}*f_curr[14]; + T rho = x3; + T x56 = T{2.00000000000000}*rho; + T u_0 = x6*(x0 + x4 + x5 - f_curr[1] - f_curr[5] - f_curr[8]); + T x9 = u_0*u_0; + T x16 = -u_0; + T x18 = -T{3.00000000000000}*x9; + T x21 = T{6.00000000000000}*u_0; + T x22 = -x21; + T x55 = T{0.0277777777777778}*u_0; + T u_1 = x6*(x1 + x4 + x7 - f_curr[12] - f_curr[13] - f_curr[4]); + T x8 = T{0.0277777777777778}*u_1; + T x10 = u_1*u_1; + T x17 = x16 + u_1; + T x19 = T{6.00000000000000}*u_1; + T x20 = x18 + x19; + T x23 = -T{3.00000000000000}*x10; + T x28 = -u_1; + T x29 = x28 + u_0; + T x30 = x18 - x19; + T x33 = u_0 + u_1; + T x34 = T{9.00000000000000}*(x33*x33); + T u_2 = x6*(x2 + x5 + x7 - f_curr[14] - f_curr[16] - f_curr[17]); + T x11 = u_2*u_2; + T x12 = x10 + x11 + x9; + T x13 = pow(x12, T{-0.500000000000000}); + T x24 = T{2.00000000000000} - T{3.00000000000000}*x11; + T x25 = x23 + x24; + T x26 = x22 + x25; + T x27 = rho*(x20 + x26 + T{9.00000000000000}*(x17*x17)); + T x31 = x21 + x25; + T x32 = rho*(x30 + x31 + T{9.00000000000000}*(x29*x29)); + T x35 = rho*(x20 + x31 + x34) + rho*(x26 + x30 + x34) - T{72.0000000000000}*f_curr[11] - T{72.0000000000000}*f_curr[7]; + T x36 = x14 + x15 - x27 - x32 + x35; + T x37 = x13*x36; + T x38 = T{0.0277777777777778}*u_2; + T x41 = x16 + u_2; + T x42 = T{6.00000000000000}*u_2; + T x43 = x18 + x42; + T x44 = rho*(x26 + x43 + T{9.00000000000000}*(x41*x41)); + T x45 = -u_2; + T x46 = x45 + u_0; + T x47 = -x42; + T x48 = x18 + x47; + T x49 = rho*(x31 + x48 + T{9.00000000000000}*(x46*x46)); + T x50 = u_0 + u_2; + T x51 = T{9.00000000000000}*(x50*x50); + T x52 = rho*(x26 + x48 + x51) + rho*(x31 + x43 + x51) - T{72.0000000000000}*f_curr[15] - T{72.0000000000000}*f_curr[3]; + T x53 = x39 + x40 - x44 - x49 + x52; + T x54 = x13*x53; + T x57 = x25 + T{6.00000000000000}*x9; + T x58 = -x14 - x15 + x27 + x32 + x35; + T x59 = -x39 - x40 + x44 + x49 + x52; + T x60 = x56*(x21 + x57) + x56*(x22 + x57) + x58 + x59 - T{72.0000000000000}*f_curr[10] - T{72.0000000000000}*f_curr[8]; + T x63 = x28 + u_2; + T x64 = x25 + x30; + T x65 = rho*(x42 + x64 + T{9.00000000000000}*(x63*x63)); + T x66 = x45 + u_1; + T x67 = x20 + x25; + T x68 = rho*(x47 + x67 + T{9.00000000000000}*(x66*x66)); + T x69 = u_1 + u_2; + T x70 = T{9.00000000000000}*(x69*x69); + T x71 = rho*(x42 + x67 + x70) + rho*(x47 + x64 + x70) - T{72.0000000000000}*f_curr[0] - T{72.0000000000000}*f_curr[18]; + T x72 = x61 + x62 - x65 - x68 + x71; + T x73 = T{6.00000000000000}*x10 + x24; + T x74 = -x61 - x62 + x65 + x68 + x71; + T x75 = x56*(x20 + x73) + x56*(x30 + x73) + x58 + x74 - T{72.0000000000000}*f_curr[12] - T{72.0000000000000}*f_curr[6]; + T x76 = T{6.00000000000000}*x11 + x23 + T{2.00000000000000}; + T x77 = x56*(x43 + x76) + x56*(x48 + x76) + x59 + x74 - T{72.0000000000000}*f_curr[16] - T{72.0000000000000}*f_curr[2]; + T x78 = ((x36*u_0 + x72*u_2 + x75*u_1)*u_1 + (x36*u_1 + x53*u_2 + x60*u_0)*u_0 + (x53*u_0 + x72*u_1 + x77*u_2)*u_2)/x12; + T x79 = x13*x72; + T n_0 = -x13*x55*x60 - x37*x8 - x38*x54 + x55*x78; + T n_1 = -x13*x75*x8 - x37*x55 - x38*x79 + x78*x8; + T n_2 = -x13*x38*x77 + x38*x78 - x54*x55 - x79*x8; + + cell_rho[gid] = rho; + + cell_u[3*gid+0] = u_0; + cell_u[3*gid+1] = u_1; + cell_u[3*gid+2] = u_2; + + float3 n = normalize(make_float3(n_0, n_1, n_2)); + cell_shear_normal[3*gid+0] = n.x; + cell_shear_normal[3*gid+1] = n.y; + cell_shear_normal[3*gid+2] = n.z; +} + +}; + +struct CollectShearLayerVisibilityF { + +using call_tag = tag::post_process_by_spatial_cell_mask; + +template +__device__ static void apply( + descriptor::D3Q19 + , std::size_t gid + , std::size_t iX + , std::size_t iY + , std::size_t iZ + , T* shear_normal + , float3 view_direction + , cudaSurfaceObject_t surface +) { + float3 n = make_float3(shear_normal[3*gid+0], shear_normal[3*gid+1], shear_normal[3*gid+2]); + float visibility = dot(n, view_direction); + surf3Dwrite(visibility, surface, iX*sizeof(float), iY, iZ); +} + +}; diff --git a/tangle/LLBM/kernel/collect_streamlines.h b/tangle/LLBM/kernel/collect_streamlines.h new file mode 100644 index 0000000..c787713 --- /dev/null +++ b/tangle/LLBM/kernel/collect_streamlines.h @@ -0,0 +1,48 @@ +#pragma once + +__device__ void writeDot(cudaSurfaceObject_t texture, int x, int y, uchar4 pixel) { + surf2Dwrite(pixel, texture, (x-1)*sizeof(uchar4), (y )); + surf2Dwrite(pixel, texture, (x-1)*sizeof(uchar4), (y+1)); + surf2Dwrite(pixel, texture, (x )*sizeof(uchar4), (y )); + surf2Dwrite(pixel, texture, (x )*sizeof(uchar4), (y+1)); +} + +__device__ float2 readVelocity(cudaSurfaceObject_t u_x, cudaSurfaceObject_t u_y, float2 p) { + return make_float2(tex2D(u_x, p.x, p.y), tex2D(u_y, p.x, p.y)); +} + +template +__global__ void renderStreamlinesToTexture( + descriptor::Cuboid cuboid + , cudaSurfaceObject_t u_x + , cudaSurfaceObject_t u_y + , T* origins + , unsigned nSteps + , float charU + , cudaSurfaceObject_t texture +) { + const int iOrigin = threadIdx.x + blockIdx.x * blockDim.x; + + float h = 1; + float2 p = make_float2(origins[2*iOrigin+0], origins[2*iOrigin+1]); + + for (unsigned iStep = 0; iStep < nSteps; ++iStep) { + float2 u = make_float2(tex2D(u_x, p.x, p.y), tex2D(u_y, p.x, p.y)); + + float2 k1 = readVelocity(u_x, u_y, p) / charU; + float2 k2 = readVelocity(u_x, u_y, p + 0.5f*h * k1) / charU; + float2 k3 = readVelocity(u_x, u_y, p + 0.5f*h * k2) / charU; + float2 k4 = readVelocity(u_x, u_y, p + h * k3) / charU; + + p += h * (1.f/6.f*k1 + 1.f/3.f*k2 + 1.f/3.f*k3 + 1.f/6.f*k4); + + const int screenX = std::nearbyint(p.x); + const int screenY = cuboid.nY-1 - std::nearbyint(p.y); + + if (screenX < 1 || screenY < 1 || screenX > cuboid.nX-2 || screenY > cuboid.nY-2) { + break; + } + + writeDot(texture, screenX, screenY, {255,255,255,255}); + } +} diff --git a/tangle/LLBM/kernel/collect_velocity_norm.h b/tangle/LLBM/kernel/collect_velocity_norm.h new file mode 100644 index 0000000..ea099a8 --- /dev/null +++ b/tangle/LLBM/kernel/collect_velocity_norm.h @@ -0,0 +1,61 @@ +#pragma once +#include + +struct CollectVelocityNormF { + +using call_tag = tag::post_process_by_spatial_cell_mask; + +template +__device__ static void apply( + descriptor::D2Q9 + , std::size_t gid + , std::size_t iX + , std::size_t iY + , std::size_t iZ + , T* u + , cudaSurfaceObject_t surface +) { + float norm = length(make_float2(u[2*gid+0], u[2*gid+1])); + surf2Dwrite(norm, surface, iX*sizeof(float), iY); +} + +template +__device__ static void apply( + descriptor::D3Q19 + , std::size_t gid + , std::size_t iX + , std::size_t iY + , std::size_t iZ + , T* u + , cudaSurfaceObject_t surface + , T* u_norm = nullptr +) { + float norm = length(make_float3(u[3*gid+0], u[3*gid+1], u[3*gid+2])); + surf3Dwrite(norm, surface, iX*sizeof(float), iY, iZ); + if (u_norm != nullptr) { + u_norm[gid] = norm; + } +} + +}; + +template +__global__ void renderSliceViewToTexture(std::size_t width, std::size_t height, SLICE slice, SAMPLE sample, PALETTE palette, cudaSurfaceObject_t texture) { + const int screenX = threadIdx.x + blockIdx.x * blockDim.x; + const int screenY = threadIdx.y + blockIdx.y * blockDim.y; + + if (screenX > width-1 || screenY > height-1) { + return; + } + + const std::size_t gid = slice(screenX,screenY); + float3 color = palette(sample(gid)); + + uchar4 pixel { + static_cast(color.x * 255), + static_cast(color.y * 255), + static_cast(color.z * 255), + 255 + }; + surf2Dwrite(pixel, texture, screenX*sizeof(uchar4), screenY); +} diff --git a/tangle/LLBM/kernel/collide.h b/tangle/LLBM/kernel/collide.h new file mode 100644 index 0000000..7ca02bf --- /dev/null +++ b/tangle/LLBM/kernel/collide.h @@ -0,0 +1,131 @@ +#pragma once +#include + +struct BgkCollideO { + +using call_tag = tag::call_by_cell_id; + +template +__device__ static void apply(descriptor::D2Q9, S f_curr[9], S f_next[9], std::size_t gid, T tau) { + T m0 = f_curr[1] + f_curr[2]; + T m1 = f_curr[3] + f_curr[6]; + T m2 = m0 + m1 + f_curr[0] + f_curr[4] + f_curr[5] + f_curr[7] + f_curr[8]; + T m3 = f_curr[0] - f_curr[8]; + T m4 = T{1} / (m2); + T rho = m2; + T u_0 = -m4*(m0 + m3 - f_curr[6] - f_curr[7]); + T u_1 = -m4*(m1 + m3 - f_curr[2] - f_curr[5]); + T x0 = T{1} / (tau); + T x1 = T{0.0138888888888889}*x0; + T x2 = T{6.00000000000000}*u_1; + T x3 = T{6.00000000000000}*u_0; + T x4 = u_0 + u_1; + T x5 = T{9.00000000000000}*(x4*x4); + T x6 = u_1*u_1; + T x7 = T{3.00000000000000}*x6; + T x8 = u_0*u_0; + T x9 = T{3.00000000000000}*x8; + T x10 = x9 + T{-2.00000000000000}; + T x11 = x10 + x7; + T x12 = T{0.0555555555555556}*x0; + T x13 = -x3; + T x14 = T{2.00000000000000} - x7; + T x15 = x14 + T{6.00000000000000}*x8; + T x16 = -x9; + T x17 = x16 + x2; + T x18 = u_0 - u_1; + T x19 = x14 + T{9.00000000000000}*(x18*x18); + T x20 = T{6.00000000000000}*x6; + f_next[0] = -x1*(rho*(x11 + x2 + x3 - x5) + T{72.0000000000000}*f_curr[0]) + f_curr[0]; + f_next[1] = x12*(rho*(x13 + x15) - T{18.0000000000000}*f_curr[1]) + f_curr[1]; + f_next[2] = x1*(rho*(x13 + x17 + x19) - T{72.0000000000000}*f_curr[2]) + f_curr[2]; + f_next[3] = -x12*(rho*(x10 + x2 - x20) + T{18.0000000000000}*f_curr[3]) + f_curr[3]; + f_next[4] = -T{0.111111111111111}*x0*(T{2.00000000000000}*rho*x11 + T{9.00000000000000}*f_curr[4]) + f_curr[4]; + f_next[5] = x12*(rho*(x17 + x20 + T{2.00000000000000}) - T{18.0000000000000}*f_curr[5]) + f_curr[5]; + f_next[6] = x1*(rho*(x16 + x19 - x2 + x3) - T{72.0000000000000}*f_curr[6]) + f_curr[6]; + f_next[7] = x12*(rho*(x15 + x3) - T{18.0000000000000}*f_curr[7]) + f_curr[7]; + f_next[8] = x1*(rho*(x14 + x17 + x3 + x5) - T{72.0000000000000}*f_curr[8]) + f_curr[8]; + +} + +template +__device__ static void apply(descriptor::D3Q19, S f_curr[19], S f_next[19], std::size_t gid, T tau) { + T m0 = f_curr[10] + f_curr[13] + f_curr[17]; + T m1 = f_curr[14] + f_curr[5] + f_curr[6]; + T m2 = f_curr[1] + f_curr[2] + f_curr[4]; + T m3 = m0 + m1 + m2 + f_curr[0] + f_curr[11] + f_curr[12] + f_curr[15] + f_curr[16] + f_curr[18] + f_curr[3] + f_curr[7] + f_curr[8] + f_curr[9]; + T m4 = -f_curr[11] + f_curr[7]; + T m5 = -f_curr[15] + f_curr[3]; + T m6 = T{1} / (m3); + T m7 = f_curr[0] - f_curr[18]; + T rho = m3; + T u_0 = m6*(m0 + m4 + m5 - f_curr[1] - f_curr[5] - f_curr[8]); + T u_1 = m6*(m1 + m4 + m7 - f_curr[12] - f_curr[13] - f_curr[4]); + T u_2 = m6*(m2 + m5 + m7 - f_curr[14] - f_curr[16] - f_curr[17]); + T x0 = T{1} / (tau); + T x1 = T{0.0138888888888889}*x0; + T x2 = u_1 + u_2; + T x3 = T{9.00000000000000}*(x2*x2); + T x4 = T{6.00000000000000}*u_2; + T x5 = u_1*u_1; + T x6 = T{3.00000000000000}*x5; + T x7 = -x6; + T x8 = u_0*u_0; + T x9 = T{3.00000000000000}*x8; + T x10 = T{2.00000000000000} - x9; + T x11 = x10 + x7; + T x12 = x11 + x4; + T x13 = u_2*u_2; + T x14 = T{3.00000000000000}*x13; + T x15 = -x14; + T x16 = T{6.00000000000000}*u_1; + T x17 = x15 + x16; + T x18 = T{6.00000000000000}*u_0; + T x19 = -u_0; + T x20 = x19 + u_2; + T x21 = x14 + x6 + T{-2.00000000000000}; + T x22 = x21 + x9; + T x23 = x22 - x4; + T x24 = T{0.0277777777777778}*x0; + T x25 = T{6.00000000000000}*x13; + T x26 = u_0 + u_2; + T x27 = T{9.00000000000000}*(x26*x26); + T x28 = x15 + x18; + T x29 = -u_1; + T x30 = x29 + u_2; + T x31 = x19 + u_1; + T x32 = -x16; + T x33 = x18 + x22; + T x34 = T{6.00000000000000}*x5; + T x35 = u_0 + u_1; + T x36 = T{9.00000000000000}*(x35*x35); + T x37 = T{6.00000000000000}*x8; + T x38 = x9 + T{-2.00000000000000}; + T x39 = x29 + u_0; + T x40 = -x18 + x22; + T x41 = -u_2; + T x42 = x41 + u_1; + T x43 = x22 + x4; + T x44 = x41 + u_0; + f_next[0] = x1*(rho*(x12 + x17 + x3) - T{72.0000000000000}*f_curr[0]) + f_curr[0]; + f_next[1] = -x1*(rho*(x18 + x23 - T{9.00000000000000}*x20*x20) + T{72.0000000000000}*f_curr[1]) + f_curr[1]; + f_next[2] = x24*(rho*(x12 + x25) - T{36.0000000000000}*f_curr[2]) + f_curr[2]; + f_next[3] = x1*(rho*(x12 + x27 + x28) - T{72.0000000000000}*f_curr[3]) + f_curr[3]; + f_next[4] = -x1*(rho*(x16 + x23 - T{9.00000000000000}*x30*x30) + T{72.0000000000000}*f_curr[4]) + f_curr[4]; + f_next[5] = -x1*(rho*(x32 + x33 - T{9.00000000000000}*x31*x31) + T{72.0000000000000}*f_curr[5]) + f_curr[5]; + f_next[6] = x24*(rho*(x10 + x17 + x34) - T{36.0000000000000}*f_curr[6]) + f_curr[6]; + f_next[7] = x1*(rho*(x11 + x17 + x18 + x36) - T{72.0000000000000}*f_curr[7]) + f_curr[7]; + f_next[8] = -x24*(rho*(x18 + x21 - x37) + T{36.0000000000000}*f_curr[8]) + f_curr[8]; + f_next[9] = -T{0.166666666666667}*x0*(rho*x22 + T{6.00000000000000}*f_curr[9]) + f_curr[9]; + f_next[10] = x24*(rho*(x28 + x37 + x7 + T{2.00000000000000}) - T{36.0000000000000}*f_curr[10]) + f_curr[10]; + f_next[11] = -x1*(rho*(x16 + x33 - x36) + T{72.0000000000000}*f_curr[11]) + f_curr[11]; + f_next[12] = -x24*(rho*(x14 + x16 - x34 + x38) + T{36.0000000000000}*f_curr[12]) + f_curr[12]; + f_next[13] = -x1*(rho*(x16 + x40 - T{9.00000000000000}*x39*x39) + T{72.0000000000000}*f_curr[13]) + f_curr[13]; + f_next[14] = -x1*(rho*(x32 + x43 - T{9.00000000000000}*x42*x42) + T{72.0000000000000}*f_curr[14]) + f_curr[14]; + f_next[15] = -x1*(rho*(-x27 + x33 + x4) + T{72.0000000000000}*f_curr[15]) + f_curr[15]; + f_next[16] = -x24*(rho*(-x25 + x38 + x4 + x6) + T{36.0000000000000}*f_curr[16]) + f_curr[16]; + f_next[17] = -x1*(rho*(x4 + x40 - T{9.00000000000000}*x44*x44) + T{72.0000000000000}*f_curr[17]) + f_curr[17]; + f_next[18] = -x1*(rho*(x16 - x3 + x43) + T{72.0000000000000}*f_curr[18]) + f_curr[18]; +} + +}; diff --git a/tangle/LLBM/kernel/equilibrium_density_wall.h b/tangle/LLBM/kernel/equilibrium_density_wall.h new file mode 100644 index 0000000..4f757f9 --- /dev/null +++ b/tangle/LLBM/kernel/equilibrium_density_wall.h @@ -0,0 +1,226 @@ +#pragma once +#include +#include +#include + +struct EquilibriumDensityWallO { + +using call_tag = tag::call_by_cell_id; + +template +__device__ static void apply(descriptor::D2Q9, S f_curr[9], S f_next[9], std::size_t gid, T rho_w, WallNormal<1,0>) { + T u = (rho_w - T{2.00000000000000}*f_curr[0] - T{2.00000000000000}*f_curr[1] - T{2.00000000000000}*f_curr[2] - f_curr[3] - f_curr[4] - f_curr[5])/rho_w; + T rho = rho_w; + T u_0 = u; + T u_1 = 0.; + T e0 = T{0.0277777777777778}*rho; + T e1 = T{3.00000000000000}*u_1; + T e2 = T{3.00000000000000}*u_0; + T e3 = u_0 + u_1; + T e4 = T{4.50000000000000}*(e3*e3); + T e5 = u_1*u_1; + T e6 = T{1.50000000000000}*e5; + T e7 = u_0*u_0; + T e8 = T{1.50000000000000}*e7; + T e9 = e8 + T{-1.00000000000000}; + T e10 = e6 + e9; + T e11 = T{0.111111111111111}*rho; + T e12 = -e2; + T e13 = T{1.00000000000000} - e6; + T e14 = e13 + T{3.00000000000000}*e7; + T e15 = -e8; + T e16 = e1 + e15; + T e17 = u_0 - u_1; + T e18 = e13 + T{4.50000000000000}*(e17*e17); + T e19 = T{3.00000000000000}*e5; + f_next[0] = -e0*(e1 + e10 + e2 - e4); + f_next[1] = e11*(e12 + e14); + f_next[2] = e0*(e12 + e16 + e18); + f_next[3] = -e11*(e1 - e19 + e9); + f_next[4] = -T{0.444444444444444}*e10*rho; + f_next[5] = e11*(e16 + e19 + T{1.00000000000000}); + f_next[6] = e0*(-e1 + e15 + e18 + e2); + f_next[7] = e11*(e14 + e2); + f_next[8] = e0*(e13 + e16 + e2 + e4); + +} + +template +__device__ static void apply(descriptor::D2Q9, S f_curr[9], S f_next[9], std::size_t gid, T rho_w, WallNormal<-1,0>) { + T u = (-rho_w + f_curr[3] + f_curr[4] + f_curr[5] + T{2.00000000000000}*f_curr[6] + T{2.00000000000000}*f_curr[7] + T{2.00000000000000}*f_curr[8])/rho_w; + T rho = rho_w; + T u_0 = u; + T u_1 = 0.; + T e0 = T{0.0277777777777778}*rho; + T e1 = T{3.00000000000000}*u_1; + T e2 = T{3.00000000000000}*u_0; + T e3 = u_0 + u_1; + T e4 = T{4.50000000000000}*(e3*e3); + T e5 = u_1*u_1; + T e6 = T{1.50000000000000}*e5; + T e7 = u_0*u_0; + T e8 = T{1.50000000000000}*e7; + T e9 = e8 + T{-1.00000000000000}; + T e10 = e6 + e9; + T e11 = T{0.111111111111111}*rho; + T e12 = -e2; + T e13 = T{1.00000000000000} - e6; + T e14 = e13 + T{3.00000000000000}*e7; + T e15 = -e8; + T e16 = e1 + e15; + T e17 = u_0 - u_1; + T e18 = e13 + T{4.50000000000000}*(e17*e17); + T e19 = T{3.00000000000000}*e5; + f_next[0] = -e0*(e1 + e10 + e2 - e4); + f_next[1] = e11*(e12 + e14); + f_next[2] = e0*(e12 + e16 + e18); + f_next[3] = -e11*(e1 - e19 + e9); + f_next[4] = -T{0.444444444444444}*e10*rho; + f_next[5] = e11*(e16 + e19 + T{1.00000000000000}); + f_next[6] = e0*(-e1 + e15 + e18 + e2); + f_next[7] = e11*(e14 + e2); + f_next[8] = e0*(e13 + e16 + e2 + e4); + +} + +template +__device__ static void apply(descriptor::D3Q19, S f_curr[19], S f_next[19], std::size_t gid, T rho_w, WallNormal<1,0,0>) { + T u = (rho_w - f_curr[0] - T{2.00000000000000}*f_curr[11] - f_curr[12] - f_curr[14] - T{2.00000000000000}*f_curr[15] - f_curr[16] - f_curr[18] - T{2.00000000000000}*f_curr[1] - f_curr[2] - f_curr[4] - T{2.00000000000000}*f_curr[5] - f_curr[6] - T{2.00000000000000}*f_curr[8] - f_curr[9])/rho_w; + T rho = rho_w; + T u_0 = u; + T u_1 = 0.; + T u_2 = 0.; + T e0 = T{0.0277777777777778}*rho; + T e1 = u_1 + u_2; + T e2 = T{4.50000000000000}*(e1*e1); + T e3 = T{3.00000000000000}*u_2; + T e4 = u_1*u_1; + T e5 = T{1.50000000000000}*e4; + T e6 = -e5; + T e7 = u_0*u_0; + T e8 = T{1.50000000000000}*e7; + T e9 = T{1.00000000000000} - e8; + T e10 = e6 + e9; + T e11 = e10 + e3; + T e12 = T{3.00000000000000}*u_1; + T e13 = u_2*u_2; + T e14 = T{1.50000000000000}*e13; + T e15 = -e14; + T e16 = e12 + e15; + T e17 = T{3.00000000000000}*u_0; + T e18 = -u_2; + T e19 = e18 + u_0; + T e20 = -T{4.50000000000000}*e19*e19; + T e21 = e14 + e5 + T{-1.00000000000000}; + T e22 = e21 + e8; + T e23 = e22 - e3; + T e24 = T{0.0555555555555556}*rho; + T e25 = T{3.00000000000000}*e13; + T e26 = u_0 + u_2; + T e27 = T{4.50000000000000}*(e26*e26); + T e28 = e15 + e17; + T e29 = e18 + u_1; + T e30 = -T{4.50000000000000}*e29*e29; + T e31 = -e12; + T e32 = u_0 - u_1; + T e33 = -T{4.50000000000000}*e32*e32; + T e34 = e17 + e22; + T e35 = T{3.00000000000000}*e4; + T e36 = u_0 + u_1; + T e37 = T{4.50000000000000}*(e36*e36); + T e38 = T{3.00000000000000}*e7; + T e39 = e8 + T{-1.00000000000000}; + T e40 = -e17 + e22; + T e41 = e22 + e3; + f_next[0] = e0*(e11 + e16 + e2); + f_next[1] = -e0*(e17 + e20 + e23); + f_next[2] = e24*(e11 + e25); + f_next[3] = e0*(e11 + e27 + e28); + f_next[4] = -e0*(e12 + e23 + e30); + f_next[5] = -e0*(e31 + e33 + e34); + f_next[6] = e24*(e16 + e35 + e9); + f_next[7] = e0*(e10 + e16 + e17 + e37); + f_next[8] = -e24*(e17 + e21 - e38); + f_next[9] = -T{0.333333333333333}*e22*rho; + f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); + f_next[11] = -e0*(e12 + e34 - e37); + f_next[12] = -e24*(e12 + e14 - e35 + e39); + f_next[13] = -e0*(e12 + e33 + e40); + f_next[14] = -e0*(e30 + e31 + e41); + f_next[15] = -e0*(-e27 + e3 + e34); + f_next[16] = -e24*(-e25 + e3 + e39 + e5); + f_next[17] = -e0*(e20 + e3 + e40); + f_next[18] = -e0*(e12 - e2 + e41); +} + +template +__device__ static void apply(descriptor::D3Q19, S f_curr[19], S f_next[19], std::size_t gid, T rho_w, WallNormal<-1,0,0>) { + T u = (-rho_w + f_curr[0] + T{2.00000000000000}*f_curr[10] + f_curr[12] + T{2.00000000000000}*f_curr[13] + f_curr[14] + f_curr[16] + T{2.00000000000000}*f_curr[17] + f_curr[18] + f_curr[2] + T{2.00000000000000}*f_curr[3] + f_curr[4] + f_curr[6] + T{2.00000000000000}*f_curr[7] + f_curr[9])/rho_w; + T rho = rho_w; + T u_0 = u; + T u_1 = 0.; + T u_2 = 0.; + T e0 = T{0.0277777777777778}*rho; + T e1 = u_1 + u_2; + T e2 = T{4.50000000000000}*(e1*e1); + T e3 = T{3.00000000000000}*u_2; + T e4 = u_1*u_1; + T e5 = T{1.50000000000000}*e4; + T e6 = -e5; + T e7 = u_0*u_0; + T e8 = T{1.50000000000000}*e7; + T e9 = T{1.00000000000000} - e8; + T e10 = e6 + e9; + T e11 = e10 + e3; + T e12 = T{3.00000000000000}*u_1; + T e13 = u_2*u_2; + T e14 = T{1.50000000000000}*e13; + T e15 = -e14; + T e16 = e12 + e15; + T e17 = T{3.00000000000000}*u_0; + T e18 = -u_2; + T e19 = e18 + u_0; + T e20 = -T{4.50000000000000}*e19*e19; + T e21 = e14 + e5 + T{-1.00000000000000}; + T e22 = e21 + e8; + T e23 = e22 - e3; + T e24 = T{0.0555555555555556}*rho; + T e25 = T{3.00000000000000}*e13; + T e26 = u_0 + u_2; + T e27 = T{4.50000000000000}*(e26*e26); + T e28 = e15 + e17; + T e29 = e18 + u_1; + T e30 = -T{4.50000000000000}*e29*e29; + T e31 = -e12; + T e32 = u_0 - u_1; + T e33 = -T{4.50000000000000}*e32*e32; + T e34 = e17 + e22; + T e35 = T{3.00000000000000}*e4; + T e36 = u_0 + u_1; + T e37 = T{4.50000000000000}*(e36*e36); + T e38 = T{3.00000000000000}*e7; + T e39 = e8 + T{-1.00000000000000}; + T e40 = -e17 + e22; + T e41 = e22 + e3; + f_next[0] = e0*(e11 + e16 + e2); + f_next[1] = -e0*(e17 + e20 + e23); + f_next[2] = e24*(e11 + e25); + f_next[3] = e0*(e11 + e27 + e28); + f_next[4] = -e0*(e12 + e23 + e30); + f_next[5] = -e0*(e31 + e33 + e34); + f_next[6] = e24*(e16 + e35 + e9); + f_next[7] = e0*(e10 + e16 + e17 + e37); + f_next[8] = -e24*(e17 + e21 - e38); + f_next[9] = -T{0.333333333333333}*e22*rho; + f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); + f_next[11] = -e0*(e12 + e34 - e37); + f_next[12] = -e24*(e12 + e14 - e35 + e39); + f_next[13] = -e0*(e12 + e33 + e40); + f_next[14] = -e0*(e30 + e31 + e41); + f_next[15] = -e0*(-e27 + e3 + e34); + f_next[16] = -e24*(-e25 + e3 + e39 + e5); + f_next[17] = -e0*(e20 + e3 + e40); + f_next[18] = -e0*(e12 - e2 + e41); +} + +}; diff --git a/tangle/LLBM/kernel/equilibrium_velocity_wall.h b/tangle/LLBM/kernel/equilibrium_velocity_wall.h new file mode 100644 index 0000000..23354dc --- /dev/null +++ b/tangle/LLBM/kernel/equilibrium_velocity_wall.h @@ -0,0 +1,222 @@ +#pragma once +#include +#include +#include + +struct EquilibriumVelocityWallO { + +using call_tag = tag::call_by_cell_id; + +template +__device__ static void apply(descriptor::D2Q9, S f_curr[9], S f_next[9], std::size_t gid, T u_w, WallNormal<1,0>) { + T rho = -(T{2.00000000000000}*f_curr[0] + T{2.00000000000000}*f_curr[1] + T{2.00000000000000}*f_curr[2] + f_curr[3] + f_curr[4] + f_curr[5])/(u_w + T{-1.00000000000000}); + T u_0 = u_w; + T u_1 = 0.; + T e0 = T{0.0277777777777778}*rho; + T e1 = T{3.00000000000000}*u_1; + T e2 = T{3.00000000000000}*u_0; + T e3 = u_0 + u_1; + T e4 = T{4.50000000000000}*(e3*e3); + T e5 = u_1*u_1; + T e6 = T{1.50000000000000}*e5; + T e7 = u_0*u_0; + T e8 = T{1.50000000000000}*e7; + T e9 = e8 + T{-1.00000000000000}; + T e10 = e6 + e9; + T e11 = T{0.111111111111111}*rho; + T e12 = -e2; + T e13 = T{1.00000000000000} - e6; + T e14 = e13 + T{3.00000000000000}*e7; + T e15 = -e8; + T e16 = e1 + e15; + T e17 = u_0 - u_1; + T e18 = e13 + T{4.50000000000000}*(e17*e17); + T e19 = T{3.00000000000000}*e5; + f_next[0] = -e0*(e1 + e10 + e2 - e4); + f_next[1] = e11*(e12 + e14); + f_next[2] = e0*(e12 + e16 + e18); + f_next[3] = -e11*(e1 - e19 + e9); + f_next[4] = -T{0.444444444444444}*e10*rho; + f_next[5] = e11*(e16 + e19 + T{1.00000000000000}); + f_next[6] = e0*(-e1 + e15 + e18 + e2); + f_next[7] = e11*(e14 + e2); + f_next[8] = e0*(e13 + e16 + e2 + e4); + +} + +template +__device__ static void apply(descriptor::D2Q9, S f_curr[9], S f_next[9], std::size_t gid, T u_w, WallNormal<-1,0>) { + T rho = (f_curr[3] + f_curr[4] + f_curr[5] + T{2.00000000000000}*f_curr[6] + T{2.00000000000000}*f_curr[7] + T{2.00000000000000}*f_curr[8])/(u_w + T{1.00000000000000}); + T u_0 = u_w; + T u_1 = 0; + T e0 = T{0.0277777777777778}*rho; + T e1 = T{3.00000000000000}*u_1; + T e2 = T{3.00000000000000}*u_0; + T e3 = u_0 + u_1; + T e4 = T{4.50000000000000}*(e3*e3); + T e5 = u_1*u_1; + T e6 = T{1.50000000000000}*e5; + T e7 = u_0*u_0; + T e8 = T{1.50000000000000}*e7; + T e9 = e8 + T{-1.00000000000000}; + T e10 = e6 + e9; + T e11 = T{0.111111111111111}*rho; + T e12 = -e2; + T e13 = T{1.00000000000000} - e6; + T e14 = e13 + T{3.00000000000000}*e7; + T e15 = -e8; + T e16 = e1 + e15; + T e17 = u_0 - u_1; + T e18 = e13 + T{4.50000000000000}*(e17*e17); + T e19 = T{3.00000000000000}*e5; + f_next[0] = -e0*(e1 + e10 + e2 - e4); + f_next[1] = e11*(e12 + e14); + f_next[2] = e0*(e12 + e16 + e18); + f_next[3] = -e11*(e1 - e19 + e9); + f_next[4] = -T{0.444444444444444}*e10*rho; + f_next[5] = e11*(e16 + e19 + T{1.00000000000000}); + f_next[6] = e0*(-e1 + e15 + e18 + e2); + f_next[7] = e11*(e14 + e2); + f_next[8] = e0*(e13 + e16 + e2 + e4); + +} + +template +__device__ static void apply(descriptor::D3Q19, S f_curr[19], S f_next[19], std::size_t gid, T u_w, WallNormal<1,0,0>) { + T rho = -(f_curr[0] + T{2.00000000000000}*f_curr[11] + f_curr[12] + f_curr[14] + T{2.00000000000000}*f_curr[15] + f_curr[16] + f_curr[18] + T{2.00000000000000}*f_curr[1] + f_curr[2] + f_curr[4] + T{2.00000000000000}*f_curr[5] + f_curr[6] + T{2.00000000000000}*f_curr[8] + f_curr[9])/(u_w + T{-1.00000000000000}); + T u_0 = u_w; + T u_1 = 0; + T u_2 = 0; + T e0 = T{0.0277777777777778}*rho; + T e1 = u_1 + u_2; + T e2 = T{4.50000000000000}*(e1*e1); + T e3 = T{3.00000000000000}*u_2; + T e4 = u_1*u_1; + T e5 = T{1.50000000000000}*e4; + T e6 = -e5; + T e7 = u_0*u_0; + T e8 = T{1.50000000000000}*e7; + T e9 = T{1.00000000000000} - e8; + T e10 = e6 + e9; + T e11 = e10 + e3; + T e12 = T{3.00000000000000}*u_1; + T e13 = u_2*u_2; + T e14 = T{1.50000000000000}*e13; + T e15 = -e14; + T e16 = e12 + e15; + T e17 = T{3.00000000000000}*u_0; + T e18 = -u_2; + T e19 = e18 + u_0; + T e20 = -T{4.50000000000000}*e19*e19; + T e21 = e14 + e5 + T{-1.00000000000000}; + T e22 = e21 + e8; + T e23 = e22 - e3; + T e24 = T{0.0555555555555556}*rho; + T e25 = T{3.00000000000000}*e13; + T e26 = u_0 + u_2; + T e27 = T{4.50000000000000}*(e26*e26); + T e28 = e15 + e17; + T e29 = e18 + u_1; + T e30 = -T{4.50000000000000}*e29*e29; + T e31 = -e12; + T e32 = u_0 - u_1; + T e33 = -T{4.50000000000000}*e32*e32; + T e34 = e17 + e22; + T e35 = T{3.00000000000000}*e4; + T e36 = u_0 + u_1; + T e37 = T{4.50000000000000}*(e36*e36); + T e38 = T{3.00000000000000}*e7; + T e39 = e8 + T{-1.00000000000000}; + T e40 = -e17 + e22; + T e41 = e22 + e3; + f_next[0] = e0*(e11 + e16 + e2); + f_next[1] = -e0*(e17 + e20 + e23); + f_next[2] = e24*(e11 + e25); + f_next[3] = e0*(e11 + e27 + e28); + f_next[4] = -e0*(e12 + e23 + e30); + f_next[5] = -e0*(e31 + e33 + e34); + f_next[6] = e24*(e16 + e35 + e9); + f_next[7] = e0*(e10 + e16 + e17 + e37); + f_next[8] = -e24*(e17 + e21 - e38); + f_next[9] = -T{0.333333333333333}*e22*rho; + f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); + f_next[11] = -e0*(e12 + e34 - e37); + f_next[12] = -e24*(e12 + e14 - e35 + e39); + f_next[13] = -e0*(e12 + e33 + e40); + f_next[14] = -e0*(e30 + e31 + e41); + f_next[15] = -e0*(-e27 + e3 + e34); + f_next[16] = -e24*(-e25 + e3 + e39 + e5); + f_next[17] = -e0*(e20 + e3 + e40); + f_next[18] = -e0*(e12 - e2 + e41); +} + +template +__device__ static void apply(descriptor::D3Q19, S f_curr[19], S f_next[19], std::size_t gid, T u_w, WallNormal<-1,0,0>) { + T rho = (f_curr[0] + T{2.00000000000000}*f_curr[10] + f_curr[12] + T{2.00000000000000}*f_curr[13] + f_curr[14] + f_curr[16] + T{2.00000000000000}*f_curr[17] + f_curr[18] + f_curr[2] + T{2.00000000000000}*f_curr[3] + f_curr[4] + f_curr[6] + T{2.00000000000000}*f_curr[7] + f_curr[9])/(u_w + T{1.00000000000000}); + T u_0 = u_w; + T u_1 = 0; + T u_2 = 0; + T e0 = T{0.0277777777777778}*rho; + T e1 = u_1 + u_2; + T e2 = T{4.50000000000000}*(e1*e1); + T e3 = T{3.00000000000000}*u_2; + T e4 = u_1*u_1; + T e5 = T{1.50000000000000}*e4; + T e6 = -e5; + T e7 = u_0*u_0; + T e8 = T{1.50000000000000}*e7; + T e9 = T{1.00000000000000} - e8; + T e10 = e6 + e9; + T e11 = e10 + e3; + T e12 = T{3.00000000000000}*u_1; + T e13 = u_2*u_2; + T e14 = T{1.50000000000000}*e13; + T e15 = -e14; + T e16 = e12 + e15; + T e17 = T{3.00000000000000}*u_0; + T e18 = -u_2; + T e19 = e18 + u_0; + T e20 = -T{4.50000000000000}*e19*e19; + T e21 = e14 + e5 + T{-1.00000000000000}; + T e22 = e21 + e8; + T e23 = e22 - e3; + T e24 = T{0.0555555555555556}*rho; + T e25 = T{3.00000000000000}*e13; + T e26 = u_0 + u_2; + T e27 = T{4.50000000000000}*(e26*e26); + T e28 = e15 + e17; + T e29 = e18 + u_1; + T e30 = -T{4.50000000000000}*e29*e29; + T e31 = -e12; + T e32 = u_0 - u_1; + T e33 = -T{4.50000000000000}*e32*e32; + T e34 = e17 + e22; + T e35 = T{3.00000000000000}*e4; + T e36 = u_0 + u_1; + T e37 = T{4.50000000000000}*(e36*e36); + T e38 = T{3.00000000000000}*e7; + T e39 = e8 + T{-1.00000000000000}; + T e40 = -e17 + e22; + T e41 = e22 + e3; + f_next[0] = e0*(e11 + e16 + e2); + f_next[1] = -e0*(e17 + e20 + e23); + f_next[2] = e24*(e11 + e25); + f_next[3] = e0*(e11 + e27 + e28); + f_next[4] = -e0*(e12 + e23 + e30); + f_next[5] = -e0*(e31 + e33 + e34); + f_next[6] = e24*(e16 + e35 + e9); + f_next[7] = e0*(e10 + e16 + e17 + e37); + f_next[8] = -e24*(e17 + e21 - e38); + f_next[9] = -T{0.333333333333333}*e22*rho; + f_next[10] = e24*(e28 + e38 + e6 + T{1.00000000000000}); + f_next[11] = -e0*(e12 + e34 - e37); + f_next[12] = -e24*(e12 + e14 - e35 + e39); + f_next[13] = -e0*(e12 + e33 + e40); + f_next[14] = -e0*(e30 + e31 + e41); + f_next[15] = -e0*(-e27 + e3 + e34); + f_next[16] = -e24*(-e25 + e3 + e39 + e5); + f_next[17] = -e0*(e20 + e3 + e40); + f_next[18] = -e0*(e12 - e2 + e41); +} + +}; diff --git a/tangle/LLBM/kernel/executor.h b/tangle/LLBM/kernel/executor.h new file mode 100644 index 0000000..942918d --- /dev/null +++ b/tangle/LLBM/kernel/executor.h @@ -0,0 +1,171 @@ +#pragma once + +#include + +namespace kernel { + +template +__global__ void call_operator( + LatticeView lattice + , std::size_t* cells + , std::size_t cell_count + , ARGS... args +) { + const std::size_t index = blockIdx.x * blockDim.x + threadIdx.x; + if (!(index < cell_count)) { + return; + } + const std::size_t gid = cells[index]; + + S f_curr[DESCRIPTOR::q]; + S f_next[DESCRIPTOR::q]; + S* preshifted_f[DESCRIPTOR::q]; + for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { + preshifted_f[iPop] = lattice.pop(iPop, gid); + f_curr[iPop] = *preshifted_f[iPop]; + } + OPERATOR::template apply(DESCRIPTOR(), f_curr, f_next, gid, std::forward(args)...); + for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { + *preshifted_f[iPop] = f_next[iPop]; + } +} + +template +__global__ void call_operator( + LatticeView lattice + , bool* mask + , ARGS... args +) { + const std::size_t gid = blockIdx.x * blockDim.x + threadIdx.x; + if (!(gid < lattice.cuboid.volume) || !mask[gid]) { + return; + } + + S f_curr[DESCRIPTOR::q]; + S f_next[DESCRIPTOR::q]; + S* preshifted_f[DESCRIPTOR::q]; + for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { + preshifted_f[iPop] = lattice.pop(iPop, gid); + f_curr[iPop] = *preshifted_f[iPop]; + } + OPERATOR::template apply(DESCRIPTOR(), f_curr, f_next, gid, std::forward(args)...); + for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { + *preshifted_f[iPop] = f_next[iPop]; + } +} + +template +__global__ void call_functor( + LatticeView lattice + , bool* mask + , ARGS... args +) { + const std::size_t gid = blockIdx.x * blockDim.x + threadIdx.x; + if (!(gid < lattice.cuboid.volume) || !mask[gid]) { + return; + } + + S f_curr[DESCRIPTOR::q]; + S* preshifted_f[DESCRIPTOR::q]; +