From 8691f6f7306914d8fc9d5afc8a347ebf5ce0a7d7 Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Mon, 23 Aug 2021 22:04:50 +0200 Subject: Replace InitializeO by memset --- tangle/LLBM/kernel/collect_q_criterion.h | 2 +- tangle/LLBM/kernel/initialize.h | 44 -------------------------------- tangle/LLBM/lattice.h | 1 - tangle/LLBM/propagate.h | 8 ++++-- tangle/benchmark-ldc.cu | 8 ------ tangle/channel-with-sphere.cu | 7 ----- tangle/ldc-2d.cu | 5 ---- tangle/ldc-3d.cu | 4 --- tangle/magnus.cu | 5 ---- tangle/nozzle.cu | 5 ---- tangle/taylor-couette.cu | 3 --- 11 files changed, 7 insertions(+), 85 deletions(-) delete mode 100644 tangle/LLBM/kernel/initialize.h (limited to 'tangle') diff --git a/tangle/LLBM/kernel/collect_q_criterion.h b/tangle/LLBM/kernel/collect_q_criterion.h index fa19dc7..6f770e5 100644 --- a/tangle/LLBM/kernel/collect_q_criterion.h +++ b/tangle/LLBM/kernel/collect_q_criterion.h @@ -14,7 +14,7 @@ __device__ static void apply( , std::size_t iX , std::size_t iY , std::size_t iZ - , T* cell_rho + , T* cell_rho , T* cell_u , T* cell_curl_norm , cudaSurfaceObject_t surface diff --git a/tangle/LLBM/kernel/initialize.h b/tangle/LLBM/kernel/initialize.h deleted file mode 100644 index 221b9ad..0000000 --- a/tangle/LLBM/kernel/initialize.h +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once -#include - -struct InitializeO { - -using call_tag = tag::call_by_cell_id; - -template -__device__ static void apply(descriptor::D2Q9, S f_curr[9], S f_next[9], std::size_t gid) { - f_next[0] = T{0.0277777777777778}; - f_next[1] = T{0.111111111111111}; - f_next[2] = T{0.0277777777777778}; - f_next[3] = T{0.111111111111111}; - f_next[4] = T{0.444444444444444}; - f_next[5] = T{0.111111111111111}; - f_next[6] = T{0.0277777777777778}; - f_next[7] = T{0.111111111111111}; - f_next[8] = T{0.0277777777777778}; -} - -template -__device__ static void apply(descriptor::D3Q19, S f_curr[19], S f_next[19], std::size_t gid) { - f_next[0] = T{0.0277777777777778}; - f_next[1] = T{0.0277777777777778}; - f_next[2] = T{0.0555555555555556}; - f_next[3] = T{0.0277777777777778}; - f_next[4] = T{0.0277777777777778}; - f_next[5] = T{0.0277777777777778}; - f_next[6] = T{0.0555555555555556}; - f_next[7] = T{0.0277777777777778}; - f_next[8] = T{0.0555555555555556}; - f_next[9] = T{0.333333333333333}; - f_next[10] = T{0.0555555555555556}; - f_next[11] = T{0.0277777777777778}; - f_next[12] = T{0.0555555555555556}; - f_next[13] = T{0.0277777777777778}; - f_next[14] = T{0.0277777777777778}; - f_next[15] = T{0.0277777777777778}; - f_next[16] = T{0.0555555555555556}; - f_next[17] = T{0.0277777777777778}; - f_next[18] = T{0.0277777777777778}; -} - -}; diff --git a/tangle/LLBM/lattice.h b/tangle/LLBM/lattice.h index 8ba1d66..7157c78 100644 --- a/tangle/LLBM/lattice.h +++ b/tangle/LLBM/lattice.h @@ -5,7 +5,6 @@ #include "operator.h" #include "propagate.h" -#include "kernel/initialize.h" #include "kernel/executor.h" template diff --git a/tangle/LLBM/propagate.h b/tangle/LLBM/propagate.h index d63ccd8..acb1d6c 100644 --- a/tangle/LLBM/propagate.h +++ b/tangle/LLBM/propagate.h @@ -79,7 +79,7 @@ CyclicPopulationBuffer::CyclicPopulationBuffer( for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { // per-population handle until cuMemMap accepts non-zero offset - cuMemCreate(&_handle[iPop], _volume, &_prop, 0); + cuMemCreate(&_handle[iPop], _volume, &_prop, 0); cuMemMap(_ptr + iPop * 2 * _volume, _volume, 0, _handle[iPop], 0); cuMemMap(_ptr + iPop * 2 * _volume + _volume, _volume, 0, _handle[iPop], 0); } @@ -88,7 +88,11 @@ CyclicPopulationBuffer::CyclicPopulationBuffer( _access.location.id = 0; _access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE; cuMemSetAccess(_ptr, 2 * _volume * DESCRIPTOR::q, &_access, 1); - cuMemsetD8(_ptr, 0, 2 * _volume * DESCRIPTOR::q); + + for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { + float eq = descriptor::weight(iPop); + cuMemsetD32(_ptr + iPop * 2 * _volume, *reinterpret_cast(&eq), 2 * (_volume / sizeof(S))); + } for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { _base[iPop] = device() + iPop * 2 * (_volume / sizeof(S)); diff --git a/tangle/benchmark-ldc.cu b/tangle/benchmark-ldc.cu index 2443afe..4de0ae5 100644 --- a/tangle/benchmark-ldc.cu +++ b/tangle/benchmark-ldc.cu @@ -30,14 +30,6 @@ void simulate(descriptor::Cuboid cuboid, std::size_t nStep) { auto box_mask = materials.mask_of_material(2); auto lid_mask = materials.mask_of_material(3); - auto bulk_cells = materials.list_of_material(1); - auto box_cells = materials.list_of_material(2); - auto lid_cells = materials.list_of_material(3); - - lattice.template apply(bulk_cells); - lattice.template apply(box_cells); - lattice.template apply(lid_cells); - cudaDeviceSynchronize(); for (std::size_t iStep=0; iStep < 100; ++iStep) { diff --git a/tangle/channel-with-sphere.cu b/tangle/channel-with-sphere.cu index 401f0c9..29cc7de 100644 --- a/tangle/channel-with-sphere.cu +++ b/tangle/channel-with-sphere.cu @@ -55,13 +55,6 @@ auto inflow_mask = materials.mask_of_material(4); auto outflow_mask = materials.mask_of_material(5); auto edge_mask = materials.mask_of_material(6); -lattice.apply(Operator(InitializeO(), bulk_mask), - Operator(InitializeO(), wall_mask_z), - Operator(InitializeO(), wall_mask_y), - Operator(InitializeO(), inflow_mask), - Operator(InitializeO(), outflow_mask), - Operator(InitializeO(), edge_mask)); - cudaDeviceSynchronize(); VolumetricExample renderer(cuboid); diff --git a/tangle/ldc-2d.cu b/tangle/ldc-2d.cu index 8989374..acba98f 100644 --- a/tangle/ldc-2d.cu +++ b/tangle/ldc-2d.cu @@ -32,11 +32,6 @@ auto bulk_mask = materials.mask_of_material(1); auto wall_mask = materials.mask_of_material(2); auto lid_mask = materials.mask_of_material(3); -lattice.apply(Operator(InitializeO(), bulk_mask), - Operator(InitializeO(), wall_mask), - Operator(InitializeO(), lid_mask)); -cudaDeviceSynchronize(); - const float tau = 0.51; const float u_lid = 0.05; diff --git a/tangle/ldc-3d.cu b/tangle/ldc-3d.cu index ece1234..e9b42f2 100644 --- a/tangle/ldc-3d.cu +++ b/tangle/ldc-3d.cu @@ -34,10 +34,6 @@ auto bulk_mask = materials.mask_of_material(1); auto wall_mask = materials.mask_of_material(2); auto lid_mask = materials.mask_of_material(3); -lattice.apply(Operator(InitializeO(), bulk_mask), - Operator(InitializeO(), wall_mask), - Operator(InitializeO(), lid_mask)); - cudaDeviceSynchronize(); auto none = [] __device__ (float3) -> float { return 1; }; diff --git a/tangle/magnus.cu b/tangle/magnus.cu index 5800cd8..08a4515 100644 --- a/tangle/magnus.cu +++ b/tangle/magnus.cu @@ -64,11 +64,6 @@ auto inflow_mask = materials.mask_of_material(3); auto outflow_mask = materials.mask_of_material(4); auto edge_mask = materials.mask_of_material(5); -lattice.apply(Operator(InitializeO(), bulk_mask), - Operator(InitializeO(), wall_mask), - Operator(InitializeO(), inflow_mask), - Operator(InitializeO(), outflow_mask), - Operator(InitializeO(), edge_mask)); cudaDeviceSynchronize(); RenderWindow window("Magnus"); diff --git a/tangle/nozzle.cu b/tangle/nozzle.cu index 5a1b1f3..03c18f9 100644 --- a/tangle/nozzle.cu +++ b/tangle/nozzle.cu @@ -46,11 +46,6 @@ auto boundary_mask = materials.mask_of_material(2); auto inflow_mask = materials.mask_of_material(3); auto outflow_mask = materials.mask_of_material(4); -lattice.apply(Operator(InitializeO(), bulk_mask), - Operator(InitializeO(), boundary_mask), - Operator(InitializeO(), inflow_mask), - Operator(InitializeO(), outflow_mask)); - cudaDeviceSynchronize(); VolumetricExample renderer(cuboid); diff --git a/tangle/taylor-couette.cu b/tangle/taylor-couette.cu index 48b0d87..2e69bfb 100644 --- a/tangle/taylor-couette.cu +++ b/tangle/taylor-couette.cu @@ -55,9 +55,6 @@ auto bulk_list = materials.list_of_material(1); auto wall_mask = materials.mask_of_material(2); auto wall_list = materials.list_of_material(2); -lattice.apply(bulk_list); -lattice.apply(wall_list); - cudaDeviceSynchronize(); VolumetricExample renderer(cuboid); -- cgit v1.2.3