diff options
Diffstat (limited to 'tangle/LLBM')
-rw-r--r-- | tangle/LLBM/kernel/collect_q_criterion.h | 2 | ||||
-rw-r--r-- | tangle/LLBM/kernel/initialize.h | 44 | ||||
-rw-r--r-- | tangle/LLBM/lattice.h | 1 | ||||
-rw-r--r-- | tangle/LLBM/propagate.h | 8 |
4 files changed, 7 insertions, 48 deletions
diff --git a/tangle/LLBM/kernel/collect_q_criterion.h b/tangle/LLBM/kernel/collect_q_criterion.h
index fa19dc7..6f770e5 100644
--- a/tangle/LLBM/kernel/collect_q_criterion.h
+++ b/tangle/LLBM/kernel/collect_q_criterion.h
@@ -14,7 +14,7 @@ __device__ static void apply(
   , std::size_t iX
   , std::size_t iY
   , std::size_t iZ
-  , T* cell_rho
+  , T* cell_rho
   , T* cell_u
   , T* cell_curl_norm
   , cudaSurfaceObject_t surface
diff --git a/tangle/LLBM/kernel/initialize.h b/tangle/LLBM/kernel/initialize.h
deleted file mode 100644
index 221b9ad..0000000
--- a/tangle/LLBM/kernel/initialize.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#pragma once
-#include <LLBM/call_tag.h>
-
-struct InitializeO {
-
-using call_tag = tag::call_by_cell_id;
-
-template <typename T, typename S>
-__device__ static void apply(descriptor::D2Q9, S f_curr[9], S f_next[9], std::size_t gid) {
-  f_next[0] = T{0.0277777777777778};
-  f_next[1] = T{0.111111111111111};
-  f_next[2] = T{0.0277777777777778};
-  f_next[3] = T{0.111111111111111};
-  f_next[4] = T{0.444444444444444};
-  f_next[5] = T{0.111111111111111};
-  f_next[6] = T{0.0277777777777778};
-  f_next[7] = T{0.111111111111111};
-  f_next[8] = T{0.0277777777777778};
-}
-
-template <typename T, typename S>
-__device__ static void apply(descriptor::D3Q19, S f_curr[19], S f_next[19], std::size_t gid) {
-  f_next[0] = T{0.0277777777777778};
-  f_next[1] = T{0.0277777777777778};
-  f_next[2] = T{0.0555555555555556};
-  f_next[3] = T{0.0277777777777778};
-  f_next[4] = T{0.0277777777777778};
-  f_next[5] = T{0.0277777777777778};
-  f_next[6] = T{0.0555555555555556};
-  f_next[7] = T{0.0277777777777778};
-  f_next[8] = T{0.0555555555555556};
-  f_next[9] = T{0.333333333333333};
-  f_next[10] = T{0.0555555555555556};
-  f_next[11] = T{0.0277777777777778};
-  f_next[12] = T{0.0555555555555556};
-  f_next[13] = T{0.0277777777777778};
-  f_next[14] = T{0.0277777777777778};
-  f_next[15] = T{0.0277777777777778};
-  f_next[16] = T{0.0555555555555556};
-  f_next[17] = T{0.0277777777777778};
-  f_next[18] = T{0.0277777777777778};
-}
-
-};
diff --git a/tangle/LLBM/lattice.h b/tangle/LLBM/lattice.h
index 8ba1d66..7157c78 100644
--- a/tangle/LLBM/lattice.h
+++ b/tangle/LLBM/lattice.h
@@ -5,7 +5,6 @@
 #include "operator.h"
 #include "propagate.h"
 
-#include "kernel/initialize.h"
 #include "kernel/executor.h"
 
 template <typename DESCRIPTOR, typename T, typename S=T>
diff --git a/tangle/LLBM/propagate.h b/tangle/LLBM/propagate.h
index d63ccd8..acb1d6c 100644
--- a/tangle/LLBM/propagate.h
+++ b/tangle/LLBM/propagate.h
@@ -79,7 +79,7 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer(
 
   for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) {
     // per-population handle until cuMemMap accepts non-zero offset
-    cuMemCreate(&_handle[iPop], _volume, &_prop, 0);
+    cuMemCreate(&_handle[iPop], _volume, &_prop, 0);
     cuMemMap(_ptr + iPop * 2 * _volume, _volume, 0, _handle[iPop], 0);
     cuMemMap(_ptr + iPop * 2 * _volume + _volume, _volume, 0, _handle[iPop], 0);
   }
@@ -88,7 +88,11 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer(
   _access.location.id = 0;
   _access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
   cuMemSetAccess(_ptr, 2 * _volume * DESCRIPTOR::q, &_access, 1);
-  cuMemsetD8(_ptr, 0, 2 * _volume * DESCRIPTOR::q);
+
+  for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) {
+    float eq = descriptor::weight<DESCRIPTOR>(iPop);
+    cuMemsetD32(_ptr + iPop * 2 * _volume, *reinterpret_cast<int*>(&eq), 2 * (_volume / sizeof(S)));
+  }
 
   for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) {
     _base[iPop] = device() + iPop * 2 * (_volume / sizeof(S));