diff options
Diffstat (limited to 'src/lattice.h')
-rw-r--r-- | src/lattice.h | 267 |
1 files changed, 267 insertions, 0 deletions
diff --git a/src/lattice.h b/src/lattice.h new file mode 100644 index 0000000..bbebab4 --- /dev/null +++ b/src/lattice.h @@ -0,0 +1,267 @@ +#pragma once + +#include <memory> +#include <vector> + +#ifdef ENABLE_AVX512 +#include "simd/512.h" +#else +#include "simd/256.h" +#endif + +#include <omp.h> + +#include "cuboid.h" +#include "population.h" +#include "propagation.h" +#include "operator.h" +#include "export.h" + +#include "LLBM/initialize.h" +#include "LLBM/collect_moments.h" + +template <typename T> +class LatticeMask { +private: + const std::size_t _size; + + std::unique_ptr<typename simd::Mask<T>::storage_t[]> _simd_mask; + std::unique_ptr<bool[]> _base_mask; + +public: + LatticeMask(std::size_t nCells): + _size(((nCells + simd::Pack<T>::size - 1) / simd::Mask<T>::storage_size + 1) * simd::Mask<T>::storage_size), + _simd_mask(new typename simd::Mask<T>::storage_t[_size / simd::Mask<T>::storage_size] { }), + _base_mask(new bool[_size] { }) + { } + + void set(std::size_t iCell, bool state) { + _base_mask[iCell] = state; + } + + bool get(std::size_t iCell) const { + return _base_mask[iCell]; + } + + void serialize() { + for (std::size_t i=0; i < _size / simd::Mask<T>::storage_size; ++i) { + _simd_mask[i] = simd::Mask<T>::encode(_base_mask.get() + i*simd::Mask<T>::storage_size); + } + } + + typename simd::Mask<T>::storage_t* data() { + return _simd_mask.get(); + } + +}; + +template <concepts::PropagatablePopulationBuffer Buffer> +class Lattice { +private: + using value_t = typename Buffer::value_t; + using pack_t = simd::Pack<value_t>; + using mask_t = simd::Mask<value_t>; + + Cuboid _cuboid; + Buffer _population; + + template <typename COP> + void apply(COP cop, + std::size_t iCell, + pack_t f_curr[population::q], + pack_t f_next[population::q]) { + if (mask_t m = {cop.mask.data(), iCell}) { + cop(f_curr, f_next); + + #pragma GCC unroll population::q + for (unsigned iPop=0; iPop < population::q; ++iPop) { + simd::maskstore(get(iPop, stage::post_collision()) + iCell, m, f_next[iPop]); + } + } + } + +public: + Lattice(Cuboid cuboid): + _cuboid(cuboid), + _population(cuboid) + { + apply<InitializeO>(); + } + + template <typename STAGE> + value_t* get(unsigned iPop, STAGE stage) { + return _population.get(iPop, stage); + } + + std::size_t volume() const { + return _cuboid.volume(); + } + + void stream() { + _population.stream(); + } + + template <typename F, typename... ARGS> + requires concepts::Operator<F,value_t> + void apply(std::size_t iCell, ARGS&&... args) { + value_t f_curr[population::q]; + value_t f_next[population::q]; + #pragma GCC unroll population::q + for (unsigned iPop=0; iPop < population::q; ++iPop) { + f_curr[iPop] = get(iPop, stage::pre_collision())[iCell]; + } + + F::apply(f_curr, f_next, std::forward<ARGS>(args)...); + + #pragma GCC unroll population::q + for (unsigned iPop=0; iPop < population::q; ++iPop) { + get(iPop, stage::post_collision())[iCell] = f_next[iPop]; + } + } + + template <typename F> + requires concepts::Operator<F,pack_t> + void apply() { + pack_t f_curr[population::q]; + pack_t f_next[population::q]; + #pragma omp parallel for private(f_curr, f_next) schedule(static) + for (std::size_t i=0; i < volume() / pack_t::size; ++i) { + const std::size_t iCell = pack_t::size * i; + + #pragma GCC unroll population::q + for (unsigned iPop=0; iPop < population::q; ++iPop) { + f_curr[iPop] = pack_t(get(iPop, stage::pre_collision()) + iCell); + } + + F::apply(f_curr, f_next); + + #pragma GCC unroll population::q + for (unsigned iPop=0; iPop < population::q; ++iPop) { + simd::store(get(iPop, stage::post_collision()) + iCell, f_next[iPop]); + } + } + + for (std::size_t iCell = pack_t::size * (volume() / pack_t::size); + iCell < volume(); + ++iCell) { + apply<F>(iCell); + } + } + + template <typename F, typename... ARGS> + requires concepts::Operator<F,pack_t,ARGS...> + void apply(LatticeMask<value_t>& mask, ARGS&&... args) { + pack_t f_curr[population::q]; + pack_t f_next[population::q]; + #pragma omp parallel for private(f_curr, f_next) schedule(static) + for (std::size_t iCell=0; iCell < volume(); iCell += pack_t::size) { + mask_t m(mask.data(), iCell); + if (m) { + #pragma GCC unroll population::q + for (unsigned iPop=0; iPop < population::q; ++iPop) { + f_curr[iPop] = pack_t(get(iPop, stage::pre_collision()) + iCell); + } + + F::apply(f_curr, f_next, std::forward<ARGS>(args)...); + + #pragma GCC unroll population::q + for (unsigned iPop=0; iPop < population::q; ++iPop) { + simd::maskstore(get(iPop, stage::post_collision()) + iCell, m, f_next[iPop]); + } + } + } + } + + template <typename... COPs> + void apply(COPs&&... cops) { + pack_t f_curr[population::q]; + pack_t f_next[population::q]; + #pragma omp parallel for private(f_curr, f_next) schedule(static) + for (std::size_t iCell=0; iCell < volume(); iCell += pack_t::size) { + #pragma GCC unroll population::q + for (unsigned iPop=0; iPop < population::q; ++iPop) { + f_curr[iPop] = pack_t(get(iPop, stage::pre_collision()) + iCell); + } + + (apply(cops, iCell, f_curr, f_next), ...); + } + } + + template <typename F, typename... ARGS> + void apply(std::vector<typename pack_t::index_t>& cells, ARGS&&... args) { + #ifdef SIMD_CELL_LIST + pack_t f_curr[population::q]; + pack_t f_next[population::q]; + #pragma omp parallel for private(f_curr, f_next) schedule(static) + for (std::size_t i=0; i < cells.size() / pack_t::size; ++i) { + const std::size_t iIdx = pack_t::size * i; + pack_t::index_t* locs = cells.data() + iIdx; + #pragma GCC unroll population::q + for (unsigned iPop=0; iPop < population::q; ++iPop) { + f_curr[iPop] = pack_t(get(iPop, stage::pre_collision()), locs); + } + + F::apply(f_curr, f_next, std::forward<ARGS>(args)...); + + #pragma GCC unroll population::q + for (unsigned iPop=0; iPop < population::q; ++iPop) { + simd::store(get(iPop, stage::post_collision()), f_next[iPop], locs); + } + } + + for (std::size_t i = pack_t::size * (cells.size() / pack_t::size); + i < cells.size(); + ++i) { + apply<F>(cells[i], std::forward<ARGS>(args)...); + } + #else + value_t f_curr[population::q]; + value_t f_next[population::q]; + #pragma omp parallel for private(f_curr, f_next) schedule(static) + for (std::size_t i=0; i < cells.size(); ++i) { + #pragma GCC unroll population::q + for (unsigned iPop=0; iPop < population::q; ++iPop) { + f_curr[iPop] = get(iPop, stage::pre_collision())[cells[i]]; + } + + F::apply(f_curr, f_next, std::forward<ARGS>(args)...); + + #pragma GCC unroll population::q + for (unsigned iPop=0; iPop < population::q; ++iPop) { + get(iPop, stage::post_collision())[cells[i]] = f_next[iPop]; + } + } + #endif + } + + template <typename F> + requires concepts::Functor<F,pack_t> + void observe(LatticeMask<value_t>& mask, std::span<value_t> buffer) { + pack_t f[population::q]; + pack_t output[F::size]; + #pragma omp parallel for private(f,output) schedule(static) + for (std::size_t iCell=0; iCell < volume(); iCell += pack_t::size) { + mask_t m(mask.data(), iCell); + if (m) { + for (unsigned iPop=0; iPop < population::q; ++iPop) { + f[iPop] = pack_t(get(iPop, stage::pre_collision()) + iCell); + } + + CollectMomentsF::observe(f, output); + + for (unsigned i=0; i < F::size; ++i) { + simd::maskstore(buffer.data() + i*volume() + iCell, m, output[i]); + } + } + } + } + + void write_momenta(LatticeMask<value_t>& mask, std::string path) { + std::vector<value_t> buffer(CollectMomentsF::size*volume()); + observe<CollectMomentsF>(mask, buffer); + export_format::vtk<value_t>(_cuboid, + { buffer.data(), 1*volume() }, + { buffer.data() + volume(), 3*volume() }, + path); + } +}; |