summaryrefslogtreecommitdiff
path: root/src/lattice.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/lattice.h')
-rw-r--r--src/lattice.h267
1 files changed, 267 insertions, 0 deletions
diff --git a/src/lattice.h b/src/lattice.h
new file mode 100644
index 0000000..bbebab4
--- /dev/null
+++ b/src/lattice.h
@@ -0,0 +1,267 @@
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#ifdef ENABLE_AVX512
+#include "simd/512.h"
+#else
+#include "simd/256.h"
+#endif
+
+#include <omp.h>
+
+#include "cuboid.h"
+#include "population.h"
+#include "propagation.h"
+#include "operator.h"
+#include "export.h"
+
+#include "LLBM/initialize.h"
+#include "LLBM/collect_moments.h"
+
+template <typename T>
+class LatticeMask {
+private:
+ const std::size_t _size;
+
+ std::unique_ptr<typename simd::Mask<T>::storage_t[]> _simd_mask;
+ std::unique_ptr<bool[]> _base_mask;
+
+public:
+ LatticeMask(std::size_t nCells):
+ _size(((nCells + simd::Pack<T>::size - 1) / simd::Mask<T>::storage_size + 1) * simd::Mask<T>::storage_size),
+ _simd_mask(new typename simd::Mask<T>::storage_t[_size / simd::Mask<T>::storage_size] { }),
+ _base_mask(new bool[_size] { })
+ { }
+
+ void set(std::size_t iCell, bool state) {
+ _base_mask[iCell] = state;
+ }
+
+ bool get(std::size_t iCell) const {
+ return _base_mask[iCell];
+ }
+
+ void serialize() {
+ for (std::size_t i=0; i < _size / simd::Mask<T>::storage_size; ++i) {
+ _simd_mask[i] = simd::Mask<T>::encode(_base_mask.get() + i*simd::Mask<T>::storage_size);
+ }
+ }
+
+ typename simd::Mask<T>::storage_t* data() {
+ return _simd_mask.get();
+ }
+
+};
+
+template <concepts::PropagatablePopulationBuffer Buffer>
+class Lattice {
+private:
+ using value_t = typename Buffer::value_t;
+ using pack_t = simd::Pack<value_t>;
+ using mask_t = simd::Mask<value_t>;
+
+ Cuboid _cuboid;
+ Buffer _population;
+
+ template <typename COP>
+ void apply(COP cop,
+ std::size_t iCell,
+ pack_t f_curr[population::q],
+ pack_t f_next[population::q]) {
+ if (mask_t m = {cop.mask.data(), iCell}) {
+ cop(f_curr, f_next);
+
+ #pragma GCC unroll population::q
+ for (unsigned iPop=0; iPop < population::q; ++iPop) {
+ simd::maskstore(get(iPop, stage::post_collision()) + iCell, m, f_next[iPop]);
+ }
+ }
+ }
+
+public:
+ Lattice(Cuboid cuboid):
+ _cuboid(cuboid),
+ _population(cuboid)
+ {
+ apply<InitializeO>();
+ }
+
+ template <typename STAGE>
+ value_t* get(unsigned iPop, STAGE stage) {
+ return _population.get(iPop, stage);
+ }
+
+ std::size_t volume() const {
+ return _cuboid.volume();
+ }
+
+ void stream() {
+ _population.stream();
+ }
+
+ template <typename F, typename... ARGS>
+ requires concepts::Operator<F,value_t>
+ void apply(std::size_t iCell, ARGS&&... args) {
+ value_t f_curr[population::q];
+ value_t f_next[population::q];
+ #pragma GCC unroll population::q
+ for (unsigned iPop=0; iPop < population::q; ++iPop) {
+ f_curr[iPop] = get(iPop, stage::pre_collision())[iCell];
+ }
+
+ F::apply(f_curr, f_next, std::forward<ARGS>(args)...);
+
+ #pragma GCC unroll population::q
+ for (unsigned iPop=0; iPop < population::q; ++iPop) {
+ get(iPop, stage::post_collision())[iCell] = f_next[iPop];
+ }
+ }
+
+ template <typename F>
+ requires concepts::Operator<F,pack_t>
+ void apply() {
+ pack_t f_curr[population::q];
+ pack_t f_next[population::q];
+ #pragma omp parallel for private(f_curr, f_next) schedule(static)
+ for (std::size_t i=0; i < volume() / pack_t::size; ++i) {
+ const std::size_t iCell = pack_t::size * i;
+
+ #pragma GCC unroll population::q
+ for (unsigned iPop=0; iPop < population::q; ++iPop) {
+ f_curr[iPop] = pack_t(get(iPop, stage::pre_collision()) + iCell);
+ }
+
+ F::apply(f_curr, f_next);
+
+ #pragma GCC unroll population::q
+ for (unsigned iPop=0; iPop < population::q; ++iPop) {
+ simd::store(get(iPop, stage::post_collision()) + iCell, f_next[iPop]);
+ }
+ }
+
+ for (std::size_t iCell = pack_t::size * (volume() / pack_t::size);
+ iCell < volume();
+ ++iCell) {
+ apply<F>(iCell);
+ }
+ }
+
+ template <typename F, typename... ARGS>
+ requires concepts::Operator<F,pack_t,ARGS...>
+ void apply(LatticeMask<value_t>& mask, ARGS&&... args) {
+ pack_t f_curr[population::q];
+ pack_t f_next[population::q];
+ #pragma omp parallel for private(f_curr, f_next) schedule(static)
+ for (std::size_t iCell=0; iCell < volume(); iCell += pack_t::size) {
+ mask_t m(mask.data(), iCell);
+ if (m) {
+ #pragma GCC unroll population::q
+ for (unsigned iPop=0; iPop < population::q; ++iPop) {
+ f_curr[iPop] = pack_t(get(iPop, stage::pre_collision()) + iCell);
+ }
+
+ F::apply(f_curr, f_next, std::forward<ARGS>(args)...);
+
+ #pragma GCC unroll population::q
+ for (unsigned iPop=0; iPop < population::q; ++iPop) {
+ simd::maskstore(get(iPop, stage::post_collision()) + iCell, m, f_next[iPop]);
+ }
+ }
+ }
+ }
+
+ template <typename... COPs>
+ void apply(COPs&&... cops) {
+ pack_t f_curr[population::q];
+ pack_t f_next[population::q];
+ #pragma omp parallel for private(f_curr, f_next) schedule(static)
+ for (std::size_t iCell=0; iCell < volume(); iCell += pack_t::size) {
+ #pragma GCC unroll population::q
+ for (unsigned iPop=0; iPop < population::q; ++iPop) {
+ f_curr[iPop] = pack_t(get(iPop, stage::pre_collision()) + iCell);
+ }
+
+ (apply(cops, iCell, f_curr, f_next), ...);
+ }
+ }
+
+ template <typename F, typename... ARGS>
+ void apply(std::vector<typename pack_t::index_t>& cells, ARGS&&... args) {
+ #ifdef SIMD_CELL_LIST
+ pack_t f_curr[population::q];
+ pack_t f_next[population::q];
+ #pragma omp parallel for private(f_curr, f_next) schedule(static)
+ for (std::size_t i=0; i < cells.size() / pack_t::size; ++i) {
+ const std::size_t iIdx = pack_t::size * i;
+ pack_t::index_t* locs = cells.data() + iIdx;
+ #pragma GCC unroll population::q
+ for (unsigned iPop=0; iPop < population::q; ++iPop) {
+ f_curr[iPop] = pack_t(get(iPop, stage::pre_collision()), locs);
+ }
+
+ F::apply(f_curr, f_next, std::forward<ARGS>(args)...);
+
+ #pragma GCC unroll population::q
+ for (unsigned iPop=0; iPop < population::q; ++iPop) {
+ simd::store(get(iPop, stage::post_collision()), f_next[iPop], locs);
+ }
+ }
+
+ for (std::size_t i = pack_t::size * (cells.size() / pack_t::size);
+ i < cells.size();
+ ++i) {
+ apply<F>(cells[i], std::forward<ARGS>(args)...);
+ }
+ #else
+ value_t f_curr[population::q];
+ value_t f_next[population::q];
+ #pragma omp parallel for private(f_curr, f_next) schedule(static)
+ for (std::size_t i=0; i < cells.size(); ++i) {
+ #pragma GCC unroll population::q
+ for (unsigned iPop=0; iPop < population::q; ++iPop) {
+ f_curr[iPop] = get(iPop, stage::pre_collision())[cells[i]];
+ }
+
+ F::apply(f_curr, f_next, std::forward<ARGS>(args)...);
+
+ #pragma GCC unroll population::q
+ for (unsigned iPop=0; iPop < population::q; ++iPop) {
+ get(iPop, stage::post_collision())[cells[i]] = f_next[iPop];
+ }
+ }
+ #endif
+ }
+
+ template <typename F>
+ requires concepts::Functor<F,pack_t>
+ void observe(LatticeMask<value_t>& mask, std::span<value_t> buffer) {
+ pack_t f[population::q];
+ pack_t output[F::size];
+ #pragma omp parallel for private(f,output) schedule(static)
+ for (std::size_t iCell=0; iCell < volume(); iCell += pack_t::size) {
+ mask_t m(mask.data(), iCell);
+ if (m) {
+ for (unsigned iPop=0; iPop < population::q; ++iPop) {
+ f[iPop] = pack_t(get(iPop, stage::pre_collision()) + iCell);
+ }
+
+ CollectMomentsF::observe(f, output);
+
+ for (unsigned i=0; i < F::size; ++i) {
+ simd::maskstore(buffer.data() + i*volume() + iCell, m, output[i]);
+ }
+ }
+ }
+ }
+
+ void write_momenta(LatticeMask<value_t>& mask, std::string path) {
+ std::vector<value_t> buffer(CollectMomentsF::size*volume());
+ observe<CollectMomentsF>(mask, buffer);
+ export_format::vtk<value_t>(_cuboid,
+ { buffer.data(), 1*volume() },
+ { buffer.data() + volume(), 3*volume() },
+ path);
+ }
+};