1 files changed, 267 insertions, 0 deletions
diff --git a/src/lattice.h b/src/lattice.h
new file mode 100644
index 0000000..bbebab4
--- /dev/null
+++ b/src/lattice.h
@@ -0,0 +1,267 @@
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#ifdef ENABLE_AVX512
+#include "simd/512.h"
+#else
+#include "simd/256.h"
+#endif
+
+#include <omp.h>
+
+#include "cuboid.h"
+#include "population.h"
+#include "propagation.h"
+#include "operator.h"
+#include "export.h"
+
+#include "LLBM/initialize.h"
+#include "LLBM/collect_moments.h"
+
+template <typename T>
+class LatticeMask {
+private:
+	const std::size_t _size;
+
+	std::unique_ptr<typename simd::Mask<T>::storage_t[]> _simd_mask;
+	std::unique_ptr<bool[]> _base_mask;
+
+public:
+	LatticeMask(std::size_t nCells):
+		_size(((nCells + simd::Pack<T>::size - 1) / simd::Mask<T>::storage_size + 1) * simd::Mask<T>::storage_size),
+		_simd_mask(new typename simd::Mask<T>::storage_t[_size / simd::Mask<T>::storage_size] { }),
+		_base_mask(new bool[_size] { })
+	{ }
+
+	void set(std::size_t iCell, bool state) {
+		_base_mask[iCell] = state;
+	}
+
+	bool get(std::size_t iCell) const {
+		return _base_mask[iCell];
+	}
+
+	void serialize() {
+		for (std::size_t i=0; i < _size / simd::Mask<T>::storage_size; ++i) {
+			_simd_mask[i] = simd::Mask<T>::encode(_base_mask.get() + i*simd::Mask<T>::storage_size);
+		}
+	}
+
+	typename simd::Mask<T>::storage_t* data() {
+		return _simd_mask.get();
+	}
+
+};
+
+template <concepts::PropagatablePopulationBuffer Buffer>
+class Lattice {
+private:
+	using value_t = typename Buffer::value_t;
+	using pack_t = simd::Pack<value_t>;
+	using mask_t = simd::Mask<value_t>;
+
+	Cuboid _cuboid;
+	Buffer _population;
+
+	template <typename COP>
+	void apply(COP cop,
+		         std::size_t iCell,
+		         pack_t f_curr[population::q],
+		         pack_t f_next[population::q]) {
+		if (mask_t m = {cop.mask.data(), iCell}) {
+			cop(f_curr, f_next);
+
+			#pragma GCC unroll population::q
+			for (unsigned iPop=0; iPop < population::q; ++iPop) {
+				simd::maskstore(get(iPop, stage::post_collision()) + iCell, m, f_next[iPop]);
+			}
+		}
+	}
+
+public:
+	Lattice(Cuboid cuboid):
+		_cuboid(cuboid),
+		_population(cuboid)
+	{
+		apply<InitializeO>();
+	}
+
+	template <typename STAGE>
+	value_t* get(unsigned iPop, STAGE stage) {
+		return _population.get(iPop, stage);
+	}
+
+	std::size_t volume() const {
+		return _cuboid.volume();
+	}
+
+	void stream() {
+		_population.stream();
+	}
+
+	template <typename F, typename... ARGS>
+  requires concepts::Operator<F,value_t>
+	void apply(std::size_t iCell, ARGS&&... args) {
+		value_t f_curr[population::q];
+		value_t f_next[population::q];
+		#pragma GCC unroll population::q
+		for (unsigned iPop=0; iPop < population::q; ++iPop) {
+			f_curr[iPop] = get(iPop, stage::pre_collision())[iCell];
+		}
+
+		F::apply(f_curr, f_next, std::forward<ARGS>(args)...);
+
+		#pragma GCC unroll population::q
+		for (unsigned iPop=0; iPop < population::q; ++iPop) {
+			get(iPop, stage::post_collision())[iCell] = f_next[iPop];
+		}
+	}
+
+	template <typename F>
+	requires concepts::Operator<F,pack_t>
+	void apply() {
+		pack_t f_curr[population::q];
+		pack_t f_next[population::q];
+		#pragma omp parallel for private(f_curr, f_next) schedule(static)
+		for (std::size_t i=0; i < volume() / pack_t::size; ++i) {
+			const std::size_t iCell = pack_t::size * i;
+
+			#pragma GCC unroll population::q
+			for (unsigned iPop=0; iPop < population::q; ++iPop) {
+				f_curr[iPop] = pack_t(get(iPop, stage::pre_collision()) + iCell);
+			}
+
+			F::apply(f_curr, f_next);
+
+			#pragma GCC unroll population::q
+			for (unsigned iPop=0; iPop < population::q; ++iPop) {
+				simd::store(get(iPop, stage::post_collision()) + iCell, f_next[iPop]);
+			}
+		}
+
+		for (std::size_t iCell = pack_t::size * (volume() / pack_t::size);
+				             iCell < volume();
+				           ++iCell) {
+			apply<F>(iCell);
+		}
+	}
+
+	template <typename F, typename... ARGS>
+	requires concepts::Operator<F,pack_t,ARGS...>
+	void apply(LatticeMask<value_t>& mask, ARGS&&... args) {
+		pack_t f_curr[population::q];
+		pack_t f_next[population::q];
+		#pragma omp parallel for private(f_curr, f_next) schedule(static)
+		for (std::size_t iCell=0; iCell < volume(); iCell += pack_t::size) {
+			mask_t m(mask.data(), iCell);
+			if (m) {
+				#pragma GCC unroll population::q
+				for (unsigned iPop=0; iPop < population::q; ++iPop) {
+					f_curr[iPop] = pack_t(get(iPop, stage::pre_collision()) + iCell);
+				}
+
+				F::apply(f_curr, f_next, std::forward<ARGS>(args)...);
+
+				#pragma GCC unroll population::q
+				for (unsigned iPop=0; iPop < population::q; ++iPop) {
+					simd::maskstore(get(iPop, stage::post_collision()) + iCell, m, f_next[iPop]);
+				}
+			}
+		}
+	}
+
+	template <typename... COPs>
+	void apply(COPs&&... cops) {
+		pack_t f_curr[population::q];
+		pack_t f_next[population::q];
+		#pragma omp parallel for private(f_curr, f_next) schedule(static)
+		for (std::size_t iCell=0; iCell < volume(); iCell += pack_t::size) {
+			#pragma GCC unroll population::q
+			for (unsigned iPop=0; iPop < population::q; ++iPop) {
+				f_curr[iPop] = pack_t(get(iPop, stage::pre_collision()) + iCell);
+			}
+
+			(apply(cops, iCell, f_curr, f_next), ...);
+		}
+	}
+
+	template <typename F, typename... ARGS>
+	void apply(std::vector<typename pack_t::index_t>& cells, ARGS&&... args) {
+	#ifdef SIMD_CELL_LIST
+		pack_t f_curr[population::q];
+		pack_t f_next[population::q];
+		#pragma omp parallel for private(f_curr, f_next) schedule(static)
+		for (std::size_t i=0; i < cells.size() / pack_t::size; ++i) {
+			const std::size_t iIdx = pack_t::size * i;
+			pack_t::index_t* locs = cells.data() + iIdx;
+			#pragma GCC unroll population::q
+			for (unsigned iPop=0; iPop < population::q; ++iPop) {
+				f_curr[iPop] = pack_t(get(iPop, stage::pre_collision()), locs);
+			}
+
+			F::apply(f_curr, f_next, std::forward<ARGS>(args)...);
+
+			#pragma GCC unroll population::q
+			for (unsigned iPop=0; iPop < population::q; ++iPop) {
+				simd::store(get(iPop, stage::post_collision()), f_next[iPop], locs);
+			}
+		}
+
+		for (std::size_t i = pack_t::size * (cells.size() / pack_t::size);
+				             i < cells.size();
+				           ++i) {
+			apply<F>(cells[i], std::forward<ARGS>(args)...);
+		}
+	#else
+		value_t f_curr[population::q];
+		value_t f_next[population::q];
+		#pragma omp parallel for private(f_curr, f_next) schedule(static)
+		for (std::size_t i=0; i < cells.size(); ++i) {
+			#pragma GCC unroll population::q
+			for (unsigned iPop=0; iPop < population::q; ++iPop) {
+				f_curr[iPop] = get(iPop, stage::pre_collision())[cells[i]];
+			}
+
+			F::apply(f_curr, f_next, std::forward<ARGS>(args)...);
+
+			#pragma GCC unroll population::q
+			for (unsigned iPop=0; iPop < population::q; ++iPop) {
+				get(iPop, stage::post_collision())[cells[i]] = f_next[iPop];
+			}
+		}
+	#endif
+	}
+
+	template <typename F>
+	requires concepts::Functor<F,pack_t>
+	void observe(LatticeMask<value_t>& mask, std::span<value_t> buffer) {
+		pack_t f[population::q];
+		pack_t output[F::size];
+		#pragma omp parallel for private(f,output) schedule(static)
+		for (std::size_t iCell=0; iCell < volume(); iCell += pack_t::size) {
+			mask_t m(mask.data(), iCell);
+			if (m) {
+				for (unsigned iPop=0; iPop < population::q; ++iPop) {
+					f[iPop] = pack_t(get(iPop, stage::pre_collision()) + iCell);
+				}
+
+				CollectMomentsF::observe(f, output);
+
+				for (unsigned i=0; i < F::size; ++i) {
+					simd::maskstore(buffer.data() + i*volume() + iCell, m, output[i]);
+				}
+			}
+		}
+	}
+
+	void write_momenta(LatticeMask<value_t>& mask, std::string path) {
+		std::vector<value_t> buffer(CollectMomentsF::size*volume());
+		observe<CollectMomentsF>(mask, buffer);
+		export_format::vtk<value_t>(_cuboid,
+		                            { buffer.data(),            1*volume() },
+		                            { buffer.data() + volume(), 3*volume() },
+		                            path);
+	}
+};