summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt40
-rw-r--r--assets.cmake42
-rw-r--r--default.nix152
-rw-r--r--lbm.org5817
-rw-r--r--org.css213
-rw-r--r--tangle/LLBM/base.h6
-rw-r--r--tangle/LLBM/boundary.h13
-rw-r--r--tangle/LLBM/bulk.h3
-rw-r--r--tangle/LLBM/call_tag.h12
-rw-r--r--tangle/LLBM/descriptor.h298
-rw-r--r--tangle/LLBM/kernel/bounce_back.h44
-rw-r--r--tangle/LLBM/kernel/bounce_back_moving_wall.h44
-rw-r--r--tangle/LLBM/kernel/bouzidi.h39
-rw-r--r--tangle/LLBM/kernel/collect_curl.h44
-rw-r--r--tangle/LLBM/kernel/collect_moments.h45
-rw-r--r--tangle/LLBM/kernel/collect_q_criterion.h104
-rw-r--r--tangle/LLBM/kernel/collect_shear_layer_normal.h139
-rw-r--r--tangle/LLBM/kernel/collect_streamlines.h48
-rw-r--r--tangle/LLBM/kernel/collect_velocity_norm.h61
-rw-r--r--tangle/LLBM/kernel/collide.h131
-rw-r--r--tangle/LLBM/kernel/equilibrium_density_wall.h226
-rw-r--r--tangle/LLBM/kernel/equilibrium_velocity_wall.h222
-rw-r--r--tangle/LLBM/kernel/executor.h171
-rw-r--r--tangle/LLBM/kernel/free_slip.h82
-rw-r--r--tangle/LLBM/kernel/initialize.h44
-rw-r--r--tangle/LLBM/kernel/propagate.h20
-rw-r--r--tangle/LLBM/kernel/smagorinsky_collide.h183
-rw-r--r--tangle/LLBM/lattice.h131
-rw-r--r--tangle/LLBM/materials.h108
-rw-r--r--tangle/LLBM/memory.h134
-rw-r--r--tangle/LLBM/operator.h27
-rw-r--r--tangle/LLBM/propagate.h111
-rw-r--r--tangle/LLBM/sdf.h75
-rw-r--r--tangle/LLBM/sdf_boundary.h114
-rw-r--r--tangle/LLBM/volumetric.h134
-rw-r--r--tangle/LLBM/wall.h4
-rw-r--r--tangle/asset/noise/blue_0.pngbin0 -> 3763 bytes
-rw-r--r--tangle/asset/noise/blue_1.pngbin0 -> 3775 bytes
-rw-r--r--tangle/asset/noise/blue_2.pngbin0 -> 3752 bytes
-rw-r--r--tangle/asset/noise/blue_3.pngbin0 -> 3789 bytes
-rw-r--r--tangle/asset/noise/blue_4.pngbin0 -> 3754 bytes
-rw-r--r--tangle/asset/palette/4wave_ROTB.pngbin0 -> 1572 bytes
-rw-r--r--tangle/asset/palette/4wave_equal.pngbin0 -> 1485 bytes
-rw-r--r--tangle/asset/palette/5wave_cool.pngbin0 -> 1601 bytes
-rw-r--r--tangle/asset/palette/autumn.pngbin0 -> 827 bytes
-rw-r--r--tangle/asset/palette/blue.pngbin0 -> 1223 bytes
-rw-r--r--tangle/asset/palette/blue_orange.pngbin0 -> 1457 bytes
-rw-r--r--tangle/asset/palette/green_brown.pngbin0 -> 1269 bytes
-rw-r--r--tangle/asset/palette/orange.pngbin0 -> 1165 bytes
-rw-r--r--tangle/asset/shader/blur.frag20
-rw-r--r--tangle/benchmark-ldc.cu95
-rw-r--r--tangle/channel-with-sphere.cu85
-rw-r--r--tangle/ldc-2d.cu82
-rw-r--r--tangle/ldc-3d.cu58
-rw-r--r--tangle/magnus.cu116
-rw-r--r--tangle/nozzle.cu73
-rw-r--r--tangle/sampler/curl_norm.h77
-rw-r--r--tangle/sampler/q_criterion.h90
-rw-r--r--tangle/sampler/sampler.h32
-rw-r--r--tangle/sampler/shear_layer.h67
-rw-r--r--tangle/sampler/velocity_norm.h79
-rw-r--r--tangle/taylor-couette.cu75
-rw-r--r--tangle/tmp/noise_overview.pngbin0 -> 14996 bytes
-rw-r--r--tangle/tmp/test_noise.pngbin0 -> 26571 bytes
-rw-r--r--tangle/util/camera.h86
-rw-r--r--tangle/util/colormap.h38
-rw-r--r--tangle/util/noise.h39
-rw-r--r--tangle/util/render_window.h95
-rw-r--r--tangle/util/texture.h25
-rw-r--r--tangle/util/timer.h19
-rw-r--r--tangle/util/volumetric_example.h121
71 files changed, 10453 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..abe1087
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,40 @@
+cmake_minimum_required(VERSION 3.10)
+project(literatelb LANGUAGES CXX CUDA)
+
+find_package(CUDAToolkit REQUIRED)
+find_package(SFML 2.5 COMPONENTS graphics window system REQUIRED)
+find_package(ImGui-SFML REQUIRED)
+
+if(NOT CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --default-stream per-thread --extended-lambda")
+set(CMAKE_CUDA_ARCHITECTURES 50)
+
+include_directories(
+ ${CMAKE_CURRENT_SOURCE_DIR}/tangle
+ ${CMAKE_CURRENT_BINARY_DIR})
+
+include(assets.cmake)
+serialize_assets(${CMAKE_CURRENT_SOURCE_DIR}/tangle/asset
+ ${CMAKE_CURRENT_BINARY_DIR}/assets.cc
+ ${CMAKE_CURRENT_BINARY_DIR}/assets.h)
+add_library(assets ${CMAKE_CURRENT_BINARY_DIR}/assets.cc)
+
+link_libraries(
+ sfml-graphics
+ sfml-window
+ sfml-system
+ ImGui-SFML::ImGui-SFML
+ CUDA::cuda_driver
+ assets)
+
+file(GLOB EXAMPLES ${CMAKE_CURRENT_SOURCE_DIR}/tangle/*.cu)
+foreach(examplefile ${EXAMPLES})
+ get_filename_component(examplename ${examplefile} NAME_WE)
+ add_executable(${examplename} ${examplefile})
+endforeach()
diff --git a/assets.cmake b/assets.cmake
new file mode 100644
index 0000000..11868f8
--- /dev/null
+++ b/assets.cmake
@@ -0,0 +1,42 @@
+function(serialize_folder name dir data header)
+ file(APPEND ${header} "namespace ${name} {\n")
+ file(APPEND ${data} "namespace ${name} {\n")
+ file(GLOB files ${dir}/*)
+ list(LENGTH files file_count)
+ foreach(file ${files})
+ string(REGEX MATCH "([^/]+)$" filename ${file})
+ string(REGEX REPLACE "\\.| |-" "_" filename ${filename})
+ file(READ ${file} filedata HEX)
+ string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
+ file(APPEND ${data} "const unsigned char file_${filename}[] = {${filedata}};\n")
+ file(APPEND ${data} "const unsigned file_${filename}_size = sizeof(file_${filename});\n")
+ file(APPEND ${header} "extern const unsigned char file_${filename}[];\n")
+ file(APPEND ${header} "extern const unsigned file_${filename}_size;\n")
+ endforeach()
+ file(APPEND ${data} "}\n")
+ file(APPEND ${header} "const File files[] {\n")
+ foreach(file ${files})
+ string(REGEX MATCH "([^/]+)$" filename ${file})
+ string(REGEX REPLACE "\\.| |-" "_" flat_filename ${filename})
+ file(APPEND ${header} "File{\"${filename}\", file_${flat_filename}, file_${flat_filename}_size},\n")
+ endforeach()
+ file(APPEND ${header} "};\n")
+ file(APPEND ${header} "const unsigned file_count = ${file_count};\n")
+ file(APPEND ${header} "}\n")
+endfunction()
+
+function(serialize_assets dir data header)
+ file(WRITE ${data} "")
+ file(APPEND ${data} "#include \"${header}\"\n")
+ file(APPEND ${data} "namespace assets {\n")
+ file(WRITE ${header} "")
+ file(APPEND ${header} "#pragma once\n")
+ file(APPEND ${header} "#include <string>\n")
+ file(APPEND ${header} "namespace assets {\n")
+ file(APPEND ${header} "struct File { const std::string name; const unsigned char* data; const unsigned size; };\n")
+ serialize_folder("palette" ${dir}/palette ${data} ${header})
+ serialize_folder("noise" ${dir}/noise ${data} ${header})
+ serialize_folder("shader" ${dir}/shader ${data} ${header})
+ file(APPEND ${data} "}\n")
+ file(APPEND ${header} "}\n")
+endfunction()
diff --git a/default.nix b/default.nix
new file mode 100644
index 0000000..6d19d77
--- /dev/null
+++ b/default.nix
@@ -0,0 +1,152 @@
+{ pkgs ? import <nixpkgs> {
+ overlays = [
+ (import (builtins.fetchTarball {
+ url = https://github.com/nix-community/emacs-overlay/archive/0bb3c36bb8cddd92b788d8ce474c39475148d5e2.tar.gz;
+ }))
+ ];
+}, ... }:
+
+let
+ cuda-samples-common-headers = pkgs.stdenv.mkDerivation rec {
+ name = "cuda-samples-common-headers";
+ version = "11.1";
+
+ src = pkgs.fetchFromGitHub {
+ owner = "NVIDIA";
+ repo = "cuda-samples";
+ rev = "v${version}";
+ sha256 = "1kjixk50i8y1bkiwbdn5lkv342crvkmbvy1xl5j3lsa1ica21kwh";
+ };
+
+ phases = [ "installPhase" ];
+
+ installPhase = ''
+ mkdir -p $out/include/cuda-samples/Common
+ cp -r $src/Common/* $out/include/cuda-samples/Common
+ '';
+ };
+
+ imgui-sfml = pkgs.stdenv.mkDerivation rec {
+ name = "imgui-sfml";
+ version = "2.1";
+
+ src = pkgs.fetchFromGitHub {
+ owner = "eliasdaler";
+ repo = "imgui-sfml";
+ rev = "v${version}";
+ sha256 = "1g8gqly156miv12ajapnhmxfcv9i3fqhdmdy45gmdw47kh8ly5zj";
+ };
+
+ buildInputs = with pkgs; [
+ cmake
+ ];
+
+ propagatedBuildInputs = with pkgs; [
+ libGL
+ sfml
+ xorg.libX11
+ ];
+
+ cmakeFlags = let
+ imgui_src = pkgs.fetchFromGitHub {
+ owner = "ocornut";
+ repo = "imgui";
+ rev = "v1.68";
+ sha256 = "0a7b4fljybvpls84rqzsb2p4r89ic2g6w2m9h0209xlhm4k0x7qr";
+ };
+ in [
+ "-DIMGUI_DIR=${imgui_src}"
+ "-DBUILD_SHARED_LIBS=ON"
+ "-DIMGUI_SFML_BUILD_EXAMPLES=ON"
+ ];
+ };
+
+in pkgs.stdenv.mkDerivation rec {
+ name = "LiterateLB";
+
+ src = ./.;
+
+ nativeBuildInputs = with pkgs; [
+ cmake
+ addOpenGLRunpath
+ ];
+
+ buildInputs = let
+ local-python = pkgs.python3.withPackages (python-packages: with python-packages; [
+ sympy
+ numpy
+ Mako
+ matplotlib
+ scipy
+ ]);
+
+ in with pkgs; [
+ local-python
+ cudatoolkit_11
+ cuda-samples-common-headers
+ linuxPackages.nvidia_x11
+ libGL
+ sfml
+ imgui-sfml
+ ];
+
+ phases = [ "buildPhase" "installPhase" ];
+
+ buildPhase = let
+ tangle-el = pkgs.writeTextFile {
+ name = "tangle.el";
+ text = ''
+ (toggle-debug-on-error)
+ (require 'org)
+ (org-babel-do-load-languages 'org-babel-load-languages '((python . t)))
+ (setq org-confirm-babel-evaluate nil)
+ (setq org-babel-confirm-evaluate-answer-no t)
+ (find-file "lbm.org")
+ (setq org-babel-default-header-args
+ (cons '(:result-params . ("none"))
+ (assq-delete-all :result-params org-babel-default-header-args)))
+ (org-babel-execute-buffer)
+ (setq org-babel-default-header-args
+ (cons '(:result-params . ("output"))
+ (assq-delete-all :result-params org-babel-default-header-args)))
+ (org-babel-tangle)
+ '';
+ };
+
+ in ''
+ cp $src/lbm.org .
+ mkdir -p tangle/asset
+ cp -r $src/tangle/asset/noise tangle/asset
+ cp -r $src/tangle/asset/palette tangle/asset
+ ${pkgs.emacsUnstable-nox}/bin/emacs --no-init-file --script ${tangle-el}
+ cp $src/CMakeLists.txt .
+ cp $src/assets.cmake .
+ mkdir build
+ pushd build
+ cmake ..
+ make
+ popd
+ '';
+
+ installPhase = ''
+ mkdir -p $out/bin
+ for program in tangle/*.cu; do
+ cp build/$(basename $program .cu) $out/bin/$(basename $program .cu)
+ addOpenGLRunpath $out/bin/$(basename $program .cu)
+ done
+
+ mkdir $out/include
+ cp -r tangle/LLBM $out/include/
+ '';
+
+ env = pkgs.buildEnv {
+ name = name;
+ paths = buildInputs;
+ };
+
+ shellHook = ''
+ export NIX_SHELL_NAME="${name}"
+ export CUDA_PATH="${pkgs.cudatoolkit_11}"
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/run/opengl-driver/lib
+ '';
+}
diff --git a/lbm.org b/lbm.org
new file mode 100644
index 0000000..6df9a50
--- /dev/null
+++ b/lbm.org
@@ -0,0 +1,5817 @@
+#+TITLE: A literate Lattice Boltzmann Code
+#+SUBTITLE: [[https://kummerlaender.eu][Adrian Kummerländer]]
+#+STARTUP: latexpreview
+#+HTML_DOCTYPE: html5
+#+OPTIONS: toc:nil html-postamble:nil html5-fancy:t html-style:nil
+#+PROPERTY: header-args :exports both :mkdirp yes :noweb no-export :eval no-export
+#+PROPERTY: header-args:python+ :var lattice="D2Q9"
+#+PROPERTY: header-args:cpp+ :main no :eval no
+#+HTML_MATHJAX: path:"https://static.kummerlaender.eu/mathjax/MathJax.js?config=TeX-AMS_HTML"
+
+#+HTML_HEAD: <link rel="stylesheet" type="text/css" href="org.css"/>
+
+#+BEGIN_ABSTRACT
+This file describes a full Lattice Boltzmann code featuring both 2D and 3D lattices, a workable selection of boundary conditions, Smagorinsky
+turbulence modelling, expression-level code optimization and even a full ray marcher for just-in-time volumetric visualization in addition
+to a set of interesting examples. All of this runs on GPUs using CUDA near the maximum possible performance on that platform.
+
+*This document is a [[*Open tasks][work in progress]].*
+#+BEGIN_EXPORT html
+<video style="width:100%" src="https://literatelb.org/talk/fire.webm" onloadeddata="this.play();" playsinline loop muted/>
+#+END_EXPORT
+#+END_ABSTRACT
+#+TOC: headlines 2
+
+* Preamble :noexport:
+#+BEGIN_SRC python :session :results none
+from sympy import *
+from sympy.codegen.ast import CodeBlock, Assignment
+from mako.template import Template
+import itertools
+
+def printlatexpr(*exprs):
+ print("$$\\begin{align*}")
+ for expr in exprs:
+ print(f"{latex(expr.lhs)} &:= {latex(expr.rhs)} \\\\")
+
+ print("\\end{align*}$$")
+
+def multirange(*ranges):
+ return itertools.product(*[ range(r) for r in ranges ])
+
+def multisum(f, *ranges):
+ return sum([ f(*idx) for idx in multirange(*ranges) ])
+
+descriptor = { }
+#+END_SRC
+
+#+NAME: eval-using-descriptor
+#+BEGIN_SRC python :session :results output :var src=""
+if lattice in descriptor:
+ D = descriptor[lattice]
+ print(Template(src).render(**locals()))
+#+END_SRC
+
+#+RESULTS: eval-using-descriptor
+
+* Introduction
+Approaches to modelling fluid dynamics can be coarsely grouped into three categories: Microscopic, Mesoscopic and Macroscopic models.
+Microscopic models describe the behavior of the individual /particles/ of whose the macroscopic fluid movement is an emergent property.
+On the other side of the spectrum, macroscopic models consider only the large scale properties of a fluid such as its velocity and density
+fields. One could say that microscopic particle models are closest to what actually happens in nature and macroscopic models in the form
+of the /Navier Stokes equations (NSE)/ represent an ideal vision of a fluid that only holds for /large enough/ space scales.
+As is the case for most things, what we call a /fluid/ is by itself already an abstraction.
+
+Mesoscopic models sit between these two extremes and consider neither individual particles nor purely macroscopic properties but rather
+the probability of some amount of particles moving with some velocity into a certain direction at various points in time and space. The
+mathematical field of /kinetic theory/ provides a foundation for linking all of these three models.
+
+** Kinetic theory
+One can use kinetic theory to get from a microscropic or /molecular/ system governed by e.g. Newton's laws of motion to macroscopic
+descriptions of certain properties of such a system. For example one can reach the diffusion equation for /billiard systems/ of particles
+colliding with solid obstacles or to the Navier Stokes equations for systems of pairwise colliding particles.
+
+In very rough terms one starts out by considering the microscopic system and taking its limit for infinitely many particles of
+vanishingly small size. This yields a /kinetic equation/ that describes the behaviour of the particle distribution function
+under some collision operator for any point in space and time. When we consider pairwise colliding particles the result
+turns out to be the well known /Boltzmann transport equation/:
+
+$$(\partial_t + v \cdot \nabla_x) f(t,x,v) = \Omega (f).$$
+
+This PDE describes the behavior of the probability $f(t,x,v)$ that particles move into direction $v$ at location $x$ and time $t$ for
+some collision operator $\Omega$. Discretizing this equation on a lattice for a finite set of discrete velocities is called the /Lattice Boltzmann Method (LBM)/.
+
+The practical usefulness of LBM hinges on the ability of reaching the desired /target equations/
+such as NSE or the heat equation as a limit of the kinetic equation.
+
+** Lattice Boltzmann Method
+We discretize the Boltzmann equation in spatial, velocity and temporal space on a cartesian lattice using discrete velocities
+$\xi_i$ that point to the neighbors of each cell.
+
+$$(\partial_t + v \cdot \nabla_x)f = \Omega(f) \approx f_i(x + \xi_i, t + 1) - f_i(x,t) = \Omega_i(x,t)$$
+
+A common collision operator $\Omega$ for Navier-Stokes approximation is given by Bhatnagar, Gross and Kroog (BGK).
+
+$$\Omega(f) := -\frac{f - f^\text{eq}}{\tau}$$
+
+This BGK operator /relaxes/ the local population $f$ towards some equilibrium distribution $f^\text{eq}$ with rate $\tau$. Inserting this operator
+into the discrete Boltzmann equation yields the discrete LBM BGK equation.
+
+$$f_i(x + \xi_i, t + 1) = f_i(x,t) - \frac{1}{\tau} (f_i(x,t) - f_i^\text{eq}(x,t))$$
+
+This explicit form exposes the separation of the LBM algorithm into a local collision step followed by a streaming step.
+The collision step relaxes the population of each cell towards its local equilibrium and the streaming step propagates the resulting
+updated population values to the respective neighboring cells.
+
+Macroscopic moments such as density and velocity can be calculated directly from the distribution functions.
+
+$$\rho := \sum_{i=0}^{q-1} f_i(x,t) \text{ and } u := \frac{1}{\rho} \sum_{i=0}^{q-1} \xi_i f_i(x,t)$$
+
+The equilibrium distribution $f_i^\text{eq}$ for these macroscopic properties is given by
+
+$$f_i^\text{eq}(x,t) := \omega_i \rho \left( 1 + \frac{u \cdot \xi_i}{c_s^2} + \frac{(u \cdot \xi_i)^2}{2c_s^4} - \frac{u \cdot u}{2c_s^2} \right)$$
+
+using lattice-dependent weights $\omega_i$ and speed of sound $c_s$. Note that this is just one possible discretization of the Boltzmann
+equation -- nothing stops us from using different sets of velocities, relaxation times, collision operators and so on.
+Changing these things is how different physics are modeled in LBM. e.g. what we will do in the section on
+[[*Smagorinsky BGK collision][Smagorinsky turbulence modelling]] is to locally change the relaxation time.
+
+To summarize what we are going to do for the simplest bulk fluid case: First calculate the current density and velocity
+moments of a cell, then compute the matching equilibrium distributions and finally perform the local BGK collision to
+update the cell populations.
+The last step before we start again is to propagate the post-collision values to the corresponding neighbor cells.
+
+Special care has to be taken for the boundary conditions at the lattice frontier and around any obstacle geometries.
+Such boundary conditions are one of the major topics of LBM research with a very rich toolbox of specialized collision
+steps for modeling e.g. inflow, outflow, solid or moving walls of various kinds.
+
+* Lattice
+The cartesian grids used for the spatial discretization are commonly described as =DXQY= lattices where =X= is the number of spatial dimensions
+and =Y= is the number of discrete velocities $\xi_i$. e.g. a =D2Q9= lattice is two dimensional and stores nine population values per cell. Each population has
+an associated weigth $\omega_i$ that in a sense controls its impact on the collision step. Additionally we also require the lattice speed of sound $c_s$ which is
+the speed of information propagation within the lattice.
+
+#+BEGIN_SRC python :session :results none
+from fractions import Fraction
+
+class Descriptor:
+ def __init__(self, name, data):
+ self.name = name
+ self.c = [ Matrix(eval(row[0])) for row in data ]
+ self.w = [ Rational(f.numerator, f.denominator)
+ for f in [ Fraction(row[1]) for row in data ] ]
+
+ self.d = self.c[0].shape[0]
+ self.q = len(self.c)
+ self.c_s = sqrt(Rational(1,3))
+#+END_SRC
+
+All of these constants have to be accessible for symbolic computations which is why we store them within a so called descriptor class.
+For convenience we write out the directions and weights as plain tables that are then read into the Python structure.
+
+#+NAME: load-descriptor
+#+BEGIN_SRC python :session :results output :var data=D2Q9
+descriptor[lattice] = Descriptor(lattice, data)
+print(f"Loaded D{descriptor[lattice].d}Q{descriptor[lattice].q} lattice with a weight sum of {sum(descriptor[lattice].w)}.")
+#+END_SRC
+
+#+RESULTS: load-descriptor
+: Loaded D2Q9 lattice with a weight sum of 1.
+
+#+CALL: load-descriptor(lattice="D3Q19", data=D3Q19)
+
+#+RESULTS:
+: Loaded D3Q19 lattice with a weight sum of 1.
+
+** D2Q9
+#+NAME: D2Q9
+| Direction | Weight |
+|-----------+--------|
+| (-1,-1) | 1/36 |
+| (-1, 0) | 1/9 |
+| (-1, 1) | 1/36 |
+| ( 0,-1) | 1/9 |
+| ( 0, 0) | 4/9 |
+| ( 0, 1) | 1/9 |
+| ( 1,-1) | 1/36 |
+| ( 1, 0) | 1/9 |
+| ( 1, 1) | 1/36 |
+
+** D3Q19
+#+NAME: D3Q19
+| Direction | Weight |
+|------------+--------|
+| ( 0, 1, 1) | 1/36 |
+| (-1, 0, 1) | 1/36 |
+| ( 0, 0, 1) | 1/18 |
+| ( 1, 0, 1) | 1/36 |
+| ( 0,-1, 1) | 1/36 |
+| (-1, 1, 0) | 1/36 |
+| ( 0, 1, 0) | 1/18 |
+| ( 1, 1, 0) | 1/36 |
+| (-1, 0, 0) | 1/18 |
+| ( 0, 0, 0) | 1/3 |
+| ( 1, 0, 0) | 1/18 |
+| (-1,-1, 0) | 1/36 |
+| ( 0,-1, 0) | 1/18 |
+| ( 1,-1, 0) | 1/36 |
+| ( 0, 1,-1) | 1/36 |
+| (-1, 0,-1) | 1/36 |
+| ( 0, 0,-1) | 1/18 |
+| ( 1, 0,-1) | 1/36 |
+| ( 0,-1,-1) | 1/36 |
+
+* Collision steps
+While the streaming step of the LBM algorithm only propagates the populations between cells in an unform fashion,
+the collision step determines the actual values those populations take. This means that the physical behaviour
+modelled by a given LBM algorithm is determined primarily by the collision step.
+
+In this section we are going to generate the code for bulk collisions. i.e. the collisions that model fluid and other transport
+phenomena apart from domain boundaries. Those will be handled at a later point by special purpose collision steps called
+/boundary conditions/.
+
+** Code printing
+Before we can get started on constructing expression trees and generating code from them we need to
+setup some basics so SymPy actually generates something we can compile in our environment.
+
+In order to get more fine grained control we need to overload an approproate SymPy C code printer class.
+This allows us to e.g. easily print indexed expressions that access the population array in the correct way
+or to explicitly type float constants according to a compile-time template type.
+
+#+BEGIN_SRC python :session :results none
+from sympy.printing.ccode import C11CodePrinter
+
+class CodeBlockPrinter(C11CodePrinter):
+ def __init__(self, custom_assignment, custom_functions):
+ super(CodeBlockPrinter, self).__init__()
+ self._default_settings['contract'] = False
+ self.custom_assignment = custom_assignment
+ for f in custom_functions:
+ self._kf[f] = f
+
+ def _print_Indexed(self, expr):
+ assert len(expr.indices) == 1
+ if expr.base.name[0] == 'f':
+ return f"{expr.base.name}[{expr.indices[0]}]"
+ else:
+ return f"{expr.base.name}_{expr.indices[0]}"
+
+ def _print_Float(self, flt):
+ return "T{%s}" % str(flt.evalf())
+
+ def _print_Pow(self, expr):
+ if expr.exp == -1:
+ return "T{1} / (%s)" % self.doprint(expr.base)
+ else:
+ return super()._print_Pow(expr)
+
+ def _print_Assignment(self, expr):
+ if self.custom_assignment and expr.lhs.is_Indexed and expr.lhs.base.name[0] == 'f':
+ return f"{self.doprint(expr.lhs)} = {self.doprint(expr.rhs.evalf())};"
+ else:
+ return f"T {self.doprint(expr.lhs)} = {self.doprint(expr.rhs.evalf())};"
+#+END_SRC
+
+For convenience the instantiation of this class is hidden in a =printcode= function that we can use everywhere.
+
+#+BEGIN_SRC python :session :results none
+def printcode(expr, custom_assignment=True, custom_functions=[]):
+ print(CodeBlockPrinter(custom_assignment, custom_functions).doprint(expr))
+#+END_SRC
+
+The additional assignment parameter allow us to control whether the targets of assignment expressions should be
+instantiated as a new scalar variable in addition while the functions parameter allows us to make custom runtime
+functions known to SymPy. If we don't do this for a function =f= that our assignment expression contains a reference
+to, the printer will generate unwanted comments to reflect this.
+
+*** Custom expression transformations
+We do not want to use the =pow= function for squares in the generated code. This can be achieved by providing
+a custom =ReplaceOptim= structure during the CSE optimization step that conditionally resolves =Pow= expressions.
+
+#+BEGIN_SRC python :session :results none
+from sympy.codegen.rewriting import ReplaceOptim
+
+expand_pos_square = ReplaceOptim(
+ lambda e: e.is_Pow and e.exp.is_integer and e.exp == 2,
+ lambda p: UnevaluatedExpr(Mul(p.base, p.base, evaluate = False))
+)
+
+custom_opti = cse_main.basic_optimizations + [
+ (expand_pos_square, expand_pos_square)
+]
+#+END_SRC
+
+#+BEGIN_SRC python :session :results none
+def cse(block, symbol_prefix = 'x'):
+ return block.cse(symbols = numbered_symbols(prefix=symbol_prefix), optimizations = custom_opti, order = 'none')
+#+END_SRC
+
+** Moments
+#+BEGIN_SRC python :session :results none
+i, j = symbols('i, j')
+d, q = symbols('d, q')
+xi = IndexedBase('xi')
+#+END_SRC
+
+To start we define placeholders for the spatial and discrete velocity dimensions as well as the velocity set $\xi$ of
+some lattice. As the moments are constructed using the lattice populations $f$ we also require placeholders
+for those in addition to the moments $\rho$ and $u$ themselves.
+
+#+BEGIN_SRC python :session :results none
+f = IndexedBase('f')
+rho = Symbol('rho')
+u = IndexedBase('u