about | summary | refs | log | tree | commit | diff
path: root/lbm.py
diff options
context:
space:
mode:
author: Adrian Kummerlaender, 2019-06-15 20:45:27 +0200
committer: Adrian Kummerlaender, 2019-06-15 20:54:56 +0200
commit: d71faec93ec0a55c46810e0d178b2803ee89130c (patch)
tree: 3c35650637615af20668a5ec7bf974b2c05b248b /lbm.py
parent: c43d3f38b6922d36d15e8ba2b6ce17ddb0c75b0a (diff)
download: symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.gz
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.bz2
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.lz
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.xz
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.tar.zst
symlbm_playground-d71faec93ec0a55c46810e0d178b2803ee89130c.zip
Add support for generating a D3Q19 kernel
Note how this required essentially no changes besides generalizing the cell indexing and adding the symbolic formulation of a D3Q19 BGK collision step. Increasing the neighborhood communication from 9 to 19 cells leads to a significant performance "regression": the 3D kernel yields ~360 MLUPS (million lattice updates per second), compared to the 2D version's ~820 MLUPS.
Diffstat (limited to 'lbm.py')
-rw-r--r-- lbm.py 54
1 files changed, 38 insertions, 16 deletions
diff --git a/lbm.py b/lbm.py
index 546b3c7..ef05e4f 100644
--- a/lbm.py
+++ b/lbm.py
@@ -8,21 +8,34 @@ from mako.template import Template
from pathlib import Path
class Geometry:
- def __init__(self, size_x, size_y):
+ def __init__(self, size_x, size_y, size_z = 1):
self.size_x = size_x
self.size_y = size_y
- self.volume = size_x * size_y
+ self.size_z = size_z
+ self.volume = size_x * size_y * size_z
def inner_cells(self):
- for y in range(1,self.size_y-1):
- for x in range(1,self.size_x-1):
- yield x, y
+ if self.size_z == 1:
+ for y in range(1,self.size_y-1):
+ for x in range(1,self.size_x-1):
+ yield x, y
+ else:
+ for z in range(1,self.size_z-1):
+ for y in range(1,self.size_y-1):
+ for x in range(1,self.size_x-1):
+ yield x, y, z
def span(self):
- return (self.size_x, self.size_y)
+ if self.size_z == 1:
+ return (self.size_x, self.size_y)
+ else:
+ return (self.size_x, self.size_y, self.size_z)
def inner_span(self):
- return (self.size_x-2, self.size_y-2)
+ if self.size_z == 1:
+ return (self.size_x-2, self.size_y-2)
+ else:
+ return (self.size_x-2, self.size_y-2, self.size_z-2)
class Lattice:
@@ -55,15 +68,24 @@ class Lattice:
self.build_kernel()
+ if descriptor.d == 2:
+ self.layout = (32,1)
+ elif descriptor.d == 3:
+ self.layout = (32,1,1)
+
self.program.equilibrilize(
- self.queue, (self.geometry.size_x,self.geometry.size_y), (32,1), self.cl_pop_a, self.cl_pop_b).wait()
+ self.queue, self.geometry.span(), self.layout, self.cl_pop_a, self.cl_pop_b).wait()
- def idx(self, x, y):
- return y * self.geometry.size_x + x;
+ def idx(self, x, y, z = 0):
+ return z * (self.geometry.size_x*self.geometry.size_y) + y * self.geometry.size_x + x;
def setup_geometry(self, material_at):
- for x, y in self.geometry.inner_cells():
- self.np_material[self.idx(x,y)] = material_at(self.geometry, x, y)
+ if self.descriptor.d == 2:
+ for x, y in self.geometry.inner_cells():
+ self.np_material[self.idx(x,y)] = material_at(self.geometry, x, y)
+ elif self.descriptor.d == 3:
+ for x, y, z in self.geometry.inner_cells():
+ self.np_material[self.idx(x,y,z)] = material_at(self.geometry, x, y, z)
cl.enqueue_copy(self.queue, self.cl_material, self.np_material).wait();
@@ -94,11 +116,11 @@ class Lattice:
if self.tick:
self.tick = False
self.program.collide_and_stream(
- self.queue, self.geometry.span(), (32,1), self.cl_pop_a, self.cl_pop_b, self.cl_material)
+ self.queue, self.geometry.span(), self.layout, self.cl_pop_a, self.cl_pop_b, self.cl_material)
else:
self.tick = True
self.program.collide_and_stream(
- self.queue, self.geometry.span(), (32,1), self.cl_pop_b, self.cl_pop_a, self.cl_material)
+ self.queue, self.geometry.span(), self.layout, self.cl_pop_b, self.cl_pop_a, self.cl_material)
def sync(self):
self.queue.finish()
@@ -107,9 +129,9 @@ class Lattice:
moments = numpy.ndarray(shape=(self.descriptor.d+1, self.geometry.volume), dtype=numpy.float32)
if self.tick:
self.program.collect_moments(
- self.queue, self.geometry.span(), (32,1), self.cl_pop_b, self.cl_moments)
+ self.queue, self.geometry.span(), self.layout, self.cl_pop_b, self.cl_moments)
else:
self.program.collect_moments(
- self.queue, self.geometry.span(), (32,1), self.cl_pop_a, self.cl_moments)
+ self.queue, self.geometry.span(), self.layout, self.cl_pop_a, self.cl_moments)
cl.enqueue_copy(self.queue, moments, self.cl_moments).wait();
return moments