From d71faec93ec0a55c46810e0d178b2803ee89130c Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Sat, 15 Jun 2019 20:45:27 +0200 Subject: Add support for generating a D3Q19 kernel Note how this basically required no changes besides generalizing cell indexing and adding the symbolic formulation of a D3Q19 BGK collision step. Increasing the neighborhood communication from 9 to 19 cells leads to a significant performance "regression": The 3D kernel yields ~ 360 MLUPS compared to the 2D version's ~ 820 MLUPS. --- implosion.py | 2 +- lbm.py | 54 ++++++++++++++++++++--------- ldc_2d.py | 79 ++++++++++++++++++++++++++++++++++++++++++ ldc_3d.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++ lid_driven_cavity.py | 79 ------------------------------------------ symbolic/D3Q19.py | 59 ++++++++++++++++++++++++++++++++ template/kernel.mako | 26 ++++++++------ 7 files changed, 289 insertions(+), 107 deletions(-) create mode 100644 ldc_2d.py create mode 100644 ldc_3d.py delete mode 100644 lid_driven_cavity.py create mode 100644 symbolic/D3Q19.py diff --git a/implosion.py b/implosion.py index 90b1d55..05f3644 100644 --- a/implosion.py +++ b/implosion.py @@ -22,7 +22,7 @@ def generate_moment_plots(lattice, moments): plt.figure(figsize=(10, 10)) plt.imshow(velocity, origin='lower', cmap=plt.get_cmap('seismic')) - plt.savefig("result/implosion_" + str(i) + ".png", bbox_inches='tight', pad_inches=0) + plt.savefig("result/implosion_%02d.png" % i, bbox_inches='tight', pad_inches=0) def box(geometry, x, y): if x == 1 or y == 1 or x == geometry.size_x-2 or y == geometry.size_y-2: diff --git a/lbm.py b/lbm.py index 546b3c7..ef05e4f 100644 --- a/lbm.py +++ b/lbm.py @@ -8,21 +8,34 @@ from mako.template import Template from pathlib import Path class Geometry: - def __init__(self, size_x, size_y): + def __init__(self, size_x, size_y, size_z = 1): self.size_x = size_x self.size_y = size_y - self.volume = size_x * size_y + self.size_z = size_z + self.volume = size_x * size_y * size_z def inner_cells(self): - for y in range(1,self.size_y-1): - for x in range(1,self.size_x-1): - yield x, y + if self.size_z == 1: + for y in range(1,self.size_y-1): + for x in range(1,self.size_x-1): + yield x, y + else: + for z in range(1,self.size_z-1): + for y in range(1,self.size_y-1): + for x in range(1,self.size_x-1): + yield x, y, z def span(self): - return (self.size_x, self.size_y) + if self.size_z == 1: + return (self.size_x, self.size_y) + else: + return (self.size_x, self.size_y, self.size_z) def inner_span(self): - return (self.size_x-2, self.size_y-2) + if self.size_z == 1: + return (self.size_x-2, self.size_y-2) + else: + return (self.size_x-2, self.size_y-2, self.size_z-2) class Lattice: @@ -55,15 +68,24 @@ class Lattice: self.build_kernel() + if descriptor.d == 2: + self.layout = (32,1) + elif descriptor.d == 3: + self.layout = (32,1,1) + self.program.equilibrilize( - self.queue, (self.geometry.size_x,self.geometry.size_y), (32,1), self.cl_pop_a, self.cl_pop_b).wait() + self.queue, self.geometry.span(), self.layout, self.cl_pop_a, self.cl_pop_b).wait() - def idx(self, x, y): - return y * self.geometry.size_x + x; + def idx(self, x, y, z = 0): + return z * (self.geometry.size_x*self.geometry.size_y) + y * self.geometry.size_x + x; def setup_geometry(self, material_at): - for x, y in self.geometry.inner_cells(): - self.np_material[self.idx(x,y)] = material_at(self.geometry, x, y) + if self.descriptor.d == 2: + for x, y in self.geometry.inner_cells(): + self.np_material[self.idx(x,y)] = material_at(self.geometry, x, y) + elif self.descriptor.d == 3: + for x, y, z in self.geometry.inner_cells(): + self.np_material[self.idx(x,y,z)] = material_at(self.geometry, x, y, z) cl.enqueue_copy(self.queue, self.cl_material, self.np_material).wait(); @@ -94,11 +116,11 @@ class Lattice: if self.tick: self.tick = False self.program.collide_and_stream( - self.queue, self.geometry.span(), (32,1), self.cl_pop_a, self.cl_pop_b, self.cl_material) + self.queue, self.geometry.span(), self.layout, self.cl_pop_a, self.cl_pop_b, self.cl_material) else: self.tick = True self.program.collide_and_stream( - self.queue, self.geometry.span(), (32,1), self.cl_pop_b, self.cl_pop_a, self.cl_material) + self.queue, self.geometry.span(), self.layout, self.cl_pop_b, self.cl_pop_a, self.cl_material) def sync(self): self.queue.finish() @@ -107,9 +129,9 @@ class Lattice: moments = numpy.ndarray(shape=(self.descriptor.d+1, self.geometry.volume), dtype=numpy.float32) if self.tick: self.program.collect_moments( - self.queue, self.geometry.span(), (32,1), self.cl_pop_b, self.cl_moments) + self.queue, self.geometry.span(), self.layout, self.cl_pop_b, self.cl_moments) else: self.program.collect_moments( - self.queue, self.geometry.span(), (32,1), self.cl_pop_a, self.cl_moments) + self.queue, self.geometry.span(), self.layout, self.cl_pop_a, self.cl_moments) cl.enqueue_copy(self.queue, moments, self.cl_moments).wait(); return moments diff --git a/ldc_2d.py b/ldc_2d.py new file mode 100644 index 0000000..0aa1e71 --- /dev/null +++ b/ldc_2d.py @@ -0,0 +1,79 @@ +import numpy +import time + +import matplotlib +import matplotlib.pyplot as plt +matplotlib.use('AGG') + +from lbm import Lattice, Geometry + +import symbolic.D2Q9 as D2Q9 + +def MLUPS(cells, steps, time): + return cells * steps / time * 1e-6 + +def generate_moment_plots(lattice, moments): + for i, m in enumerate(moments): + print("Generating plot %d of %d." % (i+1, len(moments))) + + velocity = numpy.ndarray(shape=tuple(reversed(lattice.geometry.inner_span()))) + for x, y in lattice.geometry.inner_cells(): + velocity[y-1,x-1] = numpy.sqrt(m[1,lattice.idx(x,y)]**2 + m[2,lattice.idx(x,y)]**2) + + plt.figure(figsize=(10, 10)) + plt.imshow(velocity, origin='lower', cmap=plt.get_cmap('seismic')) + plt.savefig("result/ldc_2d_%02d.png" % i, bbox_inches='tight', pad_inches=0) + +def cavity(geometry, x, y): + if x == 1 or y == 1 or x == geometry.size_x-2: + return 2 + elif y == geometry.size_y-2: + return 3 + else: + return 1 + +boundary = """ + if ( m == 2 ) { + u_0 = 0.0; + u_1 = 0.0; + } + if ( m == 3 ) { + u_0 = 0.1; + u_1 = 0.0; + } +""" + +nUpdates = 100000 +nStat = 5000 + +moments = [] + +print("Initializing simulation...\n") + +lattice = Lattice( + descriptor = D2Q9, + geometry = Geometry(256, 256), + + moments = D2Q9.moments(optimize = False), + collide = D2Q9.bgk(tau = 0.52), + + boundary_src = boundary) + +lattice.setup_geometry(cavity) + +print("Starting simulation using %d cells...\n" % lattice.geometry.volume) + +lastStat = time.time() + +for i in range(1,nUpdates+1): + lattice.evolve() + + if i % nStat == 0: + lattice.sync() + print("i = %4d; %3.0f MLUPS" % (i, MLUPS(lattice.geometry.volume, nStat, time.time() - lastStat))) + moments.append(lattice.get_moments()) + lastStat = time.time() + +print("\nConcluded simulation.\n") + +generate_moment_plots(lattice, moments) diff --git a/ldc_3d.py b/ldc_3d.py new file mode 100644 index 0000000..62f820e --- /dev/null +++ b/ldc_3d.py @@ -0,0 +1,97 @@ +import numpy +import time + +import matplotlib +import matplotlib.pyplot as plt +matplotlib.use('AGG') + +from lbm import Lattice, Geometry + +import symbolic.D3Q19 as D3Q19 + +def MLUPS(cells, steps, time): + return cells * steps / time * 1e-6 + +def generate_moment_plots(lattice, moments): + for i, m in enumerate(moments): + print("Generating plot %d of %d." % (i+1, len(moments))) + + velocity = numpy.ndarray(shape=tuple(reversed(lattice.geometry.inner_span()))) + + # plot x-z-plane + y = lattice.geometry.size_y//2 + for z in range(1,lattice.geometry.size_z-1): + for x in range(1,lattice.geometry.size_x-1): + velocity[z-1,y,x-1] = numpy.sqrt(m[1,lattice.idx(x,y,z)]**2 + m[2,lattice.idx(x,y,z)]**2 + m[3,lattice.idx(x,y,z)]**2) + + plt.figure(figsize=(20, 10)) + + plt.subplot(1, 2, 1) + plt.imshow(velocity[:,y,:], origin='lower', vmin=0.0, vmax=0.12, cmap=plt.get_cmap('seismic')) + + # plot y-z-plane + x = lattice.geometry.size_x//2 + for z in range(1,lattice.geometry.size_z-1): + for y in range(1,lattice.geometry.size_y-1): + velocity[z-1,y-1,x] = numpy.sqrt(m[1,lattice.idx(x,y,z)]**2 + m[2,lattice.idx(x,y,z)]**2 + m[3,lattice.idx(x,y,z)]**2) + + plt.subplot(1, 2, 2) + plt.imshow(velocity[:,:,x], origin='lower', vmin=0.0, vmax=0.15, cmap=plt.get_cmap('seismic')) + + plt.savefig("result/ldc_3d_%02d.png" % i, bbox_inches='tight', pad_inches=0) + +def cavity(geometry, x, y, z): + if x == 1 or y == 1 or z == 1 or x == geometry.size_x-2 or y == geometry.size_y-2: + return 2 + elif z == geometry.size_z-2: + return 3 + else: + return 1 + +boundary = """ + if ( m == 2 ) { + u_0 = 0.0; + u_1 = 0.0; + u_2 = 0.0; + } + if ( m == 3 ) { + u_0 = 0.1; + u_1 = 0.0; + u_2 = 0.0; + } +""" + +nUpdates = 20000 +nStat = 500 + +moments = [] + +print("Initializing simulation...\n") + +lattice = Lattice( + descriptor = D3Q19, + geometry = Geometry(128, 128, 128), + + moments = D3Q19.moments(optimize = False), + collide = D3Q19.bgk(tau = 0.52), + + boundary_src = boundary) + +lattice.setup_geometry(cavity) + +print("Starting simulation using %d cells...\n" % lattice.geometry.volume) + +lastStat = time.time() + +for i in range(1,nUpdates+1): + lattice.evolve() + + if i % nStat == 0: + lattice.sync() + print("i = %4d; %3.0f MLUPS" % (i, MLUPS(lattice.geometry.volume, nStat, time.time() - lastStat))) + moments.append(lattice.get_moments()) + lastStat = time.time() + +print("\nConcluded simulation.\n") + +generate_moment_plots(lattice, moments) diff --git a/lid_driven_cavity.py b/lid_driven_cavity.py deleted file mode 100644 index 873adeb..0000000 --- a/lid_driven_cavity.py +++ /dev/null @@ -1,79 +0,0 @@ -import numpy -import time - -import matplotlib -import matplotlib.pyplot as plt -matplotlib.use('AGG') - -from lbm import Lattice, Geometry - -import symbolic.D2Q9 as D2Q9 - -def MLUPS(cells, steps, time): - return cells * steps / time * 1e-6 - -def generate_moment_plots(lattice, moments): - for i, m in enumerate(moments): - print("Generating plot %d of %d." % (i+1, len(moments))) - - velocity = numpy.ndarray(shape=tuple(reversed(lattice.geometry.inner_span()))) - for x, y in lattice.geometry.inner_cells(): - velocity[y-1,x-1] = numpy.sqrt(m[1,lattice.idx(x,y)]**2 + m[2,lattice.idx(x,y)]**2) - - plt.figure(figsize=(10, 10)) - plt.imshow(velocity, origin='lower', cmap=plt.get_cmap('seismic')) - plt.savefig("result/ldc_" + str(i) + ".png", bbox_inches='tight', pad_inches=0) - -def cavity(geometry, x, y): - if x == 1 or y == 1 or x == geometry.size_x-2: - return 2 - elif y == geometry.size_y-2: - return 3 - else: - return 1 - -boundary = """ - if ( m == 2 ) { - u_0 = 0.0; - u_1 = 0.0; - } - if ( m == 3 ) { - u_0 = 0.1; - u_1 = 0.0; - } -""" - -nUpdates = 100000 -nStat = 5000 - -moments = [] - -print("Initializing simulation...\n") - -lattice = Lattice( - descriptor = D2Q9, - geometry = Geometry(256, 256), - - moments = D2Q9.moments(optimize = False), - collide = D2Q9.bgk(tau = 0.56), - - boundary_src = boundary) - -lattice.setup_geometry(cavity) - -print("Starting simulation using %d cells...\n" % lattice.geometry.volume) - -lastStat = time.time() - -for i in range(1,nUpdates+1): - lattice.evolve() - - if i % nStat == 0: - lattice.sync() - print("i = %4d; %3.0f MLUPS" % (i, MLUPS(lattice.geometry.volume, nStat, time.time() - lastStat))) - moments.append(lattice.get_moments()) - lastStat = time.time() - -print("\nConcluded simulation.\n") - -generate_moment_plots(lattice, moments) diff --git a/symbolic/D3Q19.py b/symbolic/D3Q19.py new file mode 100644 index 0000000..789b083 --- /dev/null +++ b/symbolic/D3Q19.py @@ -0,0 +1,59 @@ +from sympy import * +from sympy.codegen.ast import Assignment + +q = 19 +d = 3 + +c = [ Matrix(x) for x in [ + ( 0, 1, 1), (-1, 0, 1), ( 0, 0, 1), ( 1, 0, 1), ( 0, -1, 1), + (-1, 1, 0), ( 0, 1, 0), ( 1, 1, 0), (-1, 0, 0), ( 0, 0, 0), ( 1, 0, 0), (-1,-1, 0), ( 0, -1, 0), ( 1, -1, 0), + ( 0, 1,-1), (-1, 0,-1), ( 0, 0,-1), ( 1, 0,-1), ( 0, -1,-1) +]] + +w = [Rational(*x) for x in [ + (1,36), (1,36), (1,18), (1,36), (1,36), + (1,36), (1,18), (1,36), (1,18), (1,3), (1,18), (1,36), (1,18), (1,36), + (1,36), (1,36), (1,18), (1,36), (1,36) +]] + +c_s = sqrt(Rational(1,3)) + +f_next = symarray('f_next', q) +f_curr = symarray('f_curr', q) + +def moments(f = f_curr, optimize = True): + rho = symbols('rho') + u = Matrix(symarray('u', d)) + + exprs = [ Assignment(rho, sum(f)) ] + + for i, u_i in enumerate(u): + exprs.append(Assignment(u_i, sum([ (c_j*f[j])[i] for j, c_j in enumerate(c) ]) / sum(f))) + + if optimize: + return cse(exprs, optimizations='basic', symbols=numbered_symbols(prefix='m')) + else: + return ([], exprs) + +def equilibrium(): + rho = symbols('rho') + u = Matrix(symarray('u', d)) + + f_eq = [] + + for i, c_i in enumerate(c): + f_eq_i = w[i] * rho * ( 1 + + c_i.dot(u) / c_s**2 + + c_i.dot(u)**2 / (2*c_s**4) + - u.dot(u) / (2*c_s**2) ) + f_eq.append(f_eq_i) + + return f_eq + +def bgk(tau, f_eq = equilibrium(), optimize = True): + exprs = [ Assignment(f_next[i], f_curr[i] + 1/tau * ( f_eq_i - f_curr[i] )) for i, f_eq_i in enumerate(f_eq) ] + + if optimize: + return cse(exprs, optimizations='basic') + else: + return ([], exprs) diff --git a/template/kernel.mako b/template/kernel.mako index 7e930af..79cae66 100644 --- a/template/kernel.mako +++ b/template/kernel.mako @@ -1,7 +1,7 @@ __kernel void equilibrilize(__global __write_only float* f_next, __global __write_only float* f_prev) { - const unsigned int gid = get_global_id(1)*${geometry.size_x} + get_global_id(0); + const unsigned int gid = get_global_id(2)*(${geometry.size_x*geometry.size_y}) + get_global_id(1)*${geometry.size_x} + get_global_id(0); __global __write_only float* preshifted_f_next = f_next + gid; __global __write_only float* preshifted_f_prev = f_prev + gid; @@ -17,21 +17,25 @@ __kernel void equilibrilize(__global __write_only float* f_next, } <% -def direction_index(c_i): - return (c_i[0]+1) + 3*(1-c_i[1]) - def neighbor_offset(c_i): - if c_i[1] == 0: - return c_i[0] - else: - return c_i[1]*geometry.size_x + c_i[0] + if descriptor.d == 2: + if c_i[1] == 0: + return c_i[0] + else: + return c_i[1]*geometry.size_x + c_i[0] + elif descriptor.d == 3: + if c_i[1] == 0: + return c_i[2]*geometry.size_x*geometry.size_y + c_i[0] + else: + return c_i[2]*geometry.size_x*geometry.size_y + c_i[1]*geometry.size_x + c_i[0] + %> __kernel void collide_and_stream(__global __write_only float* f_next, __global __read_only float* f_prev, __global __read_only int* material) { - const unsigned int gid = get_global_id(1)*${geometry.size_x} + get_global_id(0); + const unsigned int gid = get_global_id(2)*(${geometry.size_x*geometry.size_y}) + get_global_id(1)*${geometry.size_x} + get_global_id(0); const int m = material[gid]; @@ -43,7 +47,7 @@ __kernel void collide_and_stream(__global __write_only float* f_next, __global __read_only float* preshifted_f_prev = f_prev + gid; % for i, c_i in enumerate(descriptor.c): - const float f_curr_${i} = preshifted_f_prev[${direction_index(c_i)*geometry.volume + neighbor_offset(-c_i)}]; + const float f_curr_${i} = preshifted_f_prev[${i*geometry.volume + neighbor_offset(-c_i)}]; % endfor % for i, expr in enumerate(moments_subexpr): @@ -72,7 +76,7 @@ __kernel void collide_and_stream(__global __write_only float* f_next, __kernel void collect_moments(__global __read_only float* f, __global __write_only float* moments) { - const unsigned int gid = get_global_id(1)*${geometry.size_x} + get_global_id(0); + const unsigned int gid = get_global_id(2)*(${geometry.size_x*geometry.size_y}) + get_global_id(1)*${geometry.size_x} + get_global_id(0); __global __read_only float* preshifted_f = f + gid; -- cgit v1.2.3