From d540b701836dbcdef727947f52a0ca01430968a6 Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Wed, 30 Oct 2019 19:45:39 +0100 Subject: Rename folder, add basic README.md --- README.md | 3 + ldc_2d/cpp/CMakeLists.txt | 44 --------- ldc_2d/cpp/README.md | 18 ---- ldc_2d/cpp/config.py | 13 --- ldc_2d/cpp/generate.py | 40 -------- ldc_2d/cpp/ldc.cpp.mako | 176 --------------------------------- ldc_2d/opencl/ldc_2d.py | 101 ------------------- ldc_2d/opencl/simulation.py | 105 -------------------- lid_driven_cavity/cpp/CMakeLists.txt | 44 +++++++++ lid_driven_cavity/cpp/README.md | 18 ++++ lid_driven_cavity/cpp/config.py | 13 +++ lid_driven_cavity/cpp/generate.py | 40 ++++++++ lid_driven_cavity/cpp/ldc.cpp.mako | 176 +++++++++++++++++++++++++++++++++ lid_driven_cavity/opencl/ldc_2d.py | 101 +++++++++++++++++++ lid_driven_cavity/opencl/simulation.py | 105 ++++++++++++++++++++ 15 files changed, 500 insertions(+), 497 deletions(-) create mode 100644 README.md delete mode 100644 ldc_2d/cpp/CMakeLists.txt delete mode 100644 ldc_2d/cpp/README.md delete mode 100644 ldc_2d/cpp/config.py delete mode 100755 ldc_2d/cpp/generate.py delete mode 100644 ldc_2d/cpp/ldc.cpp.mako delete mode 100644 ldc_2d/opencl/ldc_2d.py delete mode 100644 ldc_2d/opencl/simulation.py create mode 100644 lid_driven_cavity/cpp/CMakeLists.txt create mode 100644 lid_driven_cavity/cpp/README.md create mode 100644 lid_driven_cavity/cpp/config.py create mode 100755 lid_driven_cavity/cpp/generate.py create mode 100644 lid_driven_cavity/cpp/ldc.cpp.mako create mode 100644 lid_driven_cavity/opencl/ldc_2d.py create mode 100644 lid_driven_cavity/opencl/simulation.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..abbc0e0 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# boltzgen examples + +Various examples on how to use the [boltzgen](https://github.com/KnairdA/boltzgen) library for generating efficient implementations of the Lattice Boltzmann Method. diff --git a/ldc_2d/cpp/CMakeLists.txt b/ldc_2d/cpp/CMakeLists.txt deleted file mode 100644 index 5b5fb90..0000000 --- a/ldc_2d/cpp/CMakeLists.txt +++ /dev/null @@ -1,44 +0,0 @@ -cmake_minimum_required(VERSION 3.10) -project(ldc LANGUAGES CXX) - -if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE Release) -endif() - -set(CMAKE_CXX_FLAGS_RELEASE "-O3") - -add_custom_command( - OUTPUT - kernel.h - ldc.cpp - COMMAND - ${CMAKE_CURRENT_SOURCE_DIR}/generate.py --output ${CMAKE_CURRENT_BINARY_DIR} - WORKING_DIRECTORY - ${CMAKE_CURRENT_SOURCE_DIR} - DEPENDS - generate.py config.py ldc.cpp.mako -) - -include_directories( - ${CMAKE_BINARY_DIR} -) - -add_executable( - ldc - ${CMAKE_CURRENT_BINARY_DIR}/ldc.cpp -) - -target_compile_features( - ldc - PUBLIC - cxx_std_17 -) - -find_package(OpenMP) -if(OpenMP_CXX_FOUND) - target_link_libraries( - ldc - PUBLIC - OpenMP::OpenMP_CXX - ) -endif() diff --git a/ldc_2d/cpp/README.md b/ldc_2d/cpp/README.md deleted file mode 100644 index 8e84c83..0000000 --- a/ldc_2d/cpp/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# Lid driven cavity - -This example models the common lid driven cavity example. -Note that the actual optimized C++ implementation is generated using the _boltzgen_ library. - -See `config.py` for various configuration options. Both 2D and 3D are supported. - -## Build instructions - -``` -mkdir build -cd build -cmake .. -make -./ldc -``` - -This should result in some summarizing CLI output in addition to a `test.vtk` file for visualization in Paraview. diff --git a/ldc_2d/cpp/config.py b/ldc_2d/cpp/config.py deleted file mode 100644 index 3ed8bd5..0000000 --- a/ldc_2d/cpp/config.py +++ /dev/null @@ -1,13 +0,0 @@ -from boltzgen.lbm.model import * -from boltzgen import Geometry - -descriptor = D2Q9 -geometry = Geometry(512, 512) -tau = 0.52 -precision = 'single' - -## 3D LDC -#descriptor = D3Q19 -#geometry = Geometry(64, 64, 64) -#tau = 0.52 -#precision = 'single' diff --git a/ldc_2d/cpp/generate.py b/ldc_2d/cpp/generate.py deleted file mode 100755 index 4222e98..0000000 --- a/ldc_2d/cpp/generate.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python - -import argparse - -from boltzgen import LBM, Generator, Geometry -from boltzgen.lbm.model import D2Q9 - -import config - -argparser = argparse.ArgumentParser( - description = 'Generate a C++ implementation of a lid driven cavity simulation using LBM') -argparser.add_argument( - '--output', required = False, help = 'Target directory for the generated sources') - -args = argparser.parse_args() - -lbm = LBM(config.descriptor) -generator = Generator( - descriptor = config.descriptor, - moments = lbm.moments(), - collision = lbm.bgk(f_eq = lbm.equilibrium(), tau = config.tau), - target = 'cpp', - precision = config.precision, - index = 'XYZ', - layout = 'AOS') - -if args.output is None: - args.output = '.' - -functions = ['collide_and_stream', 'equilibrilize', 'collect_moments', 'momenta_boundary'] - -with open('%s/kernel.h' % args.output, 'w') as kernel: - kernel.write(generator.kernel(config.geometry, functions)) - -ldc_src = '' -with open('ldc.cpp.mako', 'r') as template: - ldc_src = template.read() - -with open('%s/ldc.cpp' % args.output, 'w') as app: - app.write(generator.custom(config.geometry, ldc_src)) diff --git a/ldc_2d/cpp/ldc.cpp.mako b/ldc_2d/cpp/ldc.cpp.mako deleted file mode 100644 index 5d480de..0000000 --- a/ldc_2d/cpp/ldc.cpp.mako +++ /dev/null @@ -1,176 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "kernel.h" - -void collect_moments_to_vtk(const std::string& path, ${float_type}* f) { - std::ofstream fout; - fout.open(path.c_str()); - - fout << "# vtk DataFile Version 3.0\n"; - fout << "lbm_output\n"; - fout << "ASCII\n"; - fout << "DATASET RECTILINEAR_GRID\n"; -% if descriptor.d == 2: - fout << "DIMENSIONS " << ${geometry.size_x-2} << " " << ${geometry.size_y-2} << " 1" << "\n"; -% else: - fout << "DIMENSIONS " << ${geometry.size_x-2} << " " << ${geometry.size_y-2} << " " << ${geometry.size_z-2} << "\n"; -% endif - - fout << "X_COORDINATES " << ${geometry.size_x-2} << " float\n"; - for( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { - fout << x << " "; - } - - fout << "\nY_COORDINATES " << ${geometry.size_y-2} << " float\n"; - for( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { - fout << y << " "; - } - -% if descriptor.d == 2: - fout << "\nZ_COORDINATES " << 1 << " float\n"; - fout << 0 << "\n"; - fout << "POINT_DATA " << ${(geometry.size_x-2) * (geometry.size_y-2)} << "\n"; -% else: - fout << "\nZ_COORDINATES " << ${geometry.size_z-2} << " float\n"; - for( std::size_t z = 1; z < ${geometry.size_z-1}; ++z ) { - fout << z << " "; - } - fout << "\nPOINT_DATA " << ${(geometry.size_x-2) * (geometry.size_y-2) * (geometry.size_z-2)} << "\n"; -% endif - - ${float_type} rho; - ${float_type} u[${descriptor.d}]; - - fout << "VECTORS velocity float\n"; -% if descriptor.d == 2: - for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { - for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { - collect_moments(f, x*${geometry.size_y}+y, rho, u); - fout << u[0] << " " << u[1] << " 0\n"; - } - } -% else: - for ( std::size_t z = 1; z < ${geometry.size_z-1}; ++z ) { - for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { - for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { - collect_moments(f, x*${geometry.size_y*geometry.size_z}+y*${geometry.size_z}+z, rho, u); - fout << u[0] << " " << u[1] << " " << u[2] << "\n"; - } - } - } -% endif - - fout << "SCALARS density float 1\n"; - fout << "LOOKUP_TABLE default\n"; -% if descriptor.d == 2: - for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { - for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { - collect_moments(f, x*${geometry.size_y}+y, rho, u); - fout << rho << "\n"; - } - } -% else: - for ( std::size_t z = 1; z < ${geometry.size_z-1}; ++z ) { - for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { - for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { - collect_moments(f, x*${geometry.size_y*geometry.size_z}+y*${geometry.size_z}+z, rho, u); - fout << rho << "\n"; - } - } - } -% endif - - fout.close(); -} - -void simulate(std::size_t nStep) -{ - auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q}); - auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q}); - - ${float_type}* f_prev = f_a.get(); - ${float_type}* f_next = f_b.get(); - - std::vector bulk; - std::vector lid_bc; - std::vector box_bc; - - for (int iX = 1; iX < ${geometry.size_x-1}; ++iX) { - for (int iY = 1; iY < ${geometry.size_y-1}; ++iY) { -% if descriptor.d == 2: - const std::size_t iCell = iX*${geometry.size_y} + iY; - if (iY == ${geometry.size_y-2}) { - lid_bc.emplace_back(iCell); - } else if (iX == 1 || iX == ${geometry.size_x-2} || iY == 1) { - box_bc.emplace_back(iCell); - } else { - bulk.emplace_back(iCell); - } -% elif descriptor.d == 3: - for (int iZ = 0; iZ < ${geometry.size_z}; ++iZ) { - const std::size_t iCell = iX*${geometry.size_y*geometry.size_z} + iY*${geometry.size_z} + iZ; - if (iZ == ${geometry.size_z-2}) { - lid_bc.emplace_back(iCell); - } else if (iX == 1 || iX == ${geometry.size_x-2} || iY == 1 || iY == ${geometry.size_y-2} || iZ == 1) { - box_bc.emplace_back(iCell); - } else { - bulk.emplace_back(iCell); - } - } -% endif - } - } - - std::cout << "#bulk : " << bulk.size() << std::endl; - std::cout << "#lid : " << lid_bc.size() << std::endl; - std::cout << "#wall : " << box_bc.size() << std::endl; - std::cout << "#steps : " << nStep << std::endl; - std::cout << std::endl; - - for (std::size_t iCell = 0; iCell < ${geometry.volume}; ++iCell) { - equilibrilize(f_prev, f_next, iCell); - } - - const auto start = std::chrono::high_resolution_clock::now(); - - for (std::size_t iStep = 0; iStep < nStep; ++iStep) { - if (iStep % 2 == 0) { - f_next = f_a.get(); - f_prev = f_b.get(); - } else { - f_next = f_b.get(); - f_prev = f_a.get(); - } - -#pragma omp parallel for - for (std::size_t i = 0; i < bulk.size(); ++i) { - collide_and_stream(f_next, f_prev, bulk[i]); - } - ${float_type} u[${descriptor.d}] { 0. }; -#pragma omp parallel for - for (std::size_t i = 0; i < box_bc.size(); ++i) { - velocity_momenta_boundary(f_next, f_prev, box_bc[i], u); - } - u[0] = 0.05; -#pragma omp parallel for - for (std::size_t i = 0; i < lid_bc.size(); ++i) { - velocity_momenta_boundary(f_next, f_prev, lid_bc[i], u); - } - } - - auto duration = std::chrono::duration_cast>( - std::chrono::high_resolution_clock::now() - start); - - std::cout << "MLUPS : " << nStep*${geometry.volume}/(1e6*duration.count()) << std::endl; - - collect_moments_to_vtk("test.vtk", f_next); -} - -int main() { - simulate(10000); -} diff --git a/ldc_2d/opencl/ldc_2d.py b/ldc_2d/opencl/ldc_2d.py deleted file mode 100644 index 7ca7252..0000000 --- a/ldc_2d/opencl/ldc_2d.py +++ /dev/null @@ -1,101 +0,0 @@ -import numpy -import time - -import matplotlib -matplotlib.use('AGG') -import matplotlib.pyplot as plt - -from boltzgen import LBM, Generator, Geometry -from boltzgen.lbm.model import D2Q9 - -from simulation import Lattice, CellList - -def MLUPS(cells, steps, time): - return cells * steps / time * 1e-6 - -def generate_moment_plots(lattice, moments): - for i, m in enumerate(moments): - print("Generating plot %d of %d." % (i+1, len(moments))) - - gid = lattice.memory.gid - velocity = numpy.reshape( - [ numpy.sqrt(m[gid(x,y)*3+1]**2 + m[gid(x,y)*3+2]**2) for x, y in lattice.geometry.inner_cells() ], - lattice.geometry.inner_size()) - - plt.figure(figsize=(10, 10)) - plt.imshow(velocity, origin='lower', cmap=plt.get_cmap('seismic')) - plt.savefig("result/ldc_2d_%02d.png" % i, bbox_inches='tight', pad_inches=0) - -nUpdates = 100000 -nStat = 10000 - -geometry = Geometry(512, 512) - -print("Generating kernel using boltzgen...\n") - -functions = ['collide_and_stream', 'equilibrilize', 'collect_moments', 'momenta_boundary'] -extras = ['cell_list_dispatch'] - -precision = 'single' - -lbm = LBM(D2Q9) -generator = Generator( - descriptor = D2Q9, - moments = lbm.moments(), - collision = lbm.bgk(f_eq = lbm.equilibrium(), tau = 0.6), - target = 'cl', - precision = precision, - index = 'ZYX', - layout = 'SOA') - -kernel_src = generator.kernel(geometry, functions, extras) -kernel_src += generator.custom(geometry, """ -__kernel void equilibrilize(__global ${float_type}* f_next, - __global ${float_type}* f_prev) -{ - const unsigned int gid = ${index.gid('get_global_id(0)', 'get_global_id(1)')}; - equilibrilize_gid(f_next, f_prev, gid); -} - -__kernel void collect_moments(__global ${float_type}* f, - __global ${float_type}* moments) -{ - const unsigned int gid = ${index.gid('get_global_id(0)', 'get_global_id(1)')}; - collect_moments_gid(f, moments, gid); -} -""") - -print("Initializing simulation...\n") - -lattice = Lattice(geometry, kernel_src, D2Q9, precision = precision) -gid = lattice.memory.gid - -bulk_cells = CellList(lattice.context, lattice.queue, lattice.float_type, - [ gid(x,y) for x, y in geometry.inner_cells() if x > 1 and x < geometry.size_x-2 and y > 1 and y < geometry.size_y-2 ]) -wall_cells = CellList(lattice.context, lattice.queue, lattice.float_type, - [ gid(x,y) for x, y in geometry.inner_cells() if x == 1 or y == 1 or x == geometry.size_x-2 ]) -lid_cells = CellList(lattice.context, lattice.queue, lattice.float_type, - [ gid(x,y) for x, y in geometry.inner_cells() if y == geometry.size_y-2 ]) - -lattice.schedule('collide_and_stream_cells', bulk_cells) -lattice.schedule('velocity_momenta_boundary_cells', wall_cells, numpy.array([0.0, 0.0], dtype=lattice.float_type[0])) -lattice.schedule('velocity_momenta_boundary_cells', lid_cells, numpy.array([0.1, 0.0], dtype=lattice.float_type[0])) - -print("Starting simulation using %d cells...\n" % lattice.geometry.volume) - -moments = [] - -lastStat = time.time() - -for i in range(1,nUpdates+1): - lattice.evolve() - - if i % nStat == 0: - lattice.sync() - print("i = %4d; %3.0f MLUPS" % (i, MLUPS(lattice.geometry.volume, nStat, time.time() - lastStat))) - moments.append(lattice.get_moments()) - lastStat = time.time() - -print("\nConcluded simulation.\n") - -generate_moment_plots(lattice, moments) diff --git a/ldc_2d/opencl/simulation.py b/ldc_2d/opencl/simulation.py deleted file mode 100644 index 7625609..0000000 --- a/ldc_2d/opencl/simulation.py +++ /dev/null @@ -1,105 +0,0 @@ -import pyopencl as cl -mf = cl.mem_flags - -import numpy - -class Memory: - def __init__(self, descriptor, geometry, context, float_type): - self.context = context - self.float_type = float_type - - self.size_x = geometry.size_x - self.size_y = geometry.size_y - self.size_z = geometry.size_z - self.volume = self.size_x * self.size_y * self.size_z - - self.pop_size = descriptor.q * self.volume * self.float_type(0).nbytes - self.moments_size = 3 * self.volume * self.float_type(0).nbytes - - self.cl_pop_a = cl.Buffer(self.context, mf.READ_WRITE, size=self.pop_size) - self.cl_pop_b = cl.Buffer(self.context, mf.READ_WRITE, size=self.pop_size) - - self.cl_moments = cl.Buffer(self.context, mf.WRITE_ONLY, size=self.moments_size) - - def gid(self, x, y, z = 0): - return z * (self.size_x*self.size_y) + y * self.size_x + x; - -class CellList: - def __init__(self, context, queue, float_type, cells): - self.cl_cells = cl.Buffer(context, mf.READ_ONLY, size=len(cells) * numpy.uint32(0).nbytes) - self.np_cells = numpy.ndarray(shape=(len(cells), 1), dtype=numpy.uint32) - self.np_cells[:,0] = cells[:] - - cl.enqueue_copy(queue, self.cl_cells, self.np_cells).wait(); - - def get(self): - return self.cl_cells - - def size(self): - return (len(self.np_cells), 1, 1) - -class Lattice: - def __init__(self, geometry, kernel_src, descriptor, platform = 0, precision = 'single'): - self.geometry = geometry - self.descriptor = descriptor - - self.float_type = { - 'single': (numpy.float32, 'float'), - 'double': (numpy.float64, 'double'), - }.get(precision, None) - - self.platform = cl.get_platforms()[platform] - self.layout = None - - self.context = cl.Context( - properties=[(cl.context_properties.PLATFORM, self.platform)]) - - self.queue = cl.CommandQueue(self.context) - - self.memory = Memory(descriptor, self.geometry, self.context, self.float_type[0]) - self.tick = False - - self.compiler_args = { - 'single': '-cl-single-precision-constant -cl-fast-relaxed-math', - 'double': '-cl-fast-relaxed-math' - }.get(precision, None) - - self.build_kernel(kernel_src) - - self.program.equilibrilize( - self.queue, self.geometry.size(), self.layout, self.memory.cl_pop_a, self.memory.cl_pop_b).wait() - - self.tasks = [] - - def build_kernel(self, src): - self.program = cl.Program(self.context, src).build(self.compiler_args) - - def schedule(self, f, cells, *params): - self.tasks += [ (eval("self.program.%s" % f), cells, params) ] - - def evolve(self): - if self.tick: - self.tick = False - for f, cells, params in self.tasks: - f(self.queue, cells.size(), self.layout, self.memory.cl_pop_a, self.memory.cl_pop_b, cells.get(), *params) - else: - self.tick = True - for f, cells, params in self.tasks: - f(self.queue, cells.size(), self.layout, self.memory.cl_pop_b, self.memory.cl_pop_a, cells.get(), *params) - - def sync(self): - self.queue.finish() - - def get_moments(self): - moments = numpy.ndarray(shape=(self.memory.volume*(self.descriptor.d+1),1), dtype=self.float_type[0]) - - if self.tick: - self.program.collect_moments( - self.queue, self.geometry.size(), self.layout, self.memory.cl_pop_b, self.memory.cl_moments) - else: - self.program.collect_moments( - self.queue, self.geometry.size(), self.layout, self.memory.cl_pop_a, self.memory.cl_moments) - - cl.enqueue_copy(self.queue, moments, self.memory.cl_moments).wait(); - - return moments diff --git a/lid_driven_cavity/cpp/CMakeLists.txt b/lid_driven_cavity/cpp/CMakeLists.txt new file mode 100644 index 0000000..5b5fb90 --- /dev/null +++ b/lid_driven_cavity/cpp/CMakeLists.txt @@ -0,0 +1,44 @@ +cmake_minimum_required(VERSION 3.10) +project(ldc LANGUAGES CXX) + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + +set(CMAKE_CXX_FLAGS_RELEASE "-O3") + +add_custom_command( + OUTPUT + kernel.h + ldc.cpp + COMMAND + ${CMAKE_CURRENT_SOURCE_DIR}/generate.py --output ${CMAKE_CURRENT_BINARY_DIR} + WORKING_DIRECTORY + ${CMAKE_CURRENT_SOURCE_DIR} + DEPENDS + generate.py config.py ldc.cpp.mako +) + +include_directories( + ${CMAKE_BINARY_DIR} +) + +add_executable( + ldc + ${CMAKE_CURRENT_BINARY_DIR}/ldc.cpp +) + +target_compile_features( + ldc + PUBLIC + cxx_std_17 +) + +find_package(OpenMP) +if(OpenMP_CXX_FOUND) + target_link_libraries( + ldc + PUBLIC + OpenMP::OpenMP_CXX + ) +endif() diff --git a/lid_driven_cavity/cpp/README.md b/lid_driven_cavity/cpp/README.md new file mode 100644 index 0000000..8e84c83 --- /dev/null +++ b/lid_driven_cavity/cpp/README.md @@ -0,0 +1,18 @@ +# Lid driven cavity + +This example models the common lid driven cavity example. +Note that the actual optimized C++ implementation is generated using the _boltzgen_ library. + +See `config.py` for various configuration options. Both 2D and 3D are supported. + +## Build instructions + +``` +mkdir build +cd build +cmake .. +make +./ldc +``` + +This should result in some summarizing CLI output in addition to a `test.vtk` file for visualization in Paraview. diff --git a/lid_driven_cavity/cpp/config.py b/lid_driven_cavity/cpp/config.py new file mode 100644 index 0000000..3ed8bd5 --- /dev/null +++ b/lid_driven_cavity/cpp/config.py @@ -0,0 +1,13 @@ +from boltzgen.lbm.model import * +from boltzgen import Geometry + +descriptor = D2Q9 +geometry = Geometry(512, 512) +tau = 0.52 +precision = 'single' + +## 3D LDC +#descriptor = D3Q19 +#geometry = Geometry(64, 64, 64) +#tau = 0.52 +#precision = 'single' diff --git a/lid_driven_cavity/cpp/generate.py b/lid_driven_cavity/cpp/generate.py new file mode 100755 index 0000000..4222e98 --- /dev/null +++ b/lid_driven_cavity/cpp/generate.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +import argparse + +from boltzgen import LBM, Generator, Geometry +from boltzgen.lbm.model import D2Q9 + +import config + +argparser = argparse.ArgumentParser( + description = 'Generate a C++ implementation of a lid driven cavity simulation using LBM') +argparser.add_argument( + '--output', required = False, help = 'Target directory for the generated sources') + +args = argparser.parse_args() + +lbm = LBM(config.descriptor) +generator = Generator( + descriptor = config.descriptor, + moments = lbm.moments(), + collision = lbm.bgk(f_eq = lbm.equilibrium(), tau = config.tau), + target = 'cpp', + precision = config.precision, + index = 'XYZ', + layout = 'AOS') + +if args.output is None: + args.output = '.' + +functions = ['collide_and_stream', 'equilibrilize', 'collect_moments', 'momenta_boundary'] + +with open('%s/kernel.h' % args.output, 'w') as kernel: + kernel.write(generator.kernel(config.geometry, functions)) + +ldc_src = '' +with open('ldc.cpp.mako', 'r') as template: + ldc_src = template.read() + +with open('%s/ldc.cpp' % args.output, 'w') as app: + app.write(generator.custom(config.geometry, ldc_src)) diff --git a/lid_driven_cavity/cpp/ldc.cpp.mako b/lid_driven_cavity/cpp/ldc.cpp.mako new file mode 100644 index 0000000..5d480de --- /dev/null +++ b/lid_driven_cavity/cpp/ldc.cpp.mako @@ -0,0 +1,176 @@ +#include +#include +#include +#include +#include +#include + +#include "kernel.h" + +void collect_moments_to_vtk(const std::string& path, ${float_type}* f) { + std::ofstream fout; + fout.open(path.c_str()); + + fout << "# vtk DataFile Version 3.0\n"; + fout << "lbm_output\n"; + fout << "ASCII\n"; + fout << "DATASET RECTILINEAR_GRID\n"; +% if descriptor.d == 2: + fout << "DIMENSIONS " << ${geometry.size_x-2} << " " << ${geometry.size_y-2} << " 1" << "\n"; +% else: + fout << "DIMENSIONS " << ${geometry.size_x-2} << " " << ${geometry.size_y-2} << " " << ${geometry.size_z-2} << "\n"; +% endif + + fout << "X_COORDINATES " << ${geometry.size_x-2} << " float\n"; + for( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { + fout << x << " "; + } + + fout << "\nY_COORDINATES " << ${geometry.size_y-2} << " float\n"; + for( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { + fout << y << " "; + } + +% if descriptor.d == 2: + fout << "\nZ_COORDINATES " << 1 << " float\n"; + fout << 0 << "\n"; + fout << "POINT_DATA " << ${(geometry.size_x-2) * (geometry.size_y-2)} << "\n"; +% else: + fout << "\nZ_COORDINATES " << ${geometry.size_z-2} << " float\n"; + for( std::size_t z = 1; z < ${geometry.size_z-1}; ++z ) { + fout << z << " "; + } + fout << "\nPOINT_DATA " << ${(geometry.size_x-2) * (geometry.size_y-2) * (geometry.size_z-2)} << "\n"; +% endif + + ${float_type} rho; + ${float_type} u[${descriptor.d}]; + + fout << "VECTORS velocity float\n"; +% if descriptor.d == 2: + for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { + for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { + collect_moments(f, x*${geometry.size_y}+y, rho, u); + fout << u[0] << " " << u[1] << " 0\n"; + } + } +% else: + for ( std::size_t z = 1; z < ${geometry.size_z-1}; ++z ) { + for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { + for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { + collect_moments(f, x*${geometry.size_y*geometry.size_z}+y*${geometry.size_z}+z, rho, u); + fout << u[0] << " " << u[1] << " " << u[2] << "\n"; + } + } + } +% endif + + fout << "SCALARS density float 1\n"; + fout << "LOOKUP_TABLE default\n"; +% if descriptor.d == 2: + for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { + for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { + collect_moments(f, x*${geometry.size_y}+y, rho, u); + fout << rho << "\n"; + } + } +% else: + for ( std::size_t z = 1; z < ${geometry.size_z-1}; ++z ) { + for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { + for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { + collect_moments(f, x*${geometry.size_y*geometry.size_z}+y*${geometry.size_z}+z, rho, u); + fout << rho << "\n"; + } + } + } +% endif + + fout.close(); +} + +void simulate(std::size_t nStep) +{ + auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q}); + auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q}); + + ${float_type}* f_prev = f_a.get(); + ${float_type}* f_next = f_b.get(); + + std::vector bulk; + std::vector lid_bc; + std::vector box_bc; + + for (int iX = 1; iX < ${geometry.size_x-1}; ++iX) { + for (int iY = 1; iY < ${geometry.size_y-1}; ++iY) { +% if descriptor.d == 2: + const std::size_t iCell = iX*${geometry.size_y} + iY; + if (iY == ${geometry.size_y-2}) { + lid_bc.emplace_back(iCell); + } else if (iX == 1 || iX == ${geometry.size_x-2} || iY == 1) { + box_bc.emplace_back(iCell); + } else { + bulk.emplace_back(iCell); + } +% elif descriptor.d == 3: + for (int iZ = 0; iZ < ${geometry.size_z}; ++iZ) { + const std::size_t iCell = iX*${geometry.size_y*geometry.size_z} + iY*${geometry.size_z} + iZ; + if (iZ == ${geometry.size_z-2}) { + lid_bc.emplace_back(iCell); + } else if (iX == 1 || iX == ${geometry.size_x-2} || iY == 1 || iY == ${geometry.size_y-2} || iZ == 1) { + box_bc.emplace_back(iCell); + } else { + bulk.emplace_back(iCell); + } + } +% endif + } + } + + std::cout << "#bulk : " << bulk.size() << std::endl; + std::cout << "#lid : " << lid_bc.size() << std::endl; + std::cout << "#wall : " << box_bc.size() << std::endl; + std::cout << "#steps : " << nStep << std::endl; + std::cout << std::endl; + + for (std::size_t iCell = 0; iCell < ${geometry.volume}; ++iCell) { + equilibrilize(f_prev, f_next, iCell); + } + + const auto start = std::chrono::high_resolution_clock::now(); + + for (std::size_t iStep = 0; iStep < nStep; ++iStep) { + if (iStep % 2 == 0) { + f_next = f_a.get(); + f_prev = f_b.get(); + } else { + f_next = f_b.get(); + f_prev = f_a.get(); + } + +#pragma omp parallel for + for (std::size_t i = 0; i < bulk.size(); ++i) { + collide_and_stream(f_next, f_prev, bulk[i]); + } + ${float_type} u[${descriptor.d}] { 0. }; +#pragma omp parallel for + for (std::size_t i = 0; i < box_bc.size(); ++i) { + velocity_momenta_boundary(f_next, f_prev, box_bc[i], u); + } + u[0] = 0.05; +#pragma omp parallel for + for (std::size_t i = 0; i < lid_bc.size(); ++i) { + velocity_momenta_boundary(f_next, f_prev, lid_bc[i], u); + } + } + + auto duration = std::chrono::duration_cast>( + std::chrono::high_resolution_clock::now() - start); + + std::cout << "MLUPS : " << nStep*${geometry.volume}/(1e6*duration.count()) << std::endl; + + collect_moments_to_vtk("test.vtk", f_next); +} + +int main() { + simulate(10000); +} diff --git a/lid_driven_cavity/opencl/ldc_2d.py b/lid_driven_cavity/opencl/ldc_2d.py new file mode 100644 index 0000000..7ca7252 --- /dev/null +++ b/lid_driven_cavity/opencl/ldc_2d.py @@ -0,0 +1,101 @@ +import numpy +import time + +import matplotlib +matplotlib.use('AGG') +import matplotlib.pyplot as plt + +from boltzgen import LBM, Generator, Geometry +from boltzgen.lbm.model import D2Q9 + +from simulation import Lattice, CellList + +def MLUPS(cells, steps, time): + return cells * steps / time * 1e-6 + +def generate_moment_plots(lattice, moments): + for i, m in enumerate(moments): + print("Generating plot %d of %d." % (i+1, len(moments))) + + gid = lattice.memory.gid + velocity = numpy.reshape( + [ numpy.sqrt(m[gid(x,y)*3+1]**2 + m[gid(x,y)*3+2]**2) for x, y in lattice.geometry.inner_cells() ], + lattice.geometry.inner_size()) + + plt.figure(figsize=(10, 10)) + plt.imshow(velocity, origin='lower', cmap=plt.get_cmap('seismic')) + plt.savefig("result/ldc_2d_%02d.png" % i, bbox_inches='tight', pad_inches=0) + +nUpdates = 100000 +nStat = 10000 + +geometry = Geometry(512, 512) + +print("Generating kernel using boltzgen...\n") + +functions = ['collide_and_stream', 'equilibrilize', 'collect_moments', 'momenta_boundary'] +extras = ['cell_list_dispatch'] + +precision = 'single' + +lbm = LBM(D2Q9) +generator = Generator( + descriptor = D2Q9, + moments = lbm.moments(), + collision = lbm.bgk(f_eq = lbm.equilibrium(), tau = 0.6), + target = 'cl', + precision = precision, + index = 'ZYX', + layout = 'SOA') + +kernel_src = generator.kernel(geometry, functions, extras) +kernel_src += generator.custom(geometry, """ +__kernel void equilibrilize(__global ${float_type}* f_next, + __global ${float_type}* f_prev) +{ + const unsigned int gid = ${index.gid('get_global_id(0)', 'get_global_id(1)')}; + equilibrilize_gid(f_next, f_prev, gid); +} + +__kernel void collect_moments(__global ${float_type}* f, + __global ${float_type}* moments) +{ + const unsigned int gid = ${index.gid('get_global_id(0)', 'get_global_id(1)')}; + collect_moments_gid(f, moments, gid); +} +""") + +print("Initializing simulation...\n") + +lattice = Lattice(geometry, kernel_src, D2Q9, precision = precision) +gid = lattice.memory.gid + +bulk_cells = CellList(lattice.context, lattice.queue, lattice.float_type, + [ gid(x,y) for x, y in geometry.inner_cells() if x > 1 and x < geometry.size_x-2 and y > 1 and y < geometry.size_y-2 ]) +wall_cells = CellList(lattice.context, lattice.queue, lattice.float_type, + [ gid(x,y) for x, y in geometry.inner_cells() if x == 1 or y == 1 or x == geometry.size_x-2 ]) +lid_cells = CellList(lattice.context, lattice.queue, lattice.float_type, + [ gid(x,y) for x, y in geometry.inner_cells() if y == geometry.size_y-2 ]) + +lattice.schedule('collide_and_stream_cells', bulk_cells) +lattice.schedule('velocity_momenta_boundary_cells', wall_cells, numpy.array([0.0, 0.0], dtype=lattice.float_type[0])) +lattice.schedule('velocity_momenta_boundary_cells', lid_cells, numpy.array([0.1, 0.0], dtype=lattice.float_type[0])) + +print("Starting simulation using %d cells...\n" % lattice.geometry.volume) + +moments = [] + +lastStat = time.time() + +for i in range(1,nUpdates+1): + lattice.evolve() + + if i % nStat == 0: + lattice.sync() + print("i = %4d; %3.0f MLUPS" % (i, MLUPS(lattice.geometry.volume, nStat, time.time() - lastStat))) + moments.append(lattice.get_moments()) + lastStat = time.time() + +print("\nConcluded simulation.\n") + +generate_moment_plots(lattice, moments) diff --git a/lid_driven_cavity/opencl/simulation.py b/lid_driven_cavity/opencl/simulation.py new file mode 100644 index 0000000..7625609 --- /dev/null +++ b/lid_driven_cavity/opencl/simulation.py @@ -0,0 +1,105 @@ +import pyopencl as cl +mf = cl.mem_flags + +import numpy + +class Memory: + def __init__(self, descriptor, geometry, context, float_type): + self.context = context + self.float_type = float_type + + self.size_x = geometry.size_x + self.size_y = geometry.size_y + self.size_z = geometry.size_z + self.volume = self.size_x * self.size_y * self.size_z + + self.pop_size = descriptor.q * self.volume * self.float_type(0).nbytes + self.moments_size = 3 * self.volume * self.float_type(0).nbytes + + self.cl_pop_a = cl.Buffer(self.context, mf.READ_WRITE, size=self.pop_size) + self.cl_pop_b = cl.Buffer(self.context, mf.READ_WRITE, size=self.pop_size) + + self.cl_moments = cl.Buffer(self.context, mf.WRITE_ONLY, size=self.moments_size) + + def gid(self, x, y, z = 0): + return z * (self.size_x*self.size_y) + y * self.size_x + x; + +class CellList: + def __init__(self, context, queue, float_type, cells): + self.cl_cells = cl.Buffer(context, mf.READ_ONLY, size=len(cells) * numpy.uint32(0).nbytes) + self.np_cells = numpy.ndarray(shape=(len(cells), 1), dtype=numpy.uint32) + self.np_cells[:,0] = cells[:] + + cl.enqueue_copy(queue, self.cl_cells, self.np_cells).wait(); + + def get(self): + return self.cl_cells + + def size(self): + return (len(self.np_cells), 1, 1) + +class Lattice: + def __init__(self, geometry, kernel_src, descriptor, platform = 0, precision = 'single'): + self.geometry = geometry + self.descriptor = descriptor + + self.float_type = { + 'single': (numpy.float32, 'float'), + 'double': (numpy.float64, 'double'), + }.get(precision, None) + + self.platform = cl.get_platforms()[platform] + self.layout = None + + self.context = cl.Context( + properties=[(cl.context_properties.PLATFORM, self.platform)]) + + self.queue = cl.CommandQueue(self.context) + + self.memory = Memory(descriptor, self.geometry, self.context, self.float_type[0]) + self.tick = False + + self.compiler_args = { + 'single': '-cl-single-precision-constant -cl-fast-relaxed-math', + 'double': '-cl-fast-relaxed-math' + }.get(precision, None) + + self.build_kernel(kernel_src) + + self.program.equilibrilize( + self.queue, self.geometry.size(), self.layout, self.memory.cl_pop_a, self.memory.cl_pop_b).wait() + + self.tasks = [] + + def build_kernel(self, src): + self.program = cl.Program(self.context, src).build(self.compiler_args) + + def schedule(self, f, cells, *params): + self.tasks += [ (eval("self.program.%s" % f), cells, params) ] + + def evolve(self): + if self.tick: + self.tick = False + for f, cells, params in self.tasks: + f(self.queue, cells.size(), self.layout, self.memory.cl_pop_a, self.memory.cl_pop_b, cells.get(), *params) + else: + self.tick = True + for f, cells, params in self.tasks: + f(self.queue, cells.size(), self.layout, self.memory.cl_pop_b, self.memory.cl_pop_a, cells.get(), *params) + + def sync(self): + self.queue.finish() + + def get_moments(self): + moments = numpy.ndarray(shape=(self.memory.volume*(self.descriptor.d+1),1), dtype=self.float_type[0]) + + if self.tick: + self.program.collect_moments( + self.queue, self.geometry.size(), self.layout, self.memory.cl_pop_b, self.memory.cl_moments) + else: + self.program.collect_moments( + self.queue, self.geometry.size(), self.layout, self.memory.cl_pop_a, self.memory.cl_moments) + + cl.enqueue_copy(self.queue, moments, self.memory.cl_moments).wait(); + + return moments -- cgit v1.2.3