From 18c54d79699db7554faa851c87d7113db67a8a08 Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Sun, 27 Oct 2019 14:05:21 +0100 Subject: Separate functions into separate template files Selection of the desired templates is possible via a new `functions` parameter. --- boltzgen.py | 15 +- boltzgen/kernel/generator.py | 46 ++-- boltzgen/kernel/template/basic.cl.mako | 84 ------- boltzgen/kernel/template/basic.cpp.mako | 280 --------------------- boltzgen/kernel/template/collect_moments.cl.mako | 19 ++ boltzgen/kernel/template/collect_moments.cpp.mako | 24 ++ .../kernel/template/collide_and_stream.cl.mako | 31 +++ .../kernel/template/collide_and_stream.cpp.mako | 32 +++ boltzgen/kernel/template/equilibrilize.cl.mako | 13 + boltzgen/kernel/template/equilibrilize.cpp.mako | 13 + boltzgen/kernel/template/example.cpp.mako | 172 +++++++++++++ boltzgen/kernel/template/momenta_boundary.cpp.mako | 35 +++ boltzgen/kernel/template/preamble.cl.mako | 7 + 13 files changed, 381 insertions(+), 390 deletions(-) delete mode 100644 boltzgen/kernel/template/basic.cl.mako delete mode 100644 boltzgen/kernel/template/basic.cpp.mako create mode 100644 boltzgen/kernel/template/collect_moments.cl.mako create mode 100644 boltzgen/kernel/template/collect_moments.cpp.mako create mode 100644 boltzgen/kernel/template/collide_and_stream.cl.mako create mode 100644 boltzgen/kernel/template/collide_and_stream.cpp.mako create mode 100644 boltzgen/kernel/template/equilibrilize.cl.mako create mode 100644 boltzgen/kernel/template/equilibrilize.cpp.mako create mode 100644 boltzgen/kernel/template/example.cpp.mako create mode 100644 boltzgen/kernel/template/momenta_boundary.cpp.mako create mode 100644 boltzgen/kernel/template/preamble.cl.mako diff --git a/boltzgen.py b/boltzgen.py index 2b32661..5224942 100755 --- a/boltzgen.py +++ b/boltzgen.py @@ -15,7 +15,8 @@ argparser.add_argument('--geometry', required = True, help = 'Size of the block argparser.add_argument('--tau', required = True, help = 'BGK relaxation time') argparser.add_argument('--disable-cse', action = 'store_const', const = True, help = 'Disable common subexpression elimination') -argparser.add_argument('--extra', action = 'append', nargs = '+', default = [], help = 'Additional generator parameters') +argparser.add_argument('--functions', action = 'append', nargs = '+', default = [], help = 'Function templates to be generated') +argparser.add_argument('--extras', action = 'append', nargs = '+', default = [], help = 'Additional generator parameters') args = argparser.parse_args() @@ -29,5 +30,15 @@ generator = Generator( geometry = Geometry.parse(args.geometry) -src = generator.kernel(args.language, args.precision, args.layout, geometry, sum(args.extra, [])) +functions = sum(args.functions, []) +if len(functions) == 0: + functions += ['default'] +if 'default' in functions: + for f in ['collide_and_stream', 'equilibrilize', 'collect_moments', 'momenta_boundary']: + functions.insert(functions.index('default'), f) + functions.remove('default') + +extras = sum(args.extras, []) + +src = generator.kernel(args.language, args.precision, args.layout, geometry, functions, extras) print(src) diff --git a/boltzgen/kernel/generator.py b/boltzgen/kernel/generator.py index 8d0de37..04b0a64 100644 --- a/boltzgen/kernel/generator.py +++ b/boltzgen/kernel/generator.py @@ -7,25 +7,15 @@ import kernel.target.cl import kernel.target.cpp class Generator: - def __init__(self, descriptor, moments, collision, boundary = ''): + def __init__(self, descriptor, moments, collision): self.descriptor = descriptor self.moments = moments self.collision = collision - self.boundary = boundary - def kernel(self, target, precision, layout, geometry, extras = []): - template_path = Path(__file__).parent/('template/basic.' + target + '.mako') + def instantiate(self, target, template, float_type, layout_impl, geometry, extras = []): + template_path = Path(__file__).parent/("template/%s.%s.mako" % (template, target)) if not template_path.exists(): - raise Exception("Target '%s' not supported" % target) - - layout_impl = eval("kernel.target.%s.%s" % (target, layout)) - if layout_impl is None: - raise Exception("Target '%s' doesn't support layout '%s'" % (target, layout)) - else: - layout_impl = layout_impl(self.descriptor, geometry) - - if geometry.dimension() != self.descriptor.d: - raise Exception('Geometry dimension must match descriptor dimension') + raise Exception("Target '%s' doesn't provide '%s'" % (target, template)) return Template(filename = str(template_path)).render( descriptor = self.descriptor, @@ -38,16 +28,24 @@ class Generator: collision_assignment = self.collision[1], ccode = sympy.ccode, - float_type = { - 'single': 'float', - 'double': 'double' - }.get(precision), - - boundary_src = Template(self.boundary).render( - descriptor = self.descriptor, - geometry = geometry, - float_type = precision - ), + float_type = float_type, extras = extras ) + + def kernel(self, target, precision, layout, geometry, functions = ['collide_and_stream'], extras = []): + layout_impl = eval("kernel.target.%s.%s" % (target, layout)) + if layout_impl is None: + raise Exception("Target '%s' doesn't support layout '%s'" % (target, layout)) + else: + layout_impl = layout_impl(self.descriptor, geometry) + + if geometry.dimension() != self.descriptor.d: + raise Exception('Geometry dimension must match descriptor dimension') + + float_type = { + 'single': 'float', + 'double': 'double' + }.get(precision) + + return "\n".join(map(lambda f: self.instantiate(target, f, float_type, layout_impl, geometry, extras), functions)) diff --git a/boltzgen/kernel/template/basic.cl.mako b/boltzgen/kernel/template/basic.cl.mako deleted file mode 100644 index b64a480..0000000 --- a/boltzgen/kernel/template/basic.cl.mako +++ /dev/null @@ -1,84 +0,0 @@ -% if float_type == 'double': -#if defined(cl_khr_fp64) -#pragma OPENCL EXTENSION cl_khr_fp64 : enable -#elif defined(cl_amd_fp64) -#pragma OPENCL EXTENSION cl_amd_fp64 : enable -#endif -% endif - -__kernel void equilibrilize(__global ${float_type}* f_next, - __global ${float_type}* f_prev) -{ - const unsigned int gid = ${layout.gid()}; - - __global ${float_type}* preshifted_f_next = f_next + gid; - __global ${float_type}* preshifted_f_prev = f_prev + gid; - -% for i, w_i in enumerate(descriptor.w): - preshifted_f_next[${layout.pop_offset(i)}] = ${w_i}.f; - preshifted_f_prev[${layout.pop_offset(i)}] = ${w_i}.f; -% endfor -} - -__kernel void collide_and_stream(__global ${float_type}* f_next, - __global ${float_type}* f_prev, - __global int* material, - unsigned int time) -{ - const unsigned int gid = ${layout.gid()}; - - const int m = material[gid]; - - if ( m == 0 ) { - return; - } - - __global ${float_type}* preshifted_f_next = f_next + gid; - __global ${float_type}* preshifted_f_prev = f_prev + gid; - -% for i, c_i in enumerate(descriptor.c): - const ${float_type} f_curr_${i} = preshifted_f_prev[${layout.pop_offset(i) + layout.neighbor_offset(-c_i)}]; -% endfor - -% for i, expr in enumerate(moments_subexpr): - const ${float_type} ${expr[0]} = ${ccode(expr[1])}; -% endfor - -% for i, expr in enumerate(moments_assignment): - ${float_type} ${ccode(expr)} -% endfor - - ${boundary_src} - -% for i, expr in enumerate(collision_subexpr): - const ${float_type} ${expr[0]} = ${ccode(expr[1])}; -% endfor - -% for i, expr in enumerate(collision_assignment): - const ${float_type} ${ccode(expr)} -% endfor - -% for i in range(0,descriptor.q): - preshifted_f_next[${layout.pop_offset(i)}] = f_next_${i}; -% endfor -} - -__kernel void collect_moments(__global ${float_type}* f, - __global ${float_type}* moments) -{ - const unsigned int gid = ${layout.gid()}; - - __global ${float_type}* preshifted_f = f + gid; - -% for i in range(0,descriptor.q): - const ${float_type} f_curr_${i} = preshifted_f[${layout.pop_offset(i)}]; -% endfor - -% for i, expr in enumerate(moments_subexpr): - const ${float_type} ${expr[0]} = ${ccode(expr[1])}; -% endfor - -% for i, expr in enumerate(moments_assignment): - moments[${layout.pop_offset(i)} + gid] = ${ccode(expr.rhs)}; -% endfor -} diff --git a/boltzgen/kernel/template/basic.cpp.mako b/boltzgen/kernel/template/basic.cpp.mako deleted file mode 100644 index 118ef8c..0000000 --- a/boltzgen/kernel/template/basic.cpp.mako +++ /dev/null @@ -1,280 +0,0 @@ -void equilibrilize(${float_type}* f_next, - ${float_type}* f_prev, - std::size_t gid) -{ - ${float_type}* preshifted_f_next = f_next + gid*${layout.gid_offset()}; - ${float_type}* preshifted_f_prev = f_prev + gid*${layout.gid_offset()}; - -% for i, w_i in enumerate(descriptor.w): - preshifted_f_next[${layout.pop_offset(i)}] = ${w_i.evalf()}; - preshifted_f_prev[${layout.pop_offset(i)}] = ${w_i.evalf()}; -% endfor -} - -void collide_and_stream( ${float_type}* f_next, - const ${float_type}* f_prev, - std::size_t gid) -{ - ${float_type}* preshifted_f_next = f_next + gid*${layout.gid_offset()}; - const ${float_type}* preshifted_f_prev = f_prev + gid*${layout.gid_offset()}; - -% for i, c_i in enumerate(descriptor.c): - const ${float_type} f_curr_${i} = preshifted_f_prev[${layout.pop_offset(i) + layout.neighbor_offset(-c_i)}]; -% endfor - -% for i, expr in enumerate(moments_subexpr): - const ${float_type} ${expr[0]} = ${ccode(expr[1])}; -% endfor - -% for i, expr in enumerate(moments_assignment): - ${float_type} ${ccode(expr)} -% endfor - -% for i, expr in enumerate(collision_subexpr): - const ${float_type} ${expr[0]} = ${ccode(expr[1])}; -% endfor - -% for i, expr in enumerate(collision_assignment): - const ${float_type} ${ccode(expr)} -% endfor - -% for i, expr in enumerate(collision_assignment): - preshifted_f_next[${layout.pop_offset(i)}] = f_next_${i}; -% endfor -} - -void velocity_momenta_boundary( ${float_type}* f_next, - const ${float_type}* f_prev, - std::size_t gid, - ${float_type} velocity[${descriptor.d}]) -{ - ${float_type}* preshifted_f_next = f_next + gid*${layout.gid_offset()}; - const ${float_type}* preshifted_f_prev = f_prev + gid*${layout.gid_offset()}; - -% for i, c_i in enumerate(descriptor.c): - const ${float_type} f_curr_${i} = preshifted_f_prev[${layout.pop_offset(i) + layout.neighbor_offset(-c_i)}]; -% endfor - -% for i, expr in enumerate(moments_subexpr): - const ${float_type} ${expr[0]} = ${ccode(expr[1])}; -% endfor - -% for i, expr in enumerate(moments_assignment): - ${float_type} ${ccode(expr)} -% endfor -% for i, expr in enumerate(moments_assignment[1:]): - ${expr.lhs} = velocity[${i}]; -% endfor - -% for i, expr in enumerate(collision_subexpr): - const ${float_type} ${expr[0]} = ${ccode(expr[1])}; -% endfor - -% for i, expr in enumerate(collision_assignment): - const ${float_type} ${ccode(expr)} -% endfor - -% for i, expr in enumerate(collision_assignment): - preshifted_f_next[${layout.pop_offset(i)}] = f_next_${i}; -% endfor -} - -void collect_moments(const ${float_type}* f, - std::size_t gid, - ${float_type}& rho, - ${float_type} u[${descriptor.d}]) -{ - const ${float_type}* preshifted_f = f + gid*${layout.gid_offset()}; - -% for i in range(0,descriptor.q): - const ${float_type} f_curr_${i} = preshifted_f[${layout.pop_offset(i)}]; -% endfor - -% for i, expr in enumerate(moments_subexpr): - const ${float_type} ${expr[0]} = ${ccode(expr[1])}; -% endfor - -% for i, expr in enumerate(moments_assignment): -% if i == 0: - rho = ${ccode(expr.rhs)}; -% else: - u[${i-1}] = ${ccode(expr.rhs)}; -% endif -% endfor -} - -% if 'moments_vtk' in extras: -void collect_moments_to_vtk(const std::string& path, ${float_type}* f) { - std::ofstream fout; - fout.open(path.c_str()); - - fout << "# vtk DataFile Version 3.0\n"; - fout << "lbm_output\n"; - fout << "ASCII\n"; - fout << "DATASET RECTILINEAR_GRID\n"; -% if descriptor.d == 2: - fout << "DIMENSIONS " << ${geometry.size_x-2} << " " << ${geometry.size_y-2} << " 1" << "\n"; -% else: - fout << "DIMENSIONS " << ${geometry.size_x-2} << " " << ${geometry.size_y-2} << " " << ${geometry.size_z-2} << "\n"; -% endif - - fout << "X_COORDINATES " << ${geometry.size_x-2} << " float\n"; - for( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { - fout << x << " "; - } - - fout << "\nY_COORDINATES " << ${geometry.size_y-2} << " float\n"; - for( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { - fout << y << " "; - } - -% if descriptor.d == 2: - fout << "\nZ_COORDINATES " << 1 << " float\n"; - fout << 0 << "\n"; - fout << "POINT_DATA " << ${(geometry.size_x-2) * (geometry.size_y-2)} << "\n"; -% else: - fout << "\nZ_COORDINATES " << ${geometry.size_z-2} << " float\n"; - for( std::size_t z = 1; z < ${geometry.size_z-1}; ++z ) { - fout << z << " "; - } - fout << "\nPOINT_DATA " << ${(geometry.size_x-2) * (geometry.size_y-2) * (geometry.size_z-2)} << "\n"; -% endif - - ${float_type} rho; - ${float_type} u[${descriptor.d}]; - - fout << "VECTORS velocity float\n"; -% if descriptor.d == 2: - for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { - for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { - collect_moments(f, x*${geometry.size_y}+y, rho, u); - fout << u[0] << " " << u[1] << " 0\n"; - } - } -% else: - for ( std::size_t z = 1; z < ${geometry.size_z-1}; ++z ) { - for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { - for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { - collect_moments(f, x*${geometry.size_y*geometry.size_z}+y*${geometry.size_z}+z, rho, u); - fout << u[0] << " " << u[1] << " " << u[2] << "\n"; - } - } - } -% endif - - fout << "SCALARS density float 1\n"; - fout << "LOOKUP_TABLE default\n"; -% if descriptor.d == 2: - for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { - for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { - collect_moments(f, x*${geometry.size_y}+y, rho, u); - fout << rho << "\n"; - } - } -% else: - for ( std::size_t z = 1; z < ${geometry.size_z-1}; ++z ) { - for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { - for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { - collect_moments(f, x*${geometry.size_y*geometry.size_z}+y*${geometry.size_z}+z, rho, u); - fout << rho << "\n"; - } - } - } -% endif - - fout.close(); -} -% endif - -% if 'test_ldc' in extras: -void test_ldc(std::size_t nStep) -{ - auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q}); - auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q}); - - ${float_type}* f_prev = f_a.get(); - ${float_type}* f_next = f_b.get(); - - std::vector<std::size_t> bulk; - std::vector<std::size_t> lid_bc; - std::vector<std::size_t> box_bc; - - for (int iX = 1; iX < ${geometry.size_x-1}; ++iX) { - for (int iY = 1; iY < ${geometry.size_y-1}; ++iY) { -% if descriptor.d == 2: - const std::size_t iCell = iX*${geometry.size_y} + iY; - if (iY == ${geometry.size_y-2}) { - lid_bc.emplace_back(iCell); - } else if (iX == 1 || iX == ${geometry.size_x-2} || iY == 1) { - box_bc.emplace_back(iCell); - } else { - bulk.emplace_back(iCell); - } -% elif descriptor.d == 3: - for (int iZ = 0; iZ < ${geometry.size_z}; ++iZ) { - const std::size_t iCell = iX*${geometry.size_y*geometry.size_z} + iY*${geometry.size_z} + iZ; - if (iZ == ${geometry.size_z-2}) { - lid_bc.emplace_back(iCell); - } else if (iX == 1 || iX == ${geometry.size_x-2} || iY == 1 || iY == ${geometry.size_y-2} || iZ == 1) { - box_bc.emplace_back(iCell); - } else { - bulk.emplace_back(iCell); - } - } -% endif - } - } - - for (std::size_t iCell = 0; iCell < ${geometry.volume}; ++iCell) { - equilibrilize(f_prev, f_next, iCell); - } - - const auto start = std::chrono::high_resolution_clock::now(); - - for (std::size_t iStep = 0; iStep < nStep; ++iStep) { - if (iStep % 2 == 0) { - f_next = f_a.get(); - f_prev = f_b.get(); - } else { - f_next = f_b.get(); - f_prev = f_a.get(); - } - -% if 'omp_parallel_for' in extras: -#pragma omp parallel for -% endif - for (std::size_t i = 0; i < bulk.size(); ++i) { - collide_and_stream(f_next, f_prev, bulk[i]); - } - ${float_type} u[${descriptor.d}] { 0. }; -% if 'omp_parallel_for' in extras: -#pragma omp parallel for -% endif - for (std::size_t i = 0; i < box_bc.size(); ++i) { - velocity_momenta_boundary(f_next, f_prev, box_bc[i], u); - } - u[0] = 0.05; -% if 'omp_parallel_for' in extras: -#pragma omp parallel for -% endif - for (std::size_t i = 0; i < lid_bc.size(); ++i) { - velocity_momenta_boundary(f_next, f_prev, lid_bc[i], u); - } - } - - auto duration = std::chrono::duration_cast<std::chrono::duration<double>>( - std::chrono::high_resolution_clock::now() - start); - - std::cout << "#bulk : " << bulk.size() << std::endl; - std::cout << "#lid : " << lid_bc.size() << std::endl; - std::cout << "#wall : " << box_bc.size() << std::endl; - std::cout << "#steps : " << nStep << std::endl; - std::cout << std::endl; - std::cout << "MLUPS : " << nStep*${geometry.volume}/(1e6*duration.count()) << std::endl; - -% if 'moments_vtk' in extras: - collect_moments_to_vtk("test.vtk", f_next); -% endif -} -% endif - diff --git a/boltzgen/kernel/template/collect_moments.cl.mako b/boltzgen/kernel/template/collect_moments.cl.mako new file mode 100644 index 0000000..b07b759 --- /dev/null +++ b/boltzgen/kernel/template/collect_moments.cl.mako @@ -0,0 +1,19 @@ +__kernel void collect_moments(__global ${float_type}* f, + __global ${float_type}* moments) +{ + const unsigned int gid = ${layout.gid()}; + + __global ${float_type}* preshifted_f = f + gid; + +% for i in range(0,descriptor.q): + const ${float_type} f_curr_${i} = preshifted_f[${layout.pop_offset(i)}]; +% endfor + +% for i, expr in enumerate(moments_subexpr): + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; +% endfor + +% for i, expr in enumerate(moments_assignment): + moments[${layout.pop_offset(i)} + gid] = ${ccode(expr.rhs)}; +% endfor +} diff --git a/boltzgen/kernel/template/collect_moments.cpp.mako b/boltzgen/kernel/template/collect_moments.cpp.mako new file mode 100644 index 0000000..8c37db2 --- /dev/null +++ b/boltzgen/kernel/template/collect_moments.cpp.mako @@ -0,0 +1,24 @@ +void collect_moments(const ${float_type}* f, + std::size_t gid, + ${float_type}& rho, + ${float_type} u[${descriptor.d}]) +{ + const ${float_type}* preshifted_f = f + gid*${layout.gid_offset()}; + +% for i in range(0,descriptor.q): + const ${float_type} f_curr_${i} = preshifted_f[${layout.pop_offset(i)}]; +% endfor + +% for i, expr in enumerate(moments_subexpr): + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; +% endfor + +% for i, expr in enumerate(moments_assignment): +% if i == 0: + rho = ${ccode(expr.rhs)}; +% else: + u[${i-1}] = ${ccode(expr.rhs)}; +% endif +% endfor +} + diff --git a/boltzgen/kernel/template/collide_and_stream.cl.mako b/boltzgen/kernel/template/collide_and_stream.cl.mako new file mode 100644 index 0000000..28cfa57 --- /dev/null +++ b/boltzgen/kernel/template/collide_and_stream.cl.mako @@ -0,0 +1,31 @@ +__kernel void collide_and_stream(__global ${float_type}* f_next, + __global ${float_type}* f_prev, + unsigned int gid) +{ + __global ${float_type}* preshifted_f_next = f_next + gid; + __global ${float_type}* preshifted_f_prev = f_prev + gid; + +% for i, c_i in enumerate(descriptor.c): + const ${float_type} f_curr_${i} = preshifted_f_prev[${layout.pop_offset(i) + layout.neighbor_offset(-c_i)}]; +% endfor + +% for i, expr in enumerate(moments_subexpr): + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; +% endfor + +% for i, expr in enumerate(moments_assignment): + ${float_type} ${ccode(expr)} +% endfor + +% for i, expr in enumerate(collision_subexpr): + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; +% endfor + +% for i, expr in enumerate(collision_assignment): + const ${float_type} ${ccode(expr)} +% endfor + +% for i in range(0,descriptor.q): + preshifted_f_next[${layout.pop_offset(i)}] = f_next_${i}; +% endfor +} diff --git a/boltzgen/kernel/template/collide_and_stream.cpp.mako b/boltzgen/kernel/template/collide_and_stream.cpp.mako new file mode 100644 index 0000000..71c62b7 --- /dev/null +++ b/boltzgen/kernel/template/collide_and_stream.cpp.mako @@ -0,0 +1,32 @@ +void collide_and_stream( ${float_type}* f_next, + const ${float_type}* f_prev, + std::size_t gid) +{ + ${float_type}* preshifted_f_next = f_next + gid*${layout.gid_offset()}; + const ${float_type}* preshifted_f_prev = f_prev + gid*${layout.gid_offset()}; + +% for i, c_i in enumerate(descriptor.c): + const ${float_type} f_curr_${i} = preshifted_f_prev[${layout.pop_offset(i) + layout.neighbor_offset(-c_i)}]; +% endfor + +% for i, expr in enumerate(moments_subexpr): + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; +% endfor + +% for i, expr in enumerate(moments_assignment): + ${float_type} ${ccode(expr)} +% endfor + +% for i, expr in enumerate(collision_subexpr): + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; +% endfor + +% for i, expr in enumerate(collision_assignment): + const ${float_type} ${ccode(expr)} +% endfor + +% for i, expr in enumerate(collision_assignment): + preshifted_f_next[${layout.pop_offset(i)}] = f_next_${i}; +% endfor +} + diff --git a/boltzgen/kernel/template/equilibrilize.cl.mako b/boltzgen/kernel/template/equilibrilize.cl.mako new file mode 100644 index 0000000..aa2246c --- /dev/null +++ b/boltzgen/kernel/template/equilibrilize.cl.mako @@ -0,0 +1,13 @@ +__kernel void equilibrilize(__global ${float_type}* f_next, + __global ${float_type}* f_prev) +{ + const unsigned int gid = ${layout.gid()}; + + __global ${float_type}* preshifted_f_next = f_next + gid; + __global ${float_type}* preshifted_f_prev = f_prev + gid; + +% for i, w_i in enumerate(descriptor.w): + preshifted_f_next[${layout.pop_offset(i)}] = ${w_i}.f; + preshifted_f_prev[${layout.pop_offset(i)}] = ${w_i}.f; +% endfor +} diff --git a/boltzgen/kernel/template/equilibrilize.cpp.mako b/boltzgen/kernel/template/equilibrilize.cpp.mako new file mode 100644 index 0000000..9f082a1 --- /dev/null +++ b/boltzgen/kernel/template/equilibrilize.cpp.mako @@ -0,0 +1,13 @@ +void equilibrilize(${float_type}* f_next, + ${float_type}* f_prev, + std::size_t gid) +{ + ${float_type}* preshifted_f_next = f_next + gid*${layout.gid_offset()}; + ${float_type}* preshifted_f_prev = f_prev + gid*${layout.gid_offset()}; + +% for i, w_i in enumerate(descriptor.w): + preshifted_f_next[${layout.pop_offset(i)}] = ${w_i.evalf()}; + preshifted_f_prev[${layout.pop_offset(i)}] = ${w_i.evalf()}; +% endfor +} + diff --git a/boltzgen/kernel/template/example.cpp.mako b/boltzgen/kernel/template/example.cpp.mako new file mode 100644 index 0000000..d979bfd --- /dev/null +++ b/boltzgen/kernel/template/example.cpp.mako @@ -0,0 +1,172 @@ +% if 'moments_vtk' in extras: +void collect_moments_to_vtk(const std::string& path, ${float_type}* f) { + std::ofstream fout; + fout.open(path.c_str()); + + fout << "# vtk DataFile Version 3.0\n"; + fout << "lbm_output\n"; + fout << "ASCII\n"; + fout << "DATASET RECTILINEAR_GRID\n"; +% if descriptor.d == 2: + fout << "DIMENSIONS " << ${geometry.size_x-2} << " " << ${geometry.size_y-2} << " 1" << "\n"; +% else: + fout << "DIMENSIONS " << ${geometry.size_x-2} << " " << ${geometry.size_y-2} << " " << ${geometry.size_z-2} << "\n"; +% endif + + fout << "X_COORDINATES " << ${geometry.size_x-2} << " float\n"; + for( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { + fout << x << " "; + } + + fout << "\nY_COORDINATES " << ${geometry.size_y-2} << " float\n"; + for( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { + fout << y << " "; + } + +% if descriptor.d == 2: + fout << "\nZ_COORDINATES " << 1 << " float\n"; + fout << 0 << "\n"; + fout << "POINT_DATA " << ${(geometry.size_x-2) * (geometry.size_y-2)} << "\n"; +% else: + fout << "\nZ_COORDINATES " << ${geometry.size_z-2} << " float\n"; + for( std::size_t z = 1; z < ${geometry.size_z-1}; ++z ) { + fout << z << " "; + } + fout << "\nPOINT_DATA " << ${(geometry.size_x-2) * (geometry.size_y-2) * (geometry.size_z-2)} << "\n"; +% endif + + ${float_type} rho; + ${float_type} u[${descriptor.d}]; + + fout << "VECTORS velocity float\n"; +% if descriptor.d == 2: + for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { + for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { + collect_moments(f, x*${geometry.size_y}+y, rho, u); + fout << u[0] << " " << u[1] << " 0\n"; + } + } +% else: + for ( std::size_t z = 1; z < ${geometry.size_z-1}; ++z ) { + for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { + for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { + collect_moments(f, x*${geometry.size_y*geometry.size_z}+y*${geometry.size_z}+z, rho, u); + fout << u[0] << " " << u[1] << " " << u[2] << "\n"; + } + } + } +% endif + + fout << "SCALARS density float 1\n"; + fout << "LOOKUP_TABLE default\n"; +% if descriptor.d == 2: + for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { + for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { + collect_moments(f, x*${geometry.size_y}+y, rho, u); + fout << rho << "\n"; + } + } +% else: + for ( std::size_t z = 1; z < ${geometry.size_z-1}; ++z ) { + for ( std::size_t y = 1; y < ${geometry.size_y-1}; ++y ) { + for ( std::size_t x = 1; x < ${geometry.size_x-1}; ++x ) { + collect_moments(f, x*${geometry.size_y*geometry.size_z}+y*${geometry.size_z}+z, rho, u); + fout << rho << "\n"; + } + } + } +% endif + + fout.close(); +} +% endif + +void test_ldc(std::size_t nStep) +{ + auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q}); + auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q}); + + ${float_type}* f_prev = f_a.get(); + ${float_type}* f_next = f_b.get(); + + std::vector<std::size_t> bulk; + std::vector<std::size_t> lid_bc; + std::vector<std::size_t> box_bc; + + for (int iX = 1; iX < ${geometry.size_x-1}; ++iX) { + for (int iY = 1; iY < ${geometry.size_y-1}; ++iY) { +% if descriptor.d == 2: + const std::size_t iCell = iX*${geometry.size_y} + iY; + if (iY == ${geometry.size_y-2}) { + lid_bc.emplace_back(iCell); + } else if (iX == 1 || iX == ${geometry.size_x-2} || iY == 1) { + box_bc.emplace_back(iCell); + } else { + bulk.emplace_back(iCell); + } +% elif descriptor.d == 3: + for (int iZ = 0; iZ < ${geometry.size_z}; ++iZ) { + const std::size_t iCell = iX*${geometry.size_y*geometry.size_z} + iY*${geometry.size_z} + iZ; + if (iZ == ${geometry.size_z-2}) { + lid_bc.emplace_back(iCell); + } else if (iX == 1 || iX == ${geometry.size_x-2} || iY == 1 || iY == ${geometry.size_y-2} || iZ == 1) { + box_bc.emplace_back(iCell); + } else { + bulk.emplace_back(iCell); + } + } +% endif + } + } + + for (std::size_t iCell = 0; iCell < ${geometry.volume}; ++iCell) { + equilibrilize(f_prev, f_next, iCell); + } + + const auto start = std::chrono::high_resolution_clock::now(); + + for (std::size_t iStep = 0; iStep < nStep; ++iStep) { + if (iStep % 2 == 0) { + f_next = f_a.get(); + f_prev = f_b.get(); + } else { + f_next = f_b.get(); + f_prev = f_a.get(); + } + +% if 'omp_parallel_for' in extras: +#pragma omp parallel for +% endif + for (std::size_t i = 0; i < bulk.size(); ++i) { + collide_and_stream(f_next, f_prev, bulk[i]); + } + ${float_type} u[${descriptor.d}] { 0. }; +% if 'omp_parallel_for' in extras: +#pragma omp parallel for +% endif + for (std::size_t i = 0; i < box_bc.size(); ++i) { + velocity_momenta_boundary(f_next, f_prev, box_bc[i], u); + } + u[0] = 0.05; +% if 'omp_parallel_for' in extras: +#pragma omp parallel for +% endif + for (std::size_t i = 0; i < lid_bc.size(); ++i) { + velocity_momenta_boundary(f_next, f_prev, lid_bc[i], u); + } + } + + auto duration = std::chrono::duration_cast<std::chrono::duration<double>>( + std::chrono::high_resolution_clock::now() - start); + + std::cout << "#bulk : " << bulk.size() << std::endl; + std::cout << "#lid : " << lid_bc.size() << std::endl; + std::cout << "#wall : " << box_bc.size() << std::endl; + std::cout << "#steps : " << nStep << std::endl; + std::cout << std::endl; + std::cout << "MLUPS : " << nStep*${geometry.volume}/(1e6*duration.count()) << std::endl; + +% if 'moments_vtk' in extras: + collect_moments_to_vtk("test.vtk", f_next); +% endif +} diff --git a/boltzgen/kernel/template/momenta_boundary.cpp.mako b/boltzgen/kernel/template/momenta_boundary.cpp.mako new file mode 100644 index 0000000..d7e0dd2 --- /dev/null +++ b/boltzgen/kernel/template/momenta_boundary.cpp.mako @@ -0,0 +1,35 @@ +void velocity_momenta_boundary( ${float_type}* f_next, + const ${float_type}* f_prev, + std::size_t gid, + ${float_type} velocity[${descriptor.d}]) +{ + ${float_type}* preshifted_f_next = f_next + gid*${layout.gid_offset()}; + const ${float_type}* preshifted_f_prev = f_prev + gid*${layout.gid_offset()}; + +% for i, c_i in enumerate(descriptor.c): + const ${float_type} f_curr_${i} = preshifted_f_prev[${layout.pop_offset(i) + layout.neighbor_offset(-c_i)}]; +% endfor + +% for i, expr in enumerate(moments_subexpr): + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; +% endfor + +% for i, expr in enumerate(moments_assignment): + ${float_type} ${ccode(expr)} +% endfor +% for i, expr in enumerate(moments_assignment[1:]): + ${expr.lhs} = velocity[${i}]; +% endfor + +% for i, expr in enumerate(collision_subexpr): + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; +% endfor + +% for i, expr in enumerate(collision_assignment): + const ${float_type} ${ccode(expr)} +% endfor + +% for i, expr in enumerate(collision_assignment): + preshifted_f_next[${layout.pop_offset(i)}] = f_next_${i}; +% endfor +} diff --git a/boltzgen/kernel/template/preamble.cl.mako b/boltzgen/kernel/template/preamble.cl.mako new file mode 100644 index 0000000..c47254d --- /dev/null +++ b/boltzgen/kernel/template/preamble.cl.mako @@ -0,0 +1,7 @@ +% if float_type == 'double': +#if defined(cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +#elif defined(cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64 : enable +#endif +% endif -- cgit v1.2.3