diff options
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | boltzgen/kernel/memory/precision.py | 3 | ||||
-rw-r--r-- | boltzgen/kernel/template/bounce_back_boundary.cuda.mako | 16 | ||||
-rw-r--r-- | boltzgen/kernel/template/collect_moments.cuda.mako | 20 | ||||
-rw-r--r-- | boltzgen/kernel/template/collide_and_stream.cuda.mako | 17 | ||||
-rw-r--r-- | boltzgen/kernel/template/equilibrilize.cuda.mako | 11 | ||||
-rw-r--r-- | boltzgen/kernel/template/momenta_boundary.cuda.mako | 39 | ||||
-rw-r--r-- | boltzgen/kernel/template/pattern/SSS.cuda.mako | 81 | ||||
-rw-r--r-- | boltzgen/kernel/template/update_sss_control_structure.cpp.mako | 5 | ||||
-rw-r--r-- | boltzgen/kernel/template/update_sss_control_structure.cuda.mako | 21 | ||||
-rw-r--r-- | boltzgen/utility/__init__.py | 1 | ||||
-rw-r--r-- | boltzgen/utility/printer.py | 13 |
12 files changed, 224 insertions, 5 deletions
@@ -15,7 +15,7 @@ At the moment this is a more structured and cleaned up version of the OpenCL ker * configurable cell indexing sequence * static resolution of memory offsets * AB, AA and SSS streaming patterns -* C++ and OpenCL targets +* C++, OpenCL and CUDA targets * simple CLI frontend ## Usage diff --git a/boltzgen/kernel/memory/precision.py b/boltzgen/kernel/memory/precision.py index d22e922..0c52fc0 100644 --- a/boltzgen/kernel/memory/precision.py +++ b/boltzgen/kernel/memory/precision.py @@ -14,3 +14,6 @@ class cpp(common): class cl(common): pass + +class cuda(common): + pass diff --git a/boltzgen/kernel/template/bounce_back_boundary.cuda.mako b/boltzgen/kernel/template/bounce_back_boundary.cuda.mako new file mode 100644 index 0000000..09499e2 --- /dev/null +++ b/boltzgen/kernel/template/bounce_back_boundary.cuda.mako @@ -0,0 +1,16 @@ +<%namespace name="pattern" file="${'/pattern/%s.cuda.mako' % context['streaming']}"/> +<% +from boltzgen.utility.printer import CudaCodePrinter +ccode = CudaCodePrinter(float_type).doprint +subexpr, assignment = model.collision(f_eq = model.equilibrium(resolve_moments = True)) +%> + +<%call expr="pattern.operator('bounce_back_boundary')"> +% for i, expr in enumerate(subexpr): + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; +% endfor + +% for i, c_i in enumerate(descriptor.c): + const ${float_type} ${assignment[i].lhs} = ${ccode(assignment[descriptor.c.index(-c_i)].rhs)}; +% endfor +</%call> diff --git a/boltzgen/kernel/template/collect_moments.cuda.mako b/boltzgen/kernel/template/collect_moments.cuda.mako new file mode 100644 index 0000000..36f03b0 --- /dev/null +++ b/boltzgen/kernel/template/collect_moments.cuda.mako @@ -0,0 +1,20 @@ +<%namespace name="pattern" file="${'/pattern/%s.cuda.mako' % context['streaming']}"/> +<% +from boltzgen.utility.printer import CudaCodePrinter +ccode = CudaCodePrinter(float_type).doprint +moments_subexpr, moments_assignment = model.moments() +%> + +<%call expr="pattern.functor('collect_moments', [('%s*' % float_type, 'rho'), ('%s*' % float_type, 'u')])"> +% for i, expr in enumerate(moments_subexpr): + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; +% endfor + +% for i, expr in enumerate(moments_assignment): +% if i == 0: + rho[gid] = ${ccode(expr.rhs)}; +% else: + u[gid*${descriptor.d} + ${i-1}] = ${ccode(expr.rhs)}; +% endif +% endfor +</%call> diff --git a/boltzgen/kernel/template/collide_and_stream.cuda.mako b/boltzgen/kernel/template/collide_and_stream.cuda.mako new file mode 100644 index 0000000..b46cf0f --- /dev/null +++ b/boltzgen/kernel/template/collide_and_stream.cuda.mako @@ -0,0 +1,17 @@ +<%namespace name="pattern" file="${'/pattern/%s.cuda.mako' % context['streaming']}"/> +<% +from boltzgen.utility.printer import CudaCodePrinter +ccode = CudaCodePrinter(float_type).doprint +subexpr, assignment = model.collision(f_eq = model.equilibrium(resolve_moments = True)) + +%> + +<%call expr="pattern.operator('collide_and_stream')"> +% for i, expr in enumerate(subexpr): + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; +% endfor + +% for i, expr in enumerate(assignment): + const ${float_type} ${ccode(expr)} +% endfor +</%call> diff --git a/boltzgen/kernel/template/equilibrilize.cuda.mako b/boltzgen/kernel/template/equilibrilize.cuda.mako new file mode 100644 index 0000000..2f4cdee --- /dev/null +++ b/boltzgen/kernel/template/equilibrilize.cuda.mako @@ -0,0 +1,11 @@ +<%namespace name="pattern" file="${'/pattern/%s.cuda.mako' % context['streaming']}"/> +<% +from boltzgen.utility.printer import CudaCodePrinter +ccode = CudaCodePrinter(float_type).doprint +%> + +<%call expr="pattern.operator('equilibrilize')"> +% for i, w_i in enumerate(descriptor.w): + const ${float_type} f_next_${i} = ${ccode(w_i.evalf())}; +% endfor +</%call> diff --git a/boltzgen/kernel/template/momenta_boundary.cuda.mako b/boltzgen/kernel/template/momenta_boundary.cuda.mako new file mode 100644 index 0000000..408a12d --- /dev/null +++ b/boltzgen/kernel/template/momenta_boundary.cuda.mako @@ -0,0 +1,39 @@ +<%namespace name="pattern" file="${'/pattern/%s.cuda.mako' % context['streaming']}"/> +<% +from boltzgen.utility.printer import CudaCodePrinter +ccode = CudaCodePrinter(float_type).doprint +moments_subexpr, moments_assignment = model.moments() +collision_subexpr, collision_assignment = model.collision(f_eq = model.equilibrium(resolve_moments = False)) +%> + +<%def name="momenta_boundary(name, params)"> +<%call expr="pattern.operator('%s_momenta_boundary' % name, params)"> +% for i, expr in enumerate(moments_subexpr): + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; +% endfor + + ${caller.body()} + +% for i, expr in enumerate(collision_subexpr): + const ${float_type} ${expr[0]} = ${ccode(expr[1])}; +% endfor + +% for i, expr in enumerate(collision_assignment): + const ${float_type} ${ccode(expr)} +% endfor +</%call> +</%def> + +<%call expr="momenta_boundary('velocity', list(map(lambda i: (float_type, 'velocity_%d' % i), range(descriptor.d))))"> + const ${float_type} ${ccode(moments_assignment[0])} +% for i, expr in enumerate(moments_assignment[1:]): + const ${float_type} ${expr.lhs} = velocity_${i}; +% endfor +</%call> + +<%call expr="momenta_boundary('density', [(float_type, 'density')])"> + const ${float_type} ${moments_assignment[0].lhs} = density; +% for i, expr in enumerate(moments_assignment[1:]): + const ${float_type} ${ccode(expr)} +% endfor +</%call> diff --git a/boltzgen/kernel/template/pattern/SSS.cuda.mako b/boltzgen/kernel/template/pattern/SSS.cuda.mako new file mode 100644 index 0000000..8f571c7 --- /dev/null +++ b/boltzgen/kernel/template/pattern/SSS.cuda.mako @@ -0,0 +1,81 @@ +<%def name="operator(name, params = None)"> +<% +if layout.__class__.__name__ != 'SOA': + raise Exception('SSS pattern only works for the AOS memory layout') +%> +__global__ void ${name}( + ${float_type}** f +% if 'cell_list_dispatch' in extras: + , std::size_t* cells + , std::size_t cell_count +% else: + , std::size_t gid +% endif +% if params is not None: +% for param_type, param_name in params: + , ${param_type} ${param_name} +% endfor +% endif +) { +% if 'cell_list_dispatch' in extras: + const std::size_t index = blockIdx.x * blockDim.x + threadIdx.x; + if (!(index < cell_count)) { + return; + } + const std::size_t gid = cells[index]; +% endif + +% for i, c_i in enumerate(descriptor.c): + ${float_type}* preshifted_f_${i} = f[${i}] + ${layout.cell_preshift('gid')}; +% endfor + +% for i, c_i in enumerate(descriptor.c): + const ${float_type} f_curr_${i} = *preshifted_f_${i}; +% endfor + + ${caller.body()} + +% for i, c_i in enumerate(descriptor.c): + *preshifted_f_${i} = f_next_${descriptor.c.index(-c_i)}; +% endfor +} +</%def> + +<%def name="functor(name, params = None)"> +<% +if layout.__class__.__name__ != 'SOA': + raise Exception('SSS pattern only works for the AOS memory layout') +%> +__global__ void ${name}( + ${float_type}** f +% if 'cell_list_dispatch' in extras: + , std::size_t* cells + , std::size_t cell_count +% else: + , std::size_t gid +% endif +% if params is not None: +% for param_type, param_name in params: + , ${param_type} ${param_name} +% endfor +% endif +) { +% if 'cell_list_dispatch' in extras: + const std::size_t index = blockIdx.x * blockDim.x + threadIdx.x; + if (!(index < cell_count)) { + return; + } + const std::size_t gid = cells[index]; +% endif + +% for i, c_i in enumerate(descriptor.c): + const ${float_type}* preshifted_f_${i} = f[${i}] + ${layout.cell_preshift('gid')}; +% endfor + +% for i, c_i in enumerate(descriptor.c): + const ${float_type} f_curr_${i} = *preshifted_f_${descriptor.c.index(-c_i)}; +% endfor + + ${caller.body()} +} +</%def> diff --git a/boltzgen/kernel/template/update_sss_control_structure.cpp.mako b/boltzgen/kernel/template/update_sss_control_structure.cpp.mako index 9197022..b7661a3 100644 --- a/boltzgen/kernel/template/update_sss_control_structure.cpp.mako +++ b/boltzgen/kernel/template/update_sss_control_structure.cpp.mako @@ -7,9 +7,6 @@ void update_sss_control_structure(${float_type}** f) { ${float_type}* f_old_${i} = f[${i}]; % endfor % for i, c_i in enumerate(descriptor.c): - f[${i}] = f_old_${descriptor.c.index(-c_i)}; -% endfor -% for i, c_i in enumerate(descriptor.c): - f[${i}] += ${layout.neighbor_offset(-c_i)}; + f[${i}] = f_old_${descriptor.c.index(-c_i)} + ${layout.neighbor_offset(-c_i)}; % endfor } diff --git a/boltzgen/kernel/template/update_sss_control_structure.cuda.mako b/boltzgen/kernel/template/update_sss_control_structure.cuda.mako new file mode 100644 index 0000000..2a98b94 --- /dev/null +++ b/boltzgen/kernel/template/update_sss_control_structure.cuda.mako @@ -0,0 +1,21 @@ +<% +if streaming != 'SSS': + raise Exception('"update_sss_control_structure" function only makes sense for the SSS pattern') + +padding = (max(geometry.size_x,geometry.size_y,geometry.size_z)+1)**(descriptor.d-1) +%> + +__global__ void init_sss_control_structure(${float_type}* f, ${float_type}** control) { +% for i, c_i in enumerate(descriptor.c): + control[${i}] = f + ${padding + layout.pop_offset(i, 2*padding)}; +% endfor +} + +__global__ void update_sss_control_structure(${float_type}** f) { +% for i, c_i in enumerate(descriptor.c): + ${float_type}* f_old_${i} = f[${i}]; +% endfor +% for i, c_i in enumerate(descriptor.c): + f[${i}] = f_old_${descriptor.c.index(-c_i)} + ${layout.neighbor_offset(-c_i)}; +% endfor +} diff --git a/boltzgen/utility/__init__.py b/boltzgen/utility/__init__.py index fa9c760..5905c36 100644 --- a/boltzgen/utility/__init__.py +++ b/boltzgen/utility/__init__.py @@ -1,5 +1,6 @@ from . import optimizations from . import ndindex +from . import printer from sympy.codegen.ast import Assignment diff --git a/boltzgen/utility/printer.py b/boltzgen/utility/printer.py new file mode 100644 index 0000000..b52a5e0 --- /dev/null +++ b/boltzgen/utility/printer.py @@ -0,0 +1,13 @@ +from sympy.printing.ccode import C99CodePrinter +from sympy.codegen.ast import float32, float64 + +class CudaCodePrinter(C99CodePrinter): + pass + + def __init__(self, float_type, **args): + super(CudaCodePrinter, self).__init__(**args) + if float_type == 'float': + self.type_func_suffixes[float32] = 'f' + self.type_func_suffixes[float64] = 'f' + self.type_literal_suffixes[float32] = 'f' + self.type_literal_suffixes[float64] = 'f' |