aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md2
-rw-r--r--boltzgen/kernel/memory/precision.py3
-rw-r--r--boltzgen/kernel/template/bounce_back_boundary.cuda.mako16
-rw-r--r--boltzgen/kernel/template/collect_moments.cuda.mako20
-rw-r--r--boltzgen/kernel/template/collide_and_stream.cuda.mako17
-rw-r--r--boltzgen/kernel/template/equilibrilize.cuda.mako11
-rw-r--r--boltzgen/kernel/template/momenta_boundary.cuda.mako39
-rw-r--r--boltzgen/kernel/template/pattern/SSS.cuda.mako81
-rw-r--r--boltzgen/kernel/template/update_sss_control_structure.cpp.mako5
-rw-r--r--boltzgen/kernel/template/update_sss_control_structure.cuda.mako21
-rw-r--r--boltzgen/utility/__init__.py1
-rw-r--r--boltzgen/utility/printer.py13
12 files changed, 224 insertions, 5 deletions
diff --git a/README.md b/README.md
index 13f4d0c..ed2c1bd 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ At the moment this is a more structured and cleaned up version of the OpenCL ker
* configurable cell indexing sequence
* static resolution of memory offsets
* AB, AA and SSS streaming patterns
-* C++ and OpenCL targets
+* C++, OpenCL and CUDA targets
* simple CLI frontend
## Usage
diff --git a/boltzgen/kernel/memory/precision.py b/boltzgen/kernel/memory/precision.py
index d22e922..0c52fc0 100644
--- a/boltzgen/kernel/memory/precision.py
+++ b/boltzgen/kernel/memory/precision.py
@@ -14,3 +14,6 @@ class cpp(common):
class cl(common):
pass
+
+class cuda(common):
+ pass
diff --git a/boltzgen/kernel/template/bounce_back_boundary.cuda.mako b/boltzgen/kernel/template/bounce_back_boundary.cuda.mako
new file mode 100644
index 0000000..09499e2
--- /dev/null
+++ b/boltzgen/kernel/template/bounce_back_boundary.cuda.mako
@@ -0,0 +1,16 @@
+<%namespace name="pattern" file="${'/pattern/%s.cuda.mako' % context['streaming']}"/>
+<%
+from boltzgen.utility.printer import CudaCodePrinter
+ccode = CudaCodePrinter(float_type).doprint
+subexpr, assignment = model.collision(f_eq = model.equilibrium(resolve_moments = True))
+%>
+
+<%call expr="pattern.operator('bounce_back_boundary')">
+% for i, expr in enumerate(subexpr):
+ const ${float_type} ${expr[0]} = ${ccode(expr[1])};
+% endfor
+
+% for i, c_i in enumerate(descriptor.c):
+ const ${float_type} ${assignment[i].lhs} = ${ccode(assignment[descriptor.c.index(-c_i)].rhs)};
+% endfor
+</%call>
diff --git a/boltzgen/kernel/template/collect_moments.cuda.mako b/boltzgen/kernel/template/collect_moments.cuda.mako
new file mode 100644
index 0000000..36f03b0
--- /dev/null
+++ b/boltzgen/kernel/template/collect_moments.cuda.mako
@@ -0,0 +1,20 @@
+<%namespace name="pattern" file="${'/pattern/%s.cuda.mako' % context['streaming']}"/>
+<%
+from boltzgen.utility.printer import CudaCodePrinter
+ccode = CudaCodePrinter(float_type).doprint
+moments_subexpr, moments_assignment = model.moments()
+%>
+
+<%call expr="pattern.functor('collect_moments', [('%s*' % float_type, 'rho'), ('%s*' % float_type, 'u')])">
+% for i, expr in enumerate(moments_subexpr):
+ const ${float_type} ${expr[0]} = ${ccode(expr[1])};
+% endfor
+
+% for i, expr in enumerate(moments_assignment):
+% if i == 0:
+ rho[gid] = ${ccode(expr.rhs)};
+% else:
+ u[gid*${descriptor.d} + ${i-1}] = ${ccode(expr.rhs)};
+% endif
+% endfor
+</%call>
diff --git a/boltzgen/kernel/template/collide_and_stream.cuda.mako b/boltzgen/kernel/template/collide_and_stream.cuda.mako
new file mode 100644
index 0000000..b46cf0f
--- /dev/null
+++ b/boltzgen/kernel/template/collide_and_stream.cuda.mako
@@ -0,0 +1,17 @@
+<%namespace name="pattern" file="${'/pattern/%s.cuda.mako' % context['streaming']}"/>
+<%
+from boltzgen.utility.printer import CudaCodePrinter
+ccode = CudaCodePrinter(float_type).doprint
+subexpr, assignment = model.collision(f_eq = model.equilibrium(resolve_moments = True))
+
+%>
+
+<%call expr="pattern.operator('collide_and_stream')">
+% for i, expr in enumerate(subexpr):
+ const ${float_type} ${expr[0]} = ${ccode(expr[1])};
+% endfor
+
+% for i, expr in enumerate(assignment):
+ const ${float_type} ${ccode(expr)}
+% endfor
+</%call>
diff --git a/boltzgen/kernel/template/equilibrilize.cuda.mako b/boltzgen/kernel/template/equilibrilize.cuda.mako
new file mode 100644
index 0000000..2f4cdee
--- /dev/null
+++ b/boltzgen/kernel/template/equilibrilize.cuda.mako
@@ -0,0 +1,11 @@
+<%namespace name="pattern" file="${'/pattern/%s.cuda.mako' % context['streaming']}"/>
+<%
+from boltzgen.utility.printer import CudaCodePrinter
+ccode = CudaCodePrinter(float_type).doprint
+%>
+
+<%call expr="pattern.operator('equilibrilize')">
+% for i, w_i in enumerate(descriptor.w):
+ const ${float_type} f_next_${i} = ${ccode(w_i.evalf())};
+% endfor
+</%call>
diff --git a/boltzgen/kernel/template/momenta_boundary.cuda.mako b/boltzgen/kernel/template/momenta_boundary.cuda.mako
new file mode 100644
index 0000000..408a12d
--- /dev/null
+++ b/boltzgen/kernel/template/momenta_boundary.cuda.mako
@@ -0,0 +1,39 @@
+<%namespace name="pattern" file="${'/pattern/%s.cuda.mako' % context['streaming']}"/>
+<%
+from boltzgen.utility.printer import CudaCodePrinter
+ccode = CudaCodePrinter(float_type).doprint
+moments_subexpr, moments_assignment = model.moments()
+collision_subexpr, collision_assignment = model.collision(f_eq = model.equilibrium(resolve_moments = False))
+%>
+
+<%def name="momenta_boundary(name, params)">
+<%call expr="pattern.operator('%s_momenta_boundary' % name, params)">
+% for i, expr in enumerate(moments_subexpr):
+ const ${float_type} ${expr[0]} = ${ccode(expr[1])};
+% endfor
+
+ ${caller.body()}
+
+% for i, expr in enumerate(collision_subexpr):
+ const ${float_type} ${expr[0]} = ${ccode(expr[1])};
+% endfor
+
+% for i, expr in enumerate(collision_assignment):
+ const ${float_type} ${ccode(expr)}
+% endfor
+</%call>
+</%def>
+
+<%call expr="momenta_boundary('velocity', list(map(lambda i: (float_type, 'velocity_%d' % i), range(descriptor.d))))">
+ const ${float_type} ${ccode(moments_assignment[0])}
+% for i, expr in enumerate(moments_assignment[1:]):
+ const ${float_type} ${expr.lhs} = velocity_${i};
+% endfor
+</%call>
+
+<%call expr="momenta_boundary('density', [(float_type, 'density')])">
+ const ${float_type} ${moments_assignment[0].lhs} = density;
+% for i, expr in enumerate(moments_assignment[1:]):
+ const ${float_type} ${ccode(expr)}
+% endfor
+</%call>
diff --git a/boltzgen/kernel/template/pattern/SSS.cuda.mako b/boltzgen/kernel/template/pattern/SSS.cuda.mako
new file mode 100644
index 0000000..8f571c7
--- /dev/null
+++ b/boltzgen/kernel/template/pattern/SSS.cuda.mako
@@ -0,0 +1,81 @@
+<%def name="operator(name, params = None)">
+<%
+if layout.__class__.__name__ != 'SOA':
+ raise Exception('SSS pattern only works for the AOS memory layout')
+%>
+__global__ void ${name}(
+ ${float_type}** f
+% if 'cell_list_dispatch' in extras:
+ , std::size_t* cells
+ , std::size_t cell_count
+% else:
+ , std::size_t gid
+% endif
+% if params is not None:
+% for param_type, param_name in params:
+ , ${param_type} ${param_name}
+% endfor
+% endif
+) {
+% if 'cell_list_dispatch' in extras:
+ const std::size_t index = blockIdx.x * blockDim.x + threadIdx.x;
+ if (!(index < cell_count)) {
+ return;
+ }
+ const std::size_t gid = cells[index];
+% endif
+
+% for i, c_i in enumerate(descriptor.c):
+ ${float_type}* preshifted_f_${i} = f[${i}] + ${layout.cell_preshift('gid')};
+% endfor
+
+% for i, c_i in enumerate(descriptor.c):
+ const ${float_type} f_curr_${i} = *preshifted_f_${i};
+% endfor
+
+ ${caller.body()}
+
+% for i, c_i in enumerate(descriptor.c):
+ *preshifted_f_${i} = f_next_${descriptor.c.index(-c_i)};
+% endfor
+}
+</%def>
+
+<%def name="functor(name, params = None)">
+<%
+if layout.__class__.__name__ != 'SOA':
+ raise Exception('SSS pattern only works for the AOS memory layout')
+%>
+__global__ void ${name}(
+ ${float_type}** f
+% if 'cell_list_dispatch' in extras:
+ , std::size_t* cells
+ , std::size_t cell_count
+% else:
+ , std::size_t gid
+% endif
+% if params is not None:
+% for param_type, param_name in params:
+ , ${param_type} ${param_name}
+% endfor
+% endif
+) {
+% if 'cell_list_dispatch' in extras:
+ const std::size_t index = blockIdx.x * blockDim.x + threadIdx.x;
+ if (!(index < cell_count)) {
+ return;
+ }
+ const std::size_t gid = cells[index];
+% endif
+
+% for i, c_i in enumerate(descriptor.c):
+ const ${float_type}* preshifted_f_${i} = f[${i}] + ${layout.cell_preshift('gid')};
+% endfor
+
+% for i, c_i in enumerate(descriptor.c):
+ const ${float_type} f_curr_${i} = *preshifted_f_${descriptor.c.index(-c_i)};
+% endfor
+
+ ${caller.body()}
+}
+</%def>
diff --git a/boltzgen/kernel/template/update_sss_control_structure.cpp.mako b/boltzgen/kernel/template/update_sss_control_structure.cpp.mako
index 9197022..b7661a3 100644
--- a/boltzgen/kernel/template/update_sss_control_structure.cpp.mako
+++ b/boltzgen/kernel/template/update_sss_control_structure.cpp.mako
@@ -7,9 +7,6 @@ void update_sss_control_structure(${float_type}** f) {
${float_type}* f_old_${i} = f[${i}];
% endfor
% for i, c_i in enumerate(descriptor.c):
- f[${i}] = f_old_${descriptor.c.index(-c_i)};
-% endfor
-% for i, c_i in enumerate(descriptor.c):
- f[${i}] += ${layout.neighbor_offset(-c_i)};
+ f[${i}] = f_old_${descriptor.c.index(-c_i)} + ${layout.neighbor_offset(-c_i)};
% endfor
}
diff --git a/boltzgen/kernel/template/update_sss_control_structure.cuda.mako b/boltzgen/kernel/template/update_sss_control_structure.cuda.mako
new file mode 100644
index 0000000..2a98b94
--- /dev/null
+++ b/boltzgen/kernel/template/update_sss_control_structure.cuda.mako
@@ -0,0 +1,21 @@
+<%
+if streaming != 'SSS':
+ raise Exception('"update_sss_control_structure" function only makes sense for the SSS pattern')
+
+padding = (max(geometry.size_x,geometry.size_y,geometry.size_z)+1)**(descriptor.d-1)
+%>
+
+__global__ void init_sss_control_structure(${float_type}* f, ${float_type}** control) {
+% for i, c_i in enumerate(descriptor.c):
+ control[${i}] = f + ${padding + layout.pop_offset(i, 2*padding)};
+% endfor
+}
+
+__global__ void update_sss_control_structure(${float_type}** f) {
+% for i, c_i in enumerate(descriptor.c):
+ ${float_type}* f_old_${i} = f[${i}];
+% endfor
+% for i, c_i in enumerate(descriptor.c):
+ f[${i}] = f_old_${descriptor.c.index(-c_i)} + ${layout.neighbor_offset(-c_i)};
+% endfor
+}
diff --git a/boltzgen/utility/__init__.py b/boltzgen/utility/__init__.py
index fa9c760..5905c36 100644
--- a/boltzgen/utility/__init__.py
+++ b/boltzgen/utility/__init__.py
@@ -1,5 +1,6 @@
from . import optimizations
from . import ndindex
+from . import printer
from sympy.codegen.ast import Assignment
diff --git a/boltzgen/utility/printer.py b/boltzgen/utility/printer.py
new file mode 100644
index 0000000..b52a5e0
--- /dev/null
+++ b/boltzgen/utility/printer.py
@@ -0,0 +1,13 @@
+from sympy.printing.ccode import C99CodePrinter
+from sympy.codegen.ast import float32, float64
+
+class CudaCodePrinter(C99CodePrinter):
+ pass
+
+ def __init__(self, float_type, **args):
+ super(CudaCodePrinter, self).__init__(**args)
+ if float_type == 'float':
+ self.type_func_suffixes[float32] = 'f'
+ self.type_func_suffixes[float64] = 'f'
+ self.type_literal_suffixes[float32] = 'f'
+ self.type_literal_suffixes[float64] = 'f'