From 02cb01c94fe26d425371ab74feeb50e8a9bf6bf6 Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Tue, 5 Nov 2019 19:57:17 +0100 Subject: Implement AA pattern for C++ target Note that special care has to be taken to provide ghost cells around active cells so the algorithm has somewhere to stream to and from. This is also the case for the AB pattern but there they only have to be equilibrated once instead of after every other time step. Even when such an equilibration is performed there is still a potential bug as inbound populations at the outer boundary are never streamed to (this is not a problem for AB using pull-only streaming). A vectorizable solution may require direction-specific ghost cell equilibration. --- boltzgen.py | 4 +- boltzgen/kernel/generator.py | 12 ++-- boltzgen/kernel/template/pattern/AA.cpp.mako | 83 ++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 6 deletions(-) create mode 100644 boltzgen/kernel/template/pattern/AA.cpp.mako diff --git a/boltzgen.py b/boltzgen.py index 57b3ed0..216234f 100755 --- a/boltzgen.py +++ b/boltzgen.py @@ -10,9 +10,10 @@ argparser.add_argument('target', help = 'Target language (currently either "cl" argparser.add_argument('--lattice', required = True, help = 'Lattice type ("D2Q9", "D3Q7", "D3Q19", "D3Q27")') argparser.add_argument('--model', required = False, help = 'LBM model (currently only "BGK")') +argparser.add_argument('--precision', required = True, help = 'Floating precision ("single" or "double")') argparser.add_argument('--layout', required = True, help = 'Memory layout ("AOS" or "SOA")') argparser.add_argument('--index', required = False, help = 'Cell indexing ("XYZ" or "ZYX")') -argparser.add_argument('--precision', required = True, help = 'Floating precision ("single" or "double")') +argparser.add_argument('--streaming', required = True, help = 'Streaming pattern ("AB" or "AA")') argparser.add_argument('--geometry', required = True, help = 'Size of the block geometry 
("x:y(:z)")') argparser.add_argument('--tau', required = True, help = 'BGK relaxation time') @@ -42,6 +43,7 @@ generator = Generator( model = model(lattice, tau = float(args.tau), optimize = not args.disable_cse), target = args.target, precision = args.precision, + streaming = args.streaming, index = args.index, layout = args.layout) diff --git a/boltzgen/kernel/generator.py b/boltzgen/kernel/generator.py index dd44a56..ee5bf8b 100644 --- a/boltzgen/kernel/generator.py +++ b/boltzgen/kernel/generator.py @@ -10,7 +10,7 @@ template_lookup = TemplateLookup(directories = [ ]) class Generator: - def __init__(self, model, target, precision, index, layout): + def __init__(self, model, target, precision, index, layout, streaming): self.model = model self.descriptor = self.model.descriptor self.target = target @@ -26,6 +26,8 @@ class Generator: except AttributeError: raise Exception("There is no layout '%s'" % layout) from None + self.streaming = streaming + def instantiate(self, template, geometry, extras = []): template_path = Path(__file__).parent/("template/%s.%s.mako" % (template, self.target)) if not template_path.exists(): @@ -35,10 +37,10 @@ class Generator: descriptor = self.descriptor, model = self.model, geometry = geometry, + float_type = self.float_type, index = self.index_impl(geometry), layout = self.layout_impl(self.descriptor, self.index_impl, geometry), - streaming = 'AB', - float_type = self.float_type, + streaming = self.streaming, extras = extras ) @@ -53,9 +55,9 @@ class Generator: descriptor = self.descriptor, model = self.model, geometry = geometry, + float_type = self.float_type, index = self.index_impl(geometry), layout = self.layout_impl(self.descriptor, self.index_impl, geometry), - streaming = 'AB', - float_type = self.float_type, + streaming = self.streaming, extras = extras ) diff --git a/boltzgen/kernel/template/pattern/AA.cpp.mako b/boltzgen/kernel/template/pattern/AA.cpp.mako new file mode 100644 index 0000000..a61bb41 --- /dev/null +++ 
b/boltzgen/kernel/template/pattern/AA.cpp.mako @@ -0,0 +1,83 @@ +<%def name="operator(name, params = None)"> +void ${name}_tick( + ${float_type}* f + , std::size_t gid +% if params is not None: +% for param_type, param_name in params: + , ${param_type} ${param_name} +% endfor +% endif +) { + ${float_type}* preshifted_f = f + ${layout.cell_preshift('gid')}; + +% for i, c_i in enumerate(descriptor.c): + const ${float_type} f_curr_${i} = preshifted_f[${layout.pop_offset(i)}]; +% endfor + + ${caller.body()} + +% for i, c_i in enumerate(descriptor.c): + preshifted_f[${layout.pop_offset(i)}] = f_next_${descriptor.c.index(-c_i)}; +% endfor +} + +void ${name}_tock( + ${float_type}* f + , std::size_t gid +% if params is not None: +% for param_type, param_name in params: + , ${param_type} ${param_name} +% endfor +% endif +) { + ${float_type}* preshifted_f = f + ${layout.cell_preshift('gid')}; + +% for i, c_i in enumerate(descriptor.c): + const ${float_type} f_curr_${descriptor.c.index(-c_i)} = preshifted_f[${layout.pop_offset(i) + layout.neighbor_offset(c_i)}]; +% endfor + + ${caller.body()} + +% for i, c_i in enumerate(descriptor.c): + preshifted_f[${layout.pop_offset(i) + layout.neighbor_offset(c_i)}] = f_next_${i}; +% endfor +} + + +<%def name="functor(name, params = None)"> +void ${name}_tick( + const ${float_type}* f + , std::size_t gid +% if params is not None: +% for param_type, param_name in params: + , ${param_type} ${param_name} +% endfor +% endif +) { + const ${float_type}* preshifted_f = f + ${layout.cell_preshift('gid')}; + +% for i, c_i in enumerate(descriptor.c): + const ${float_type} f_curr_${i} = preshifted_f[${layout.pop_offset(descriptor.c.index(-c_i))}]; +% endfor + + ${caller.body()} +} + +void ${name}_tock( + const ${float_type}* f + , std::size_t gid +% if params is not None: +% for param_type, param_name in params: + , ${param_type} ${param_name} +% endfor +% endif +) { + const ${float_type}* preshifted_f = f + ${layout.cell_preshift('gid')}; + +% for 
i, c_i in enumerate(descriptor.c): + const ${float_type} f_curr_${i} = preshifted_f[${layout.pop_offset(i) + layout.neighbor_offset(c_i)}]; +% endfor + + ${caller.body()} +} + -- cgit v1.2.3