diff options
| -rw-r--r-- | codegen_lbm.py | 68 | 
1 files changed, 36 insertions, 32 deletions
| diff --git a/codegen_lbm.py b/codegen_lbm.py index 7bf7481..6b36c23 100644 --- a/codegen_lbm.py +++ b/codegen_lbm.py @@ -16,6 +16,34 @@ from mako.template import Template  kernel = """  __constant float tau = ${tau}; +bool is_in_circle(float x, float y, float a, float b, float r) { +    return sqrt(pow(x-a,2)+pow(y-b,2)) < r; +} + +__kernel void equilibrilize(__global __write_only float* f_a, +                            __global __write_only float* f_b) +{ +    const unsigned int gid = get_global_id(1)*${nX} + get_global_id(0); + +    __global __write_only float* preshifted_f_a = f_a + gid; +    __global __write_only float* preshifted_f_b = f_b + gid; + +    if (  is_in_circle(get_global_id(0), get_global_id(1), ${nX//4},    ${nY//4},    ${nX//10}) +       || is_in_circle(get_global_id(0), get_global_id(1), ${nX//4},    ${nY-nY//4}, ${nX//10}) +       || is_in_circle(get_global_id(0), get_global_id(1), ${nX-nX//4}, ${nY//4},    ${nX//10}) +       || is_in_circle(get_global_id(0), get_global_id(1), ${nX-nX//4}, ${nY-nY//4}, ${nX//10}) ) { +% for i, w_i in enumerate(w): +        preshifted_f_a[${i*nCells}] = 1./24.f; +        preshifted_f_b[${i*nCells}] = 1./24.f; +% endfor +    } else { +% for i, w_i in enumerate(w): +        preshifted_f_a[${i*nCells}] = ${w_i}.f; +        preshifted_f_b[${i*nCells}] = ${w_i}.f; +% endfor +    } +} +  <%  def direction_index(c_i):      return (c_i[0]+1) + 3*(1-c_i[1]) @@ -39,7 +67,8 @@ __kernel void collide_and_stream(__global __write_only float* f_a,          return;      } -    __global __read_only float* preshifted_f_b = f_b + gid; +    __global __write_only float* preshifted_f_a = f_a + gid; +    __global __read_only  float* preshifted_f_b = f_b + gid;  % for i, c_i in enumerate(c):      const float f_curr_${i} = preshifted_f_b[${direction_index(c_i)*nCells + neighbor_offset(-c_i)}]; @@ -67,7 +96,7 @@ __kernel void collide_and_stream(__global __write_only float* f_a,  % endfor  % for i in range(0,len(c)): -    f_a[${i*nCells} + gid] = f_next_${i}; +    preshifted_f_a[${i*nCells}] = f_next_${i};  % endfor  } @@ -106,9 +135,6 @@ class D2Q9_BGK_Lattice:          self.context  = cl.Context(properties=[(cl.context_properties.PLATFORM, self.platform)])          self.queue = cl.CommandQueue(self.context) -        self.np_pop_a = numpy.ndarray(shape=(9, self.nCells), dtype=numpy.float32) -        self.np_pop_b = numpy.ndarray(shape=(9, self.nCells), dtype=numpy.float32) -          self.np_moments  = numpy.ndarray(shape=(3, self.nCells), dtype=numpy.float32)          self.np_material = numpy.ndarray(shape=(self.nCells, 1), dtype=numpy.int32) @@ -116,17 +142,16 @@ class D2Q9_BGK_Lattice:          self.setup_geometry() -        self.equilibrilize() -        self.setup_anomaly() - -        self.cl_pop_a = cl.Buffer(self.context, mf.READ_WRITE | mf.USE_HOST_PTR, hostbuf=self.np_pop_a) -        self.cl_pop_b = cl.Buffer(self.context, mf.READ_WRITE | mf.USE_HOST_PTR, hostbuf=self.np_pop_b) +        self.cl_pop_a = cl.Buffer(self.context, mf.READ_WRITE, size=9*self.nCells*numpy.float32(0).nbytes) +        self.cl_pop_b = cl.Buffer(self.context, mf.READ_WRITE, size=9*self.nCells*numpy.float32(0).nbytes)          self.cl_material = cl.Buffer(self.context, mf.READ_ONLY  | mf.USE_HOST_PTR, hostbuf=self.np_material)          self.cl_moments  = cl.Buffer(self.context, mf.READ_WRITE | mf.USE_HOST_PTR, hostbuf=self.np_moments)          self.build_kernel() +        self.program.equilibrilize(self.queue, (self.nX,self.nY), (32,1), self.cl_pop_a, self.cl_pop_b).wait() +      def setup_geometry(self):          self.np_material[:] = 0          for x in range(1,self.nX-1): @@ -136,28 +161,6 @@ class D2Q9_BGK_Lattice:                  else:                      self.np_material[self.idx(x,y)] = 1 -    def equilibrilize(self): -        self.np_pop_a[(0,2,6,8),:] = 1./36. -        self.np_pop_a[(1,3,5,7),:] = 1./9. -        self.np_pop_a[4,:] = 4./9. - -        self.np_pop_b[(0,2,6,8),:] = 1./36. -        self.np_pop_b[(1,3,5,7),:] = 1./9. -        self.np_pop_b[4,:] = 4./9. - -    def setup_anomaly(self): -        bubbles = [ [        self.nX//4,        self.nY//4], -                    [        self.nX//4,self.nY-self.nY//4], -                    [self.nX-self.nX//4,        self.nY//4], -                    [self.nX-self.nX//4,self.nY-self.nY//4] ] - -        for x in range(0,self.nX-1): -            for y in range(0,self.nY-1): -                for [a,b] in bubbles: -                    if numpy.sqrt((x-a)*(x-a)+(y-b)*(y-b)) < self.nX//10: -                        self.np_pop_a[:,self.idx(x,y)] = 1./24. -                        self.np_pop_b[:,self.idx(x,y)] = 1./24. -      def build_kernel(self):          program_src = Template(kernel).render(              nX     = self.nX, @@ -169,6 +172,7 @@ class D2Q9_BGK_Lattice:              collide_helper     = D2Q9.collide_opt[0],              collide_assignment = D2Q9.collide_opt[1],              c     = D2Q9.c, +            w     = D2Q9.w,              ccode = sympy.ccode          )          self.program = cl.Program(self.context, program_src).build() | 
