From 78567a423668d09571eaaf5ef6a915a821eea0d5 Mon Sep 17 00:00:00 2001
From: Adrian Kummerlaender
Date: Tue, 11 Jun 2019 23:28:24 +0200
Subject: Move  equilibrization to kernel

---
 codegen_lbm.py | 68 +++++++++++++++++++++++++++++++---------------------------
 1 file changed, 36 insertions(+), 32 deletions(-)

diff --git a/codegen_lbm.py b/codegen_lbm.py
index 7bf7481..6b36c23 100644
--- a/codegen_lbm.py
+++ b/codegen_lbm.py
@@ -16,6 +16,34 @@ from mako.template import Template
 kernel = """
 __constant float tau = ${tau};
 
+bool is_in_circle(float x, float y, float a, float b, float r) {
+    return sqrt(pow(x-a,2)+pow(y-b,2)) < r;
+}
+
+__kernel void equilibrilize(__global __write_only float* f_a,
+                            __global __write_only float* f_b)
+{
+    const unsigned int gid = get_global_id(1)*${nX} + get_global_id(0);
+
+    __global __write_only float* preshifted_f_a = f_a + gid;
+    __global __write_only float* preshifted_f_b = f_b + gid;
+
+    if (  is_in_circle(get_global_id(0), get_global_id(1), ${nX//4},    ${nY//4},    ${nX//10})
+       || is_in_circle(get_global_id(0), get_global_id(1), ${nX//4},    ${nY-nY//4}, ${nX//10})
+       || is_in_circle(get_global_id(0), get_global_id(1), ${nX-nX//4}, ${nY//4},    ${nX//10})
+       || is_in_circle(get_global_id(0), get_global_id(1), ${nX-nX//4}, ${nY-nY//4}, ${nX//10}) ) {
+% for i, w_i in enumerate(w):
+        preshifted_f_a[${i*nCells}] = 1./24.f;
+        preshifted_f_b[${i*nCells}] = 1./24.f;
+% endfor
+    } else {
+% for i, w_i in enumerate(w):
+        preshifted_f_a[${i*nCells}] = ${w_i}.f;
+        preshifted_f_b[${i*nCells}] = ${w_i}.f;
+% endfor
+    }
+}
+
 <%
 def direction_index(c_i):
     return (c_i[0]+1) + 3*(1-c_i[1])
@@ -39,7 +67,8 @@ __kernel void collide_and_stream(__global __write_only float* f_a,
         return;
     }
 
-    __global __read_only float* preshifted_f_b = f_b + gid;
+    __global __write_only float* preshifted_f_a = f_a + gid;
+    __global __read_only  float* preshifted_f_b = f_b + gid;
 
 % for i, c_i in enumerate(c):
     const float f_curr_${i} = preshifted_f_b[${direction_index(c_i)*nCells + neighbor_offset(-c_i)}];
@@ -67,7 +96,7 @@ __kernel void collide_and_stream(__global __write_only float* f_a,
 % endfor
 
 % for i in range(0,len(c)):
-    f_a[${i*nCells} + gid] = f_next_${i};
+    preshifted_f_a[${i*nCells}] = f_next_${i};
 % endfor
 }
 
@@ -106,9 +135,6 @@ class D2Q9_BGK_Lattice:
         self.context  = cl.Context(properties=[(cl.context_properties.PLATFORM, self.platform)])
         self.queue = cl.CommandQueue(self.context)
 
-        self.np_pop_a = numpy.ndarray(shape=(9, self.nCells), dtype=numpy.float32)
-        self.np_pop_b = numpy.ndarray(shape=(9, self.nCells), dtype=numpy.float32)
-
         self.np_moments  = numpy.ndarray(shape=(3, self.nCells), dtype=numpy.float32)
         self.np_material = numpy.ndarray(shape=(self.nCells, 1), dtype=numpy.int32)
 
@@ -116,17 +142,16 @@ class D2Q9_BGK_Lattice:
 
         self.setup_geometry()
 
-        self.equilibrilize()
-        self.setup_anomaly()
-
-        self.cl_pop_a = cl.Buffer(self.context, mf.READ_WRITE | mf.USE_HOST_PTR, hostbuf=self.np_pop_a)
-        self.cl_pop_b = cl.Buffer(self.context, mf.READ_WRITE | mf.USE_HOST_PTR, hostbuf=self.np_pop_b)
+        self.cl_pop_a = cl.Buffer(self.context, mf.READ_WRITE, size=9*self.nCells*numpy.float32(0).nbytes)
+        self.cl_pop_b = cl.Buffer(self.context, mf.READ_WRITE, size=9*self.nCells*numpy.float32(0).nbytes)
 
         self.cl_material = cl.Buffer(self.context, mf.READ_ONLY  | mf.USE_HOST_PTR, hostbuf=self.np_material)
         self.cl_moments  = cl.Buffer(self.context, mf.READ_WRITE | mf.USE_HOST_PTR, hostbuf=self.np_moments)
 
         self.build_kernel()
 
+        self.program.equilibrilize(self.queue, (self.nX,self.nY), (32,1), self.cl_pop_a, self.cl_pop_b).wait()
+
     def setup_geometry(self):
         self.np_material[:] = 0
         for x in range(1,self.nX-1):
@@ -136,28 +161,6 @@ class D2Q9_BGK_Lattice:
                 else:
                     self.np_material[self.idx(x,y)] = 1
 
-    def equilibrilize(self):
-        self.np_pop_a[(0,2,6,8),:] = 1./36.
-        self.np_pop_a[(1,3,5,7),:] = 1./9.
-        self.np_pop_a[4,:] = 4./9.
-
-        self.np_pop_b[(0,2,6,8),:] = 1./36.
-        self.np_pop_b[(1,3,5,7),:] = 1./9.
-        self.np_pop_b[4,:] = 4./9.
-
-    def setup_anomaly(self):
-        bubbles = [ [        self.nX//4,        self.nY//4],
-                    [        self.nX//4,self.nY-self.nY//4],
-                    [self.nX-self.nX//4,        self.nY//4],
-                    [self.nX-self.nX//4,self.nY-self.nY//4] ]
-
-        for x in range(0,self.nX-1):
-            for y in range(0,self.nY-1):
-                for [a,b] in bubbles:
-                    if numpy.sqrt((x-a)*(x-a)+(y-b)*(y-b)) < self.nX//10:
-                        self.np_pop_a[:,self.idx(x,y)] = 1./24.
-                        self.np_pop_b[:,self.idx(x,y)] = 1./24.
-
     def build_kernel(self):
         program_src = Template(kernel).render(
             nX     = self.nX,
@@ -169,6 +172,7 @@ class D2Q9_BGK_Lattice:
             collide_helper     = D2Q9.collide_opt[0],
             collide_assignment = D2Q9.collide_opt[1],
             c     = D2Q9.c,
+            w     = D2Q9.w,
             ccode = sympy.ccode
         )
         self.program = cl.Program(self.context, program_src).build()
-- 
cgit v1.2.3