diff options
Reduce thread block size (OpenCL local work size) from (64,1) to (32,1)
=> ~780 MLUPS
-rw-r--r-- | codegen_lbm.py | 4 | ||||
-rw-r--r-- | lbm_codegen.ipynb | 50 |
2 files changed, 26 insertions, 28 deletions
diff --git a/codegen_lbm.py b/codegen_lbm.py index 29c2dad..235a548 100644 --- a/codegen_lbm.py +++ b/codegen_lbm.py @@ -173,10 +173,10 @@ class D2Q9_BGK_Lattice: def evolve(self): if self.tick: self.tick = False - self.program.collide_and_stream(self.queue, (self.nX,self.nY), (64,1), self.cl_pop_a, self.cl_pop_b, self.cl_moments, self.cl_material) + self.program.collide_and_stream(self.queue, (self.nX,self.nY), (32,1), self.cl_pop_a, self.cl_pop_b, self.cl_moments, self.cl_material) else: self.tick = True - self.program.collide_and_stream(self.queue, (self.nX,self.nY), (64,1), self.cl_pop_b, self.cl_pop_a, self.cl_moments, self.cl_material) + self.program.collide_and_stream(self.queue, (self.nX,self.nY), (32,1), self.cl_pop_b, self.cl_pop_a, self.cl_moments, self.cl_material) def sync(self): self.queue.finish() diff --git a/lbm_codegen.ipynb b/lbm_codegen.ipynb index 34e9e6b..0b85e19 100644 --- a/lbm_codegen.ipynb +++ b/lbm_codegen.ipynb @@ -7,8 +7,6 @@ "outputs": [], "source": [ "from sympy import *\n", - "import matplotlib.pyplot as plt\n", - "\n", "init_printing()" ] }, @@ -132,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -141,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -151,7 +149,7 @@ " f_next_6, f_next_7, f_next_8], dtype=object)" ] }, - "execution_count": 23, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -163,7 +161,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -173,7 +171,7 @@ " f_curr_6, f_curr_7, f_curr_8], dtype=object)" ] }, - "execution_count": 24, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -185,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -199,7 +197,7 @@ "_curr_7 + f_curr_8" ] }, 
- "execution_count": 25, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -211,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -221,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -231,7 +229,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -245,7 +243,7 @@ "⎣u_y⎦" ] }, - "execution_count": 26, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -257,7 +255,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -353,7 +351,7 @@ "36 36 36 ⎠⎦" ] }, - "execution_count": 27, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -373,7 +371,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -524,7 +522,7 @@ " ⎦" ] }, - "execution_count": 28, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -536,7 +534,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -612,7 +610,7 @@ " ⎝ ⎝ 2 ⎠⎠⎦⎠" ] }, - "execution_count": 29, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -623,7 +621,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -678,7 +676,7 @@ " ⎦⎠" ] }, - "execution_count": 30, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -690,7 +688,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -726,7 +724,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -752,7 +750,7 @@ }, { "cell_type": "code", - 
"execution_count": 49, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -764,7 +762,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -785,7 +783,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 23, "metadata": {}, "outputs": [ { |