From 91b66eb6fe213528bd71c7345f3449c4a3c0f375 Mon Sep 17 00:00:00 2001
From: Adrian Kummerlaender
Date: Mon, 10 Jun 2019 15:38:52 +0200
Subject: Reduce thread block size => ~780 MLUPS

---
 codegen_lbm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'codegen_lbm.py')

diff --git a/codegen_lbm.py b/codegen_lbm.py
index 29c2dad..235a548 100644
--- a/codegen_lbm.py
+++ b/codegen_lbm.py
@@ -173,10 +173,10 @@ class D2Q9_BGK_Lattice:
     def evolve(self):
         if self.tick:
             self.tick = False
-            self.program.collide_and_stream(self.queue, (self.nX,self.nY), (64,1), self.cl_pop_a, self.cl_pop_b, self.cl_moments, self.cl_material)
+            self.program.collide_and_stream(self.queue, (self.nX,self.nY), (32,1), self.cl_pop_a, self.cl_pop_b, self.cl_moments, self.cl_material)
         else:
             self.tick = True
-            self.program.collide_and_stream(self.queue, (self.nX,self.nY), (64,1), self.cl_pop_b, self.cl_pop_a, self.cl_moments, self.cl_material)
+            self.program.collide_and_stream(self.queue, (self.nX,self.nY), (32,1), self.cl_pop_b, self.cl_pop_a, self.cl_moments, self.cl_material)
 
     def sync(self):
         self.queue.finish()
-- 
cgit v1.2.3