From 42f4ae5f67f17ff37b3e95cab3c905668816eee8 Mon Sep 17 00:00:00 2001
From: Adrian Kummerlaender
Date: Mon, 28 Oct 2019 23:07:44 +0100
Subject: Basic 2D LDC using boltzgen for kernel generation

Using cell lists as parameters for multiple non-branching kernels
seems to reduce performance by ~50 MLUPS (for single precision D2Q9).

This might be alleviated by padding the cell lists to enable thread
layout control or by improved kernel dispatching.

On the upside this OpenCL program runs not only on GPUs but is also
vectorized on Intel CPUs yielding about 180 MLUPS (single precision)
and - anticlimactically - 85 MLUPS for double precision on a i7-4790K.

However both these values compare well to the performance of
established CPU LBM codes.
---
 shell.nix | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 shell.nix

(limited to 'shell.nix')

diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000..99d794b
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,49 @@
+# Development shell: Python 3 with PyOpenCL, matplotlib and the boltzgen
+# kernel generator, plus opencl-info.
+{ pkgs ? import <nixpkgs> { }, ... }:
+
+pkgs.stdenvNoCC.mkDerivation rec {
+  name = "pycl-env";
+  env = pkgs.buildEnv { name = name; paths = buildInputs; };
+
+  buildInputs = let
+    # boltzgen is built from its GitHub repository at tag v0.1.
+    boltzgen = pkgs.python3.pkgs.buildPythonPackage rec {
+      pname = "boltzgen";
+      version = "0.1";
+
+      src = pkgs.fetchFromGitHub {
+        owner = "KnairdA";
+        repo = "boltzgen";
+        rev = "v0.1";
+        sha256 = "072kx4jrzd0g9rn63hjb0yic7qhbga47lp2vbz7rq3gvkqv1hz4d";
+      };
+
+      # Take dependencies from the same package set that provides
+      # buildPythonPackage so they match the interpreter version.
+      propagatedBuildInputs = with pkgs.python3.pkgs; [
+        sympy
+        numpy
+        Mako
+      ];
+    };
+
+    local-python = pkgs.python3.withPackages (python-packages: with python-packages; [
+      boltzgen
+      numpy
+      pyopencl setuptools
+      matplotlib
+    ]);
+
+  in [
+    local-python
+    pkgs.opencl-info
+  ];
+
+  # Label the shell, set PYOPENCL_COMPILER_OUTPUT and put $PWD on PYTHONPATH.
+  shellHook = ''
+    export NIX_SHELL_NAME="${name}"
+    export PYOPENCL_COMPILER_OUTPUT=1
+    export PYTHONPATH="$PWD:$PYTHONPATH"
+  '';
+}
-- 
cgit v1.2.3