from sympy.printing.ccode import C99CodePrinter
from sympy.codegen.ast import float32, float64


class CudaCodePrinter(C99CodePrinter):
    """C99 code printer variant used for the CUDA target.

    When single precision is requested, floating point literals must carry
    an 'f' suffix. According to the originating commit message, omitting the
    suffix makes most calculations happen in double precision, which
    destroys performance. OpenCL can get the same effect from the
    `-cl-single-precision-constant` compiler flag, but no equivalent flag
    seems to exist for nvcc, so the suffix is emitted by the printer instead.
    """

    def __init__(self, float_type, **args):
        """Set up the printer and, for 'float', force 'f' suffixes.

        float_type -- name of the target floating point type; only the exact
                      value 'float' changes the suffix tables, any other
                      value leaves the inherited tables untouched.
        args       -- keyword arguments forwarded unchanged to the
                      C99CodePrinter constructor.
        """
        super(CudaCodePrinter, self).__init__(**args)

        if float_type != 'float':
            return

        # Both the 32 bit and the 64 bit sympy float types are mapped to the
        # single precision suffix, in the function suffix table as well as
        # in the literal suffix table.
        for suffix_table in (self.type_func_suffixes,
                             self.type_literal_suffixes):
            for sympy_float in (float32, float64):
                suffix_table[sympy_float] = 'f'