From 32dd41a728ce10113032e20955ba08f8de449857 Mon Sep 17 00:00:00 2001
From: Adrian Kummerlaender
Date: Sun, 12 Sep 2021 14:01:55 +0200
Subject: Start using the C++ cuda-api-wrappers library instead of raw CUDA kernel launches

---
 tangle/LLBM/lattice.h | 73 ++++++++++++++++++++++++---------------------------
 1 file changed, 34 insertions(+), 39 deletions(-)

(limited to 'tangle/LLBM/lattice.h')
diff --git a/tangle/LLBM/lattice.h b/tangle/LLBM/lattice.h
index 7157c78..d3a1840 100644
--- a/tangle/LLBM/lattice.h
+++ b/tangle/LLBM/lattice.h
@@ -31,9 +31,10 @@ template <typename... OPERATOR>
 void apply(OPERATOR... ops) {
   const auto block_size = 32;
   const auto block_count = (_cuboid.volume + block_size - 1) / block_size;
-  kernel::call_operators<DESCRIPTOR,T,S,OPERATOR...><<<block_count,block_size>>>(
-    _population.view(), ops...
-  );
+  cuda::launch(kernel::call_operators<DESCRIPTOR,T,S,OPERATOR...>,
+               cuda::launch_configuration_t(block_count, block_size),
+               _population.view(),
+               ops...);
 }
 
 template <typename OPERATOR, typename... ARGS>
@@ -45,27 +46,22 @@ template <typename OPERATOR, typename... ARGS>
 void call_operator(tag::call_by_cell_id, DeviceBuffer<std::size_t>& cells, ARGS... args) {
   const auto block_size = 32;
   const auto block_count = (cells.size() + block_size - 1) / block_size;
-  kernel::call_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>(
-    _population.view(), cells.device(), cells.size(), std::forward<ARGS>(args)...
-  );
-}
-
-template <typename OPERATOR, typename... ARGS>
-void call_operator(tag::call_by_cell_id, DeviceBuffer<bool>& mask, ARGS... args) {
-  const auto block_size = 32;
-  const auto block_count = (_cuboid.volume + block_size - 1) / block_size;
-  kernel::call_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>(
-    _population.view(), mask.device(), std::forward<ARGS>(args)...
-  );
+  cuda::launch(kernel::call_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...>,
+               cuda::launch_configuration_t(block_count, block_size),
+               _population.view(),
+               cells.device(), cells.size(),
+               std::forward<ARGS>(args)...);
 }
 
 template <typename OPERATOR, typename... ARGS>
 void call_operator(tag::call_by_list_index, std::size_t count, ARGS... args) {
   const auto block_size = 32;
   const auto block_count = (count + block_size - 1) / block_size;
-  kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>(
-    _population.view(), count, std::forward<ARGS>(args)...
-  );
+  cuda::launch(kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...>,
+               cuda::launch_configuration_t(block_count, block_size),
+               _population.view(),
+               count,
+               std::forward<ARGS>(args)...);
 }
 
 template <typename FUNCTOR, typename... ARGS>
@@ -73,22 +69,15 @@ void inspect(ARGS&&... args) {
   call_functor<FUNCTOR>(typename FUNCTOR::call_tag{}, std::forward<ARGS&&>(args)...);
 }
 
-template <typename FUNCTOR, typename... ARGS>
-void call_functor(tag::call_by_cell_id, DeviceBuffer<std::size_t>& cells, ARGS... args) {
-  const auto block_size = 32;
-  const auto block_count = (cells.size() + block_size - 1) / block_size;
-  kernel::call_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>(
-    _population.view(), cells.device(), cells.size(), std::forward<ARGS>(args)...
-  );
-}
-
 template <typename FUNCTOR, typename... ARGS>
 void call_functor(tag::call_by_cell_id, DeviceBuffer<bool>& mask, ARGS... args) {
   const auto block_size = 32;
   const auto block_count = (_cuboid.volume + block_size - 1) / block_size;
-  kernel::call_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>(
-    _population.view(), mask.device(), std::forward<ARGS>(args)...
-  );
+  cuda::launch(kernel::call_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...>,
+               cuda::launch_configuration_t(block_count, block_size),
+               _population.view(),
+               mask.device(),
+               std::forward<ARGS>(args)...);
 }
 
 template <typename FUNCTOR, typename... ARGS>
@@ -97,9 +86,11 @@ void call_functor(tag::call_by_spatial_cell_mask, DeviceBuffer<bool>& mask, ARGS
   const dim3 grid((_cuboid.nX + block.x - 1) / block.x,
                   (_cuboid.nY + block.y - 1) / block.y,
                   (_cuboid.nZ + block.z - 1) / block.z);
-  kernel::call_spatial_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...><<<grid,block>>>(
-    _population.view(), mask.device(), std::forward<ARGS>(args)...
-  );
+  cuda::launch(kernel::call_spatial_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...>,
+               cuda::launch_configuration_t(grid, block),
+               _population.view(),
+               mask.device(),
+               std::forward<ARGS>(args)...);
 }
 
 template <typename OPERATOR, typename... ARGS>
@@ -111,9 +102,11 @@ template <typename OPERATOR, typename... ARGS>
 void tagged_helper(tag::post_process_by_list_index, std::size_t count, ARGS... args) {
   const auto block_size = 32;
   const auto block_count = (count + block_size - 1) / block_size;
-  kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>(
-    DESCRIPTOR(), count, std::forward<ARGS>(args)...
-  );
+  cuda::launch(kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...>,
+               cuda::launch_configuration_t(block_count, block_size),
+               DESCRIPTOR(),
+               count,
+               std::forward<ARGS>(args)...);
 }
 
 template <typename OPERATOR, typename... ARGS>
@@ -122,9 +115,11 @@ void tagged_helper(tag::post_process_by_spatial_cell_mask, DeviceBuffer<bool>& m
   const dim3 grid((_cuboid.nX + block.x - 1) / block.x,
                   (_cuboid.nY + block.y - 1) / block.y,
                   (_cuboid.nZ + block.z - 1) / block.z);
-  kernel::call_spatial_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<grid,block>>>(
-    _cuboid, mask.device(), std::forward<ARGS>(args)...
-  );
+  cuda::launch(kernel::call_spatial_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...>,
+               cuda::launch_configuration_t(grid, block),
+               _cuboid,
+               mask.device(),
+               std::forward<ARGS>(args)...);
 }
 
 };
-- 
cgit v1.2.3