diff options
Diffstat (limited to 'tangle/LLBM/lattice.h')
-rw-r--r-- | tangle/LLBM/lattice.h | 73 |
1 files changed, 34 insertions, 39 deletions
diff --git a/tangle/LLBM/lattice.h b/tangle/LLBM/lattice.h index 7157c78..d3a1840 100644 --- a/tangle/LLBM/lattice.h +++ b/tangle/LLBM/lattice.h @@ -31,9 +31,10 @@ template <typename... OPERATOR> void apply(OPERATOR... ops) { const auto block_size = 32; const auto block_count = (_cuboid.volume + block_size - 1) / block_size; - kernel::call_operators<DESCRIPTOR,T,S,OPERATOR...><<<block_count,block_size>>>( - _population.view(), ops... - ); + cuda::launch(kernel::call_operators<DESCRIPTOR,T,S,OPERATOR...>, + cuda::launch_configuration_t(block_count, block_size), + _population.view(), + ops...); } template <typename OPERATOR, typename... ARGS> @@ -45,27 +46,22 @@ template <typename OPERATOR, typename... ARGS> void call_operator(tag::call_by_cell_id, DeviceBuffer<std::size_t>& cells, ARGS... args) { const auto block_size = 32; const auto block_count = (cells.size() + block_size - 1) / block_size; - kernel::call_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>( - _population.view(), cells.device(), cells.size(), std::forward<ARGS>(args)... - ); -} - -template <typename OPERATOR, typename... ARGS> -void call_operator(tag::call_by_cell_id, DeviceBuffer<bool>& mask, ARGS... args) { - const auto block_size = 32; - const auto block_count = (_cuboid.volume + block_size - 1) / block_size; - kernel::call_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>( - _population.view(), mask.device(), std::forward<ARGS>(args)... - ); + cuda::launch(kernel::call_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...>, + cuda::launch_configuration_t(block_count, block_size), + _population.view(), + cells.device(), cells.size(), + std::forward<ARGS>(args)...); } template <typename OPERATOR, typename... ARGS> void call_operator(tag::call_by_list_index, std::size_t count, ARGS... args) { const auto block_size = 32; const auto block_count = (count + block_size - 1) / block_size; - kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>( - _population.view(), count, std::forward<ARGS>(args)... - ); + cuda::launch(kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...>, + cuda::launch_configuration_t(block_count, block_size), + _population.view(), + count, + std::forward<ARGS>(args)...); } template <typename FUNCTOR, typename... ARGS> @@ -74,21 +70,14 @@ void inspect(ARGS&&... args) { } template <typename FUNCTOR, typename... ARGS> -void call_functor(tag::call_by_cell_id, DeviceBuffer<std::size_t>& cells, ARGS... args) { - const auto block_size = 32; - const auto block_count = (cells.size() + block_size - 1) / block_size; - kernel::call_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>( - _population.view(), cells.device(), cells.size(), std::forward<ARGS>(args)... - ); -} - -template <typename FUNCTOR, typename... ARGS> void call_functor(tag::call_by_cell_id, DeviceBuffer<bool>& mask, ARGS... args) { const auto block_size = 32; const auto block_count = (_cuboid.volume + block_size - 1) / block_size; - kernel::call_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>( - _population.view(), mask.device(), std::forward<ARGS>(args)... - ); + cuda::launch(kernel::call_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...>, + cuda::launch_configuration_t(block_count, block_size), + _population.view(), + mask.device(), + std::forward<ARGS>(args)...); } template <typename FUNCTOR, typename... ARGS> @@ -97,9 +86,11 @@ void call_functor(tag::call_by_spatial_cell_mask, DeviceBuffer<bool>& mask, ARGS const dim3 grid((_cuboid.nX + block.x - 1) / block.x, (_cuboid.nY + block.y - 1) / block.y, (_cuboid.nZ + block.z - 1) / block.z); - kernel::call_spatial_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...><<<grid,block>>>( - _population.view(), mask.device(), std::forward<ARGS>(args)... - ); + cuda::launch(kernel::call_spatial_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...>, + cuda::launch_configuration_t(grid, block), + _population.view(), + mask.device(), + std::forward<ARGS>(args)...); } template <typename OPERATOR, typename... ARGS> @@ -111,9 +102,11 @@ template <typename OPERATOR, typename... ARGS> void tagged_helper(tag::post_process_by_list_index, std::size_t count, ARGS... args) { const auto block_size = 32; const auto block_count = (count + block_size - 1) / block_size; - kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>( - DESCRIPTOR(), count, std::forward<ARGS>(args)... - ); + cuda::launch(kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...>, + cuda::launch_configuration_t(block_count, block_size), + DESCRIPTOR(), + count, + std::forward<ARGS>(args)...); } template <typename OPERATOR, typename... ARGS> @@ -122,9 +115,11 @@ void tagged_helper(tag::post_process_by_spatial_cell_mask, DeviceBuffer<bool>& m const dim3 grid((_cuboid.nX + block.x - 1) / block.x, (_cuboid.nY + block.y - 1) / block.y, (_cuboid.nZ + block.z - 1) / block.z); - kernel::call_spatial_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<grid,block>>>( - _cuboid, mask.device(), std::forward<ARGS>(args)... - ); + cuda::launch(kernel::call_spatial_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...>, + cuda::launch_configuration_t(grid, block), + _cuboid, + mask.device(), + std::forward<ARGS>(args)...); } }; |