From 32dd41a728ce10113032e20955ba08f8de449857 Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Sun, 12 Sep 2021 14:01:55 +0200 Subject: Start using C++ cuda-api-wrapper instead of raw CUDA --- tangle/LLBM/lattice.h | 73 ++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 39 deletions(-) (limited to 'tangle/LLBM/lattice.h') diff --git a/tangle/LLBM/lattice.h b/tangle/LLBM/lattice.h index 7157c78..d3a1840 100644 --- a/tangle/LLBM/lattice.h +++ b/tangle/LLBM/lattice.h @@ -31,9 +31,10 @@ template void apply(OPERATOR... ops) { const auto block_size = 32; const auto block_count = (_cuboid.volume + block_size - 1) / block_size; - kernel::call_operators<<>>( - _population.view(), ops... - ); + cuda::launch(kernel::call_operators, + cuda::launch_configuration_t(block_count, block_size), + _population.view(), + ops...); } template @@ -45,27 +46,22 @@ template void call_operator(tag::call_by_cell_id, DeviceBuffer& cells, ARGS... args) { const auto block_size = 32; const auto block_count = (cells.size() + block_size - 1) / block_size; - kernel::call_operator<<>>( - _population.view(), cells.device(), cells.size(), std::forward(args)... - ); -} - -template -void call_operator(tag::call_by_cell_id, DeviceBuffer& mask, ARGS... args) { - const auto block_size = 32; - const auto block_count = (_cuboid.volume + block_size - 1) / block_size; - kernel::call_operator<<>>( - _population.view(), mask.device(), std::forward(args)... - ); + cuda::launch(kernel::call_operator, + cuda::launch_configuration_t(block_count, block_size), + _population.view(), + cells.device(), cells.size(), + std::forward(args)...); } template void call_operator(tag::call_by_list_index, std::size_t count, ARGS... args) { const auto block_size = 32; const auto block_count = (count + block_size - 1) / block_size; - kernel::call_operator_using_list<<>>( - _population.view(), count, std::forward(args)... - ); + cuda::launch(kernel::call_operator_using_list, + cuda::launch_configuration_t(block_count, block_size), + _population.view(), + count, + std::forward(args)...); } template @@ -73,22 +69,15 @@ void inspect(ARGS&&... args) { call_functor(typename FUNCTOR::call_tag{}, std::forward(args)...); } -template -void call_functor(tag::call_by_cell_id, DeviceBuffer& cells, ARGS... args) { - const auto block_size = 32; - const auto block_count = (cells.size() + block_size - 1) / block_size; - kernel::call_functor<<>>( - _population.view(), cells.device(), cells.size(), std::forward(args)... - ); -} - template void call_functor(tag::call_by_cell_id, DeviceBuffer& mask, ARGS... args) { const auto block_size = 32; const auto block_count = (_cuboid.volume + block_size - 1) / block_size; - kernel::call_functor<<>>( - _population.view(), mask.device(), std::forward(args)... - ); + cuda::launch(kernel::call_functor, + cuda::launch_configuration_t(block_count, block_size), + _population.view(), + mask.device(), + std::forward(args)...); } template @@ -97,9 +86,11 @@ void call_functor(tag::call_by_spatial_cell_mask, DeviceBuffer& mask, ARGS const dim3 grid((_cuboid.nX + block.x - 1) / block.x, (_cuboid.nY + block.y - 1) / block.y, (_cuboid.nZ + block.z - 1) / block.z); - kernel::call_spatial_functor<<>>( - _population.view(), mask.device(), std::forward(args)... - ); + cuda::launch(kernel::call_spatial_functor, + cuda::launch_configuration_t(grid, block), + _population.view(), + mask.device(), + std::forward(args)...); } template @@ -111,9 +102,11 @@ template void tagged_helper(tag::post_process_by_list_index, std::size_t count, ARGS... args) { const auto block_size = 32; const auto block_count = (count + block_size - 1) / block_size; - kernel::call_operator_using_list<<>>( - DESCRIPTOR(), count, std::forward(args)... - ); + cuda::launch(kernel::call_operator_using_list, + cuda::launch_configuration_t(block_count, block_size), + DESCRIPTOR(), + count, + std::forward(args)...); } template @@ -122,9 +115,11 @@ void tagged_helper(tag::post_process_by_spatial_cell_mask, DeviceBuffer& m const dim3 grid((_cuboid.nX + block.x - 1) / block.x, (_cuboid.nY + block.y - 1) / block.y, (_cuboid.nZ + block.z - 1) / block.z); - kernel::call_spatial_operator<<>>( - _cuboid, mask.device(), std::forward(args)... - ); + cuda::launch(kernel::call_spatial_operator, + cuda::launch_configuration_t(grid, block), + _cuboid, + mask.device(), + std::forward(args)...); } }; -- cgit v1.2.3