diff options
author | Adrian Kummerlaender | 2021-09-12 14:01:55 +0200 |
---|---|---|
committer | Adrian Kummerlaender | 2021-09-12 14:01:55 +0200 |
commit | 32dd41a728ce10113032e20955ba08f8de449857 (patch) | |
tree | 8a5f851a342590d05137b2c900a7d3c4e8c545b6 /tangle | |
parent | 8bca21a550e0ef134d51c4c4c007720885d76791 (diff) | |
download | LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.gz LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.bz2 LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.lz LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.xz LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.zst LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.zip |
Start using C++ cuda-api-wrapper instead of raw CUDA
Diffstat (limited to 'tangle')
-rw-r--r-- | tangle/LLBM/kernel/executor.h | 37 | ||||
-rw-r--r-- | tangle/LLBM/lattice.h | 73 | ||||
-rw-r--r-- | tangle/LLBM/materials.h | 2 | ||||
-rw-r--r-- | tangle/LLBM/memory.h | 29 | ||||
-rw-r--r-- | tangle/LLBM/propagate.h | 32 | ||||
-rw-r--r-- | tangle/benchmark-ldc.cu | 12 | ||||
-rw-r--r-- | tangle/channel-with-sphere.cu | 8 | ||||
-rw-r--r-- | tangle/ldc-2d.cu | 8 | ||||
-rw-r--r-- | tangle/ldc-3d.cu | 8 | ||||
-rw-r--r-- | tangle/magnus.cu | 10 | ||||
-rw-r--r-- | tangle/nozzle.cu | 8 | ||||
-rw-r--r-- | tangle/taylor-couette.cu | 8 |
12 files changed, 115 insertions, 120 deletions
diff --git a/tangle/LLBM/kernel/executor.h b/tangle/LLBM/kernel/executor.h index 942918d..cce023b 100644 --- a/tangle/LLBM/kernel/executor.h +++ b/tangle/LLBM/kernel/executor.h @@ -30,30 +30,6 @@ __global__ void call_operator( } } -template <typename OPERATOR, typename DESCRIPTOR, typename T, typename S, typename... ARGS> -__global__ void call_operator( - LatticeView<DESCRIPTOR,S> lattice - , bool* mask - , ARGS... args -) { - const std::size_t gid = blockIdx.x * blockDim.x + threadIdx.x; - if (!(gid < lattice.cuboid.volume) || !mask[gid]) { - return; - } - - S f_curr[DESCRIPTOR::q]; - S f_next[DESCRIPTOR::q]; - S* preshifted_f[DESCRIPTOR::q]; - for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { - preshifted_f[iPop] = lattice.pop(iPop, gid); - f_curr[iPop] = *preshifted_f[iPop]; - } - OPERATOR::template apply<T,S>(DESCRIPTOR(), f_curr, f_next, gid, std::forward<ARGS>(args)...); - for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { - *preshifted_f[iPop] = f_next[iPop]; - } -} - template <typename FUNCTOR, typename DESCRIPTOR, typename T, typename S, typename... ARGS> __global__ void call_functor( LatticeView<DESCRIPTOR,S> lattice @@ -110,19 +86,6 @@ __global__ void call_operator_using_list( OPERATOR::template apply<T,S>(lattice, index, count, std::forward<ARGS>(args)...); } -template <typename OPERATOR, typename DESCRIPTOR, typename T, typename S, typename... ARGS> -__global__ void call_operator_using_list( - DESCRIPTOR descriptor - , std::size_t count - , ARGS... args -) { - const std::size_t index = blockIdx.x * blockDim.x + threadIdx.x; - if (!(index < count)) { - return; - } - OPERATOR::template apply<T,S>(descriptor, index, count, std::forward<ARGS>(args)...); -} - template <typename FUNCTOR, typename DESCRIPTOR, typename T, typename S, typename... ARGS> __global__ void call_spatial_functor( LatticeView<DESCRIPTOR,S> lattice diff --git a/tangle/LLBM/lattice.h b/tangle/LLBM/lattice.h index 7157c78..d3a1840 100644 --- a/tangle/LLBM/lattice.h +++ b/tangle/LLBM/lattice.h @@ -31,9 +31,10 @@ template <typename... OPERATOR> void apply(OPERATOR... ops) { const auto block_size = 32; const auto block_count = (_cuboid.volume + block_size - 1) / block_size; - kernel::call_operators<DESCRIPTOR,T,S,OPERATOR...><<<block_count,block_size>>>( - _population.view(), ops... - ); + cuda::launch(kernel::call_operators<DESCRIPTOR,T,S,OPERATOR...>, + cuda::launch_configuration_t(block_count, block_size), + _population.view(), + ops...); } template <typename OPERATOR, typename... ARGS> @@ -45,27 +46,22 @@ template <typename OPERATOR, typename... ARGS> void call_operator(tag::call_by_cell_id, DeviceBuffer<std::size_t>& cells, ARGS... args) { const auto block_size = 32; const auto block_count = (cells.size() + block_size - 1) / block_size; - kernel::call_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>( - _population.view(), cells.device(), cells.size(), std::forward<ARGS>(args)... - ); -} - -template <typename OPERATOR, typename... ARGS> -void call_operator(tag::call_by_cell_id, DeviceBuffer<bool>& mask, ARGS... args) { - const auto block_size = 32; - const auto block_count = (_cuboid.volume + block_size - 1) / block_size; - kernel::call_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>( - _population.view(), mask.device(), std::forward<ARGS>(args)... - ); + cuda::launch(kernel::call_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...>, + cuda::launch_configuration_t(block_count, block_size), + _population.view(), + cells.device(), cells.size(), + std::forward<ARGS>(args)...); } template <typename OPERATOR, typename... ARGS> void call_operator(tag::call_by_list_index, std::size_t count, ARGS... args) { const auto block_size = 32; const auto block_count = (count + block_size - 1) / block_size; - kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>( - _population.view(), count, std::forward<ARGS>(args)... - ); + cuda::launch(kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...>, + cuda::launch_configuration_t(block_count, block_size), + _population.view(), + count, + std::forward<ARGS>(args)...); } template <typename FUNCTOR, typename... ARGS> @@ -74,21 +70,14 @@ void inspect(ARGS&&... args) { } template <typename FUNCTOR, typename... ARGS> -void call_functor(tag::call_by_cell_id, DeviceBuffer<std::size_t>& cells, ARGS... args) { - const auto block_size = 32; - const auto block_count = (cells.size() + block_size - 1) / block_size; - kernel::call_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>( - _population.view(), cells.device(), cells.size(), std::forward<ARGS>(args)... - ); -} - -template <typename FUNCTOR, typename... ARGS> void call_functor(tag::call_by_cell_id, DeviceBuffer<bool>& mask, ARGS... args) { const auto block_size = 32; const auto block_count = (_cuboid.volume + block_size - 1) / block_size; - kernel::call_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>( - _population.view(), mask.device(), std::forward<ARGS>(args)... - ); + cuda::launch(kernel::call_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...>, + cuda::launch_configuration_t(block_count, block_size), + _population.view(), + mask.device(), + std::forward<ARGS>(args)...); } template <typename FUNCTOR, typename... ARGS> @@ -97,9 +86,11 @@ void call_functor(tag::call_by_spatial_cell_mask, DeviceBuffer<bool>& mask, ARGS const dim3 grid((_cuboid.nX + block.x - 1) / block.x, (_cuboid.nY + block.y - 1) / block.y, (_cuboid.nZ + block.z - 1) / block.z); - kernel::call_spatial_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...><<<grid,block>>>( - _population.view(), mask.device(), std::forward<ARGS>(args)... - ); + cuda::launch(kernel::call_spatial_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...>, + cuda::launch_configuration_t(grid, block), + _population.view(), + mask.device(), + std::forward<ARGS>(args)...); } template <typename OPERATOR, typename... ARGS> @@ -111,9 +102,11 @@ template <typename OPERATOR, typename... ARGS> void tagged_helper(tag::post_process_by_list_index, std::size_t count, ARGS... args) { const auto block_size = 32; const auto block_count = (count + block_size - 1) / block_size; - kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>( - DESCRIPTOR(), count, std::forward<ARGS>(args)... - ); + cuda::launch(kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...>, + cuda::launch_configuration_t(block_count, block_size), + DESCRIPTOR(), + count, + std::forward<ARGS>(args)...); } template <typename OPERATOR, typename... ARGS> @@ -122,9 +115,11 @@ void tagged_helper(tag::post_process_by_spatial_cell_mask, DeviceBuffer<bool>& m const dim3 grid((_cuboid.nX + block.x - 1) / block.x, (_cuboid.nY + block.y - 1) / block.y, (_cuboid.nZ + block.z - 1) / block.z); - kernel::call_spatial_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<grid,block>>>( - _cuboid, mask.device(), std::forward<ARGS>(args)... - ); + cuda::launch(kernel::call_spatial_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...>, + cuda::launch_configuration_t(grid, block), + _cuboid, + mask.device(), + std::forward<ARGS>(args)...); } }; diff --git a/tangle/LLBM/materials.h b/tangle/LLBM/materials.h index d782d8d..5734688 100644 --- a/tangle/LLBM/materials.h +++ b/tangle/LLBM/materials.h @@ -24,7 +24,7 @@ public: descriptor::Cuboid<DESCRIPTOR> cuboid() const { return _cuboid; }; - + int get(std::size_t iCell) const { return _materials[iCell]; } diff --git a/tangle/LLBM/memory.h b/tangle/LLBM/memory.h index 97fec5c..149034b 100644 --- a/tangle/LLBM/memory.h +++ b/tangle/LLBM/memory.h @@ -4,31 +4,30 @@ #include <vector> #include <cstring> +#include <cuda/runtime_api.hpp> + template <typename T> class DeviceBuffer { protected: const std::size_t _size; - T* _data; + cuda::device_t _device; + cuda::memory::device::unique_ptr<T[]> _data; public: DeviceBuffer(std::size_t size): - _size(size) { - cudaMalloc(&_data, _size*sizeof(T)); - cudaMemset(_data, 0, _size*sizeof(T)); - } + _size(size), + _device(cuda::device::current::get()), + _data(cuda::memory::device::make_unique<T[]>(_device, size)) + { } DeviceBuffer(const T* data, std::size_t size): DeviceBuffer(size) { - cudaMemcpy(_data, data, size*sizeof(T), cudaMemcpyHostToDevice); + cuda::memory::copy(_data.get(), data, size*sizeof(T)); } DeviceBuffer(const std::vector<T>& data): DeviceBuffer(data.data(), data.size()) { } - - ~DeviceBuffer() { - cudaFree(_data); - } T* device() { - return _data; + return _data.get(); } std::size_t size() const { @@ -40,7 +39,7 @@ template <typename T> class SharedVector : public DeviceBuffer<T> { private: std::unique_ptr<T[]> _host_data; - + public: SharedVector(std::size_t size): DeviceBuffer<T>(size), @@ -57,11 +56,11 @@ public: } void syncHostFromDevice() { - cudaMemcpy(_host_data.get(), this->_data, this->_size*sizeof(T), cudaMemcpyDeviceToHost); + cuda::memory::copy(_host_data.get(), this->_data.get(), this->_size*sizeof(T)); } void syncDeviceFromHost() { - cudaMemcpy(this->_data, _host_data.get(), this->_size*sizeof(T), cudaMemcpyHostToDevice); + cuda::memory::copy(this->_data.get(), _host_data.get(), this->_size*sizeof(T)); } }; @@ -103,7 +102,7 @@ public: DeviceTexture(descriptor::CuboidD<3> c): DeviceTexture(c.nX, c.nY, c.nZ) { } - + ~DeviceTexture() { cudaFreeArray(_array); } diff --git a/tangle/LLBM/propagate.h b/tangle/LLBM/propagate.h index acb1d6c..91b1131 100644 --- a/tangle/LLBM/propagate.h +++ b/tangle/LLBM/propagate.h @@ -5,6 +5,7 @@ #include "kernel/propagate.h" #include <cuda.h> +#include <cuda/runtime_api.hpp> template <typename DESCRIPTOR, typename S> struct LatticeView { @@ -37,6 +38,7 @@ protected: public: CyclicPopulationBuffer(descriptor::Cuboid<DESCRIPTOR> cuboid); + ~CyclicPopulationBuffer(); LatticeView<DESCRIPTOR,S> view() { return LatticeView<DESCRIPTOR,S>{ _cuboid, _population.device() }; @@ -46,15 +48,13 @@ public: }; -std::size_t getDevicePageSize(int device_id=-1) { - if (device_id == -1) { - cudaGetDevice(&device_id); - } +std::size_t getDevicePageSize() { + auto device = cuda::device::current::get(); std::size_t granularity = 0; CUmemAllocationProp prop = {}; prop.type = CU_MEM_ALLOCATION_TYPE_PINNED; prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE; - prop.location.id = device_id; + prop.location.id = device.id(); cuMemGetAllocationGranularity(&granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM); return granularity; } @@ -69,12 +69,11 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer( _population(DESCRIPTOR::q) { - int device_id = -1; - cudaGetDevice(&device_id); + auto device = cuda::device::current::get(); _prop.type = CU_MEM_ALLOCATION_TYPE_PINNED; _prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE; - _prop.location.id = device_id; + _prop.location.id = device.id(); cuMemAddressReserve(&_ptr, 2 * _volume * DESCRIPTOR::q, 0, 0, 0); for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { @@ -85,7 +84,7 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer( } _access.location.type = CU_MEM_LOCATION_TYPE_DEVICE; - _access.location.id = 0; + _access.location.id = device.id(); _access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE; cuMemSetAccess(_ptr, 2 * _volume * DESCRIPTOR::q, &_access, 1); @@ -95,7 +94,7 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer( } for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { - _base[iPop] = device() + iPop * 2 * (_volume / sizeof(S)); + _base[iPop] = this->device() + iPop * 2 * (_volume / sizeof(S)); _population[iPop] = _base[iPop] + iPop * ((_volume / sizeof(S)) / DESCRIPTOR::q); } @@ -104,6 +103,15 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer( } template <typename DESCRIPTOR, typename S> +CyclicPopulationBuffer<DESCRIPTOR,S>::~CyclicPopulationBuffer() { + cuMemUnmap(_ptr, 2 * _volume * DESCRIPTOR::q); + for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { + cuMemRelease(_handle[iPop]); + } + cuMemAddressFree(_ptr, 2 * _volume * DESCRIPTOR::q); +} + +template <typename DESCRIPTOR, typename S> __device__ __forceinline__ S* LatticeView<DESCRIPTOR,S>::pop(pop_index_t iPop, std::size_t gid) const { return population[iPop] + gid; @@ -111,5 +119,7 @@ S* LatticeView<DESCRIPTOR,S>::pop(pop_index_t iPop, std::size_t gid) const { template <typename DESCRIPTOR, typename S> void CyclicPopulationBuffer<DESCRIPTOR,S>::stream() { - propagate<DESCRIPTOR,S><<<1,1>>>(view(), _base.device(), _volume / sizeof(S)); + cuda::launch(propagate<DESCRIPTOR,S>, + cuda::launch_configuration_t(1,1), + view(), _base.device(), _volume / sizeof(S)); } diff --git a/tangle/benchmark-ldc.cu b/tangle/benchmark-ldc.cu index 1400547..18b30d1 100644 --- a/tangle/benchmark-ldc.cu +++ b/tangle/benchmark-ldc.cu @@ -12,7 +12,7 @@ using T = float; using DESCRIPTOR = descriptor::D3Q19; void simulate(descriptor::Cuboid<DESCRIPTOR> cuboid, std::size_t nStep) { - cudaSetDevice(0); + auto current = cuda::device::current::get(); Lattice<DESCRIPTOR,T> lattice(cuboid); @@ -30,7 +30,7 @@ void simulate(descriptor::Cuboid<DESCRIPTOR> cuboid, std::size_t nStep) { auto box_mask = materials.mask_of_material(2); auto lid_mask = materials.mask_of_material(3); - cudaDeviceSynchronize(); + cuda::synchronize(current); for (std::size_t iStep=0; iStep < 100; ++iStep) { lattice.apply(Operator(BgkCollideO(), bulk_mask, 0.56), @@ -39,7 +39,7 @@ void simulate(descriptor::Cuboid<DESCRIPTOR> cuboid, std::size_t nStep) { lattice.stream(); } - cudaDeviceSynchronize(); + cuda::synchronize(current); auto start = timer::now(); @@ -50,7 +50,7 @@ void simulate(descriptor::Cuboid<DESCRIPTOR> cuboid, std::size_t nStep) { lattice.stream(); } - cudaDeviceSynchronize(); + cuda::synchronize(current); auto mlups = timer::mlups(cuboid.volume, nStep, start); @@ -58,6 +58,10 @@ void simulate(descriptor::Cuboid<DESCRIPTOR> cuboid, std::size_t nStep) { } int main(int argc, char* argv[]) { + if (cuda::device::count() == 0) { + std::cerr << "No CUDA devices on this system" << std::endl; + return -1; + } if (argc != 3) { std::cerr << "Invalid parameter count" << std::endl; return -1; diff --git a/tangle/channel-with-sphere.cu b/tangle/channel-with-sphere.cu index 29cc7de..280a142 100644 --- a/tangle/channel-with-sphere.cu +++ b/tangle/channel-with-sphere.cu @@ -15,7 +15,11 @@ using T = float; using DESCRIPTOR = descriptor::D3Q19; int main() { -cudaSetDevice(0); +if (cuda::device::count() == 0) { + std::cerr << "No CUDA devices on this system" << std::endl; + return -1; +} +auto current = cuda::device::current::get(); const descriptor::Cuboid<DESCRIPTOR> cuboid(448, 64, 64); Lattice<DESCRIPTOR,T> lattice(cuboid); @@ -55,7 +59,7 @@ auto inflow_mask = materials.mask_of_material(4); auto outflow_mask = materials.mask_of_material(5); auto edge_mask = materials.mask_of_material(6); -cudaDeviceSynchronize(); +cuda::synchronize(current); VolumetricExample renderer(cuboid); renderer.add<QCriterionS>(lattice, bulk_mask, obstacle); diff --git a/tangle/ldc-2d.cu b/tangle/ldc-2d.cu index acba98f..5fa36f9 100644 --- a/tangle/ldc-2d.cu +++ b/tangle/ldc-2d.cu @@ -13,7 +13,11 @@ using T = float; using DESCRIPTOR = descriptor::D2Q9; int main() { -cudaSetDevice(0); +if (cuda::device::count() == 0) { + std::cerr << "No CUDA devices on this system" << std::endl; + return -1; +} +auto current = cuda::device::current::get(); const descriptor::Cuboid<DESCRIPTOR> cuboid(500, 500); Lattice<DESCRIPTOR,T> lattice(cuboid); @@ -52,7 +56,7 @@ while (window.isOpen()) { Operator(BounceBackMovingWallO(), lid_mask, std::min(iStep*1e-3, 1.0)*u_lid, 0.f)); lattice.stream(); if (iStep % 100 == 0) { - cudaDeviceSynchronize(); + cuda::synchronize(current); lattice.inspect<CollectMomentsF>(bulk_mask, moments_rho.device(), moments_u.device()); renderSliceViewToTexture<<< dim3(cuboid.nX / 32 + 1, cuboid.nY / 32 + 1), diff --git a/tangle/ldc-3d.cu b/tangle/ldc-3d.cu index e9b42f2..c70b12e 100644 --- a/tangle/ldc-3d.cu +++ b/tangle/ldc-3d.cu @@ -15,7 +15,11 @@ using T = float; using DESCRIPTOR = descriptor::D3Q19; int main() { -cudaSetDevice(0); +if (cuda::device::count() == 0) { + std::cerr << "No CUDA devices on this system" << std::endl; + return -1; +} +auto current = cuda::device::current::get(); const descriptor::Cuboid<DESCRIPTOR> cuboid(100, 100, 100); Lattice<DESCRIPTOR,T> lattice(cuboid); @@ -34,7 +38,7 @@ auto bulk_mask = materials.mask_of_material(1); auto wall_mask = materials.mask_of_material(2); auto lid_mask = materials.mask_of_material(3); -cudaDeviceSynchronize(); +cuda::synchronize(current); auto none = [] __device__ (float3) -> float { return 1; }; VolumetricExample renderer(cuboid); diff --git a/tangle/magnus.cu b/tangle/magnus.cu index 08a4515..a98bcf3 100644 --- a/tangle/magnus.cu +++ b/tangle/magnus.cu @@ -13,7 +13,11 @@ using T = float; using DESCRIPTOR = descriptor::D2Q9; int main() { -cudaSetDevice(0); +if (cuda::device::count() == 0) { + std::cerr << "No CUDA devices on this system" << std::endl; + return -1; +} +auto current = cuda::device::current::get(); const descriptor::Cuboid<DESCRIPTOR> cuboid(1200, 500); Lattice<DESCRIPTOR,T> lattice(cuboid); @@ -64,7 +68,7 @@ auto inflow_mask = materials.mask_of_material(3); auto outflow_mask = materials.mask_of_material(4); auto edge_mask = materials.mask_of_material(5); -cudaDeviceSynchronize(); +cuda::synchronize(current); RenderWindow window("Magnus"); cudaSurfaceObject_t colormap; @@ -83,7 +87,7 @@ while (window.isOpen()) { lattice.apply<BouzidiO>(bouzidi.getCount(), bouzidi.getConfig()); lattice.stream(); if (iStep % 200 == 0) { - cudaDeviceSynchronize(); + cuda::synchronize(current); lattice.inspect<CollectMomentsF>(bulk_mask, moments_rho.device(), moments_u.device()); renderSliceViewToTexture<<< dim3(cuboid.nX / 32 + 1, cuboid.nY / 32 + 1), diff --git a/tangle/nozzle.cu b/tangle/nozzle.cu index 03c18f9..5278096 100644 --- a/tangle/nozzle.cu +++ b/tangle/nozzle.cu @@ -15,7 +15,11 @@ using T = float; using DESCRIPTOR = descriptor::D3Q19; int main() { -cudaSetDevice(0); +if (cuda::device::count() == 0) { + std::cerr << "No CUDA devices on this system" << std::endl; + return -1; +} +auto current = cuda::device::current::get(); const descriptor::Cuboid<DESCRIPTOR> cuboid(500, 80, 80); Lattice<DESCRIPTOR,T> lattice(cuboid); @@ -46,7 +50,7 @@ auto boundary_mask = materials.mask_of_material(2); auto inflow_mask = materials.mask_of_material(3); auto outflow_mask = materials.mask_of_material(4); -cudaDeviceSynchronize(); +cuda::synchronize(current); VolumetricExample renderer(cuboid); renderer.add<CurlNormS>(lattice, bulk_mask, obstacle); diff --git a/tangle/taylor-couette.cu b/tangle/taylor-couette.cu index 2e69bfb..6b39afa 100644 --- a/tangle/taylor-couette.cu +++ b/tangle/taylor-couette.cu @@ -15,7 +15,11 @@ using T = float; using DESCRIPTOR = descriptor::D3Q19; int main() { -cudaSetDevice(0); +if (cuda::device::count() == 0) { + std::cerr << "No CUDA devices on this system" << std::endl; + return -1; +} +auto current = cuda::device::current::get(); const descriptor::Cuboid<DESCRIPTOR> cuboid(500, 96, 96); Lattice<DESCRIPTOR,T> lattice(cuboid); @@ -55,7 +59,7 @@ auto bulk_list = materials.list_of_material(1); auto wall_mask = materials.mask_of_material(2); auto wall_list = materials.list_of_material(2); -cudaDeviceSynchronize(); +cuda::synchronize(current); VolumetricExample renderer(cuboid); renderer.add<VelocityNormS>(lattice, bulk_mask, inner_cylinder); |