summaryrefslogtreecommitdiff
path: root/tangle
diff options
context:
space:
mode:
authorAdrian Kummerlaender2021-09-12 14:01:55 +0200
committerAdrian Kummerlaender2021-09-12 14:01:55 +0200
commit32dd41a728ce10113032e20955ba08f8de449857 (patch)
tree8a5f851a342590d05137b2c900a7d3c4e8c545b6 /tangle
parent8bca21a550e0ef134d51c4c4c007720885d76791 (diff)
downloadLiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar
LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.gz
LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.bz2
LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.lz
LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.xz
LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.zst
LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.zip
Start using C++ cuda-api-wrapper instead of raw CUDA
Diffstat (limited to 'tangle')
-rw-r--r--tangle/LLBM/kernel/executor.h37
-rw-r--r--tangle/LLBM/lattice.h73
-rw-r--r--tangle/LLBM/materials.h2
-rw-r--r--tangle/LLBM/memory.h29
-rw-r--r--tangle/LLBM/propagate.h32
-rw-r--r--tangle/benchmark-ldc.cu12
-rw-r--r--tangle/channel-with-sphere.cu8
-rw-r--r--tangle/ldc-2d.cu8
-rw-r--r--tangle/ldc-3d.cu8
-rw-r--r--tangle/magnus.cu10
-rw-r--r--tangle/nozzle.cu8
-rw-r--r--tangle/taylor-couette.cu8
12 files changed, 115 insertions, 120 deletions
diff --git a/tangle/LLBM/kernel/executor.h b/tangle/LLBM/kernel/executor.h
index 942918d..cce023b 100644
--- a/tangle/LLBM/kernel/executor.h
+++ b/tangle/LLBM/kernel/executor.h
@@ -30,30 +30,6 @@ __global__ void call_operator(
}
}
-template <typename OPERATOR, typename DESCRIPTOR, typename T, typename S, typename... ARGS>
-__global__ void call_operator(
- LatticeView<DESCRIPTOR,S> lattice
- , bool* mask
- , ARGS... args
-) {
- const std::size_t gid = blockIdx.x * blockDim.x + threadIdx.x;
- if (!(gid < lattice.cuboid.volume) || !mask[gid]) {
- return;
- }
-
- S f_curr[DESCRIPTOR::q];
- S f_next[DESCRIPTOR::q];
- S* preshifted_f[DESCRIPTOR::q];
- for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) {
- preshifted_f[iPop] = lattice.pop(iPop, gid);
- f_curr[iPop] = *preshifted_f[iPop];
- }
- OPERATOR::template apply<T,S>(DESCRIPTOR(), f_curr, f_next, gid, std::forward<ARGS>(args)...);
- for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) {
- *preshifted_f[iPop] = f_next[iPop];
- }
-}
-
template <typename FUNCTOR, typename DESCRIPTOR, typename T, typename S, typename... ARGS>
__global__ void call_functor(
LatticeView<DESCRIPTOR,S> lattice
@@ -110,19 +86,6 @@ __global__ void call_operator_using_list(
OPERATOR::template apply<T,S>(lattice, index, count, std::forward<ARGS>(args)...);
}
-template <typename OPERATOR, typename DESCRIPTOR, typename T, typename S, typename... ARGS>
-__global__ void call_operator_using_list(
- DESCRIPTOR descriptor
- , std::size_t count
- , ARGS... args
-) {
- const std::size_t index = blockIdx.x * blockDim.x + threadIdx.x;
- if (!(index < count)) {
- return;
- }
- OPERATOR::template apply<T,S>(descriptor, index, count, std::forward<ARGS>(args)...);
-}
-
template <typename FUNCTOR, typename DESCRIPTOR, typename T, typename S, typename... ARGS>
__global__ void call_spatial_functor(
LatticeView<DESCRIPTOR,S> lattice
diff --git a/tangle/LLBM/lattice.h b/tangle/LLBM/lattice.h
index 7157c78..d3a1840 100644
--- a/tangle/LLBM/lattice.h
+++ b/tangle/LLBM/lattice.h
@@ -31,9 +31,10 @@ template <typename... OPERATOR>
void apply(OPERATOR... ops) {
const auto block_size = 32;
const auto block_count = (_cuboid.volume + block_size - 1) / block_size;
- kernel::call_operators<DESCRIPTOR,T,S,OPERATOR...><<<block_count,block_size>>>(
- _population.view(), ops...
- );
+ cuda::launch(kernel::call_operators<DESCRIPTOR,T,S,OPERATOR...>,
+ cuda::launch_configuration_t(block_count, block_size),
+ _population.view(),
+ ops...);
}
template <typename OPERATOR, typename... ARGS>
@@ -45,27 +46,22 @@ template <typename OPERATOR, typename... ARGS>
void call_operator(tag::call_by_cell_id, DeviceBuffer<std::size_t>& cells, ARGS... args) {
const auto block_size = 32;
const auto block_count = (cells.size() + block_size - 1) / block_size;
- kernel::call_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>(
- _population.view(), cells.device(), cells.size(), std::forward<ARGS>(args)...
- );
-}
-
-template <typename OPERATOR, typename... ARGS>
-void call_operator(tag::call_by_cell_id, DeviceBuffer<bool>& mask, ARGS... args) {
- const auto block_size = 32;
- const auto block_count = (_cuboid.volume + block_size - 1) / block_size;
- kernel::call_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>(
- _population.view(), mask.device(), std::forward<ARGS>(args)...
- );
+ cuda::launch(kernel::call_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...>,
+ cuda::launch_configuration_t(block_count, block_size),
+ _population.view(),
+ cells.device(), cells.size(),
+ std::forward<ARGS>(args)...);
}
template <typename OPERATOR, typename... ARGS>
void call_operator(tag::call_by_list_index, std::size_t count, ARGS... args) {
const auto block_size = 32;
const auto block_count = (count + block_size - 1) / block_size;
- kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>(
- _population.view(), count, std::forward<ARGS>(args)...
- );
+ cuda::launch(kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...>,
+ cuda::launch_configuration_t(block_count, block_size),
+ _population.view(),
+ count,
+ std::forward<ARGS>(args)...);
}
template <typename FUNCTOR, typename... ARGS>
@@ -74,21 +70,14 @@ void inspect(ARGS&&... args) {
}
template <typename FUNCTOR, typename... ARGS>
-void call_functor(tag::call_by_cell_id, DeviceBuffer<std::size_t>& cells, ARGS... args) {
- const auto block_size = 32;
- const auto block_count = (cells.size() + block_size - 1) / block_size;
- kernel::call_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>(
- _population.view(), cells.device(), cells.size(), std::forward<ARGS>(args)...
- );
-}
-
-template <typename FUNCTOR, typename... ARGS>
void call_functor(tag::call_by_cell_id, DeviceBuffer<bool>& mask, ARGS... args) {
const auto block_size = 32;
const auto block_count = (_cuboid.volume + block_size - 1) / block_size;
- kernel::call_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>(
- _population.view(), mask.device(), std::forward<ARGS>(args)...
- );
+ cuda::launch(kernel::call_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...>,
+ cuda::launch_configuration_t(block_count, block_size),
+ _population.view(),
+ mask.device(),
+ std::forward<ARGS>(args)...);
}
template <typename FUNCTOR, typename... ARGS>
@@ -97,9 +86,11 @@ void call_functor(tag::call_by_spatial_cell_mask, DeviceBuffer<bool>& mask, ARGS
const dim3 grid((_cuboid.nX + block.x - 1) / block.x,
(_cuboid.nY + block.y - 1) / block.y,
(_cuboid.nZ + block.z - 1) / block.z);
- kernel::call_spatial_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...><<<grid,block>>>(
- _population.view(), mask.device(), std::forward<ARGS>(args)...
- );
+ cuda::launch(kernel::call_spatial_functor<FUNCTOR,DESCRIPTOR,T,S,ARGS...>,
+ cuda::launch_configuration_t(grid, block),
+ _population.view(),
+ mask.device(),
+ std::forward<ARGS>(args)...);
}
template <typename OPERATOR, typename... ARGS>
@@ -111,9 +102,11 @@ template <typename OPERATOR, typename... ARGS>
void tagged_helper(tag::post_process_by_list_index, std::size_t count, ARGS... args) {
const auto block_size = 32;
const auto block_count = (count + block_size - 1) / block_size;
- kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<block_count,block_size>>>(
- DESCRIPTOR(), count, std::forward<ARGS>(args)...
- );
+ cuda::launch(kernel::call_operator_using_list<OPERATOR,DESCRIPTOR,T,S,ARGS...>,
+ cuda::launch_configuration_t(block_count, block_size),
+ DESCRIPTOR(),
+ count,
+ std::forward<ARGS>(args)...);
}
template <typename OPERATOR, typename... ARGS>
@@ -122,9 +115,11 @@ void tagged_helper(tag::post_process_by_spatial_cell_mask, DeviceBuffer<bool>& m
const dim3 grid((_cuboid.nX + block.x - 1) / block.x,
(_cuboid.nY + block.y - 1) / block.y,
(_cuboid.nZ + block.z - 1) / block.z);
- kernel::call_spatial_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...><<<grid,block>>>(
- _cuboid, mask.device(), std::forward<ARGS>(args)...
- );
+ cuda::launch(kernel::call_spatial_operator<OPERATOR,DESCRIPTOR,T,S,ARGS...>,
+ cuda::launch_configuration_t(grid, block),
+ _cuboid,
+ mask.device(),
+ std::forward<ARGS>(args)...);
}
};
diff --git a/tangle/LLBM/materials.h b/tangle/LLBM/materials.h
index d782d8d..5734688 100644
--- a/tangle/LLBM/materials.h
+++ b/tangle/LLBM/materials.h
@@ -24,7 +24,7 @@ public:
descriptor::Cuboid<DESCRIPTOR> cuboid() const {
return _cuboid;
};
-
+
int get(std::size_t iCell) const {
return _materials[iCell];
}
diff --git a/tangle/LLBM/memory.h b/tangle/LLBM/memory.h
index 97fec5c..149034b 100644
--- a/tangle/LLBM/memory.h
+++ b/tangle/LLBM/memory.h
@@ -4,31 +4,30 @@
#include <vector>
#include <cstring>
+#include <cuda/runtime_api.hpp>
+
template <typename T>
class DeviceBuffer {
protected:
const std::size_t _size;
- T* _data;
+ cuda::device_t _device;
+ cuda::memory::device::unique_ptr<T[]> _data;
public:
DeviceBuffer(std::size_t size):
- _size(size) {
- cudaMalloc(&_data, _size*sizeof(T));
- cudaMemset(_data, 0, _size*sizeof(T));
- }
+ _size(size),
+ _device(cuda::device::current::get()),
+ _data(cuda::memory::device::make_unique<T[]>(_device, size))
+ { }
DeviceBuffer(const T* data, std::size_t size):
DeviceBuffer(size) {
- cudaMemcpy(_data, data, size*sizeof(T), cudaMemcpyHostToDevice);
+ cuda::memory::copy(_data.get(), data, size*sizeof(T));
}
DeviceBuffer(const std::vector<T>& data):
DeviceBuffer(data.data(), data.size()) { }
-
- ~DeviceBuffer() {
- cudaFree(_data);
- }
T* device() {
- return _data;
+ return _data.get();
}
std::size_t size() const {
@@ -40,7 +39,7 @@ template <typename T>
class SharedVector : public DeviceBuffer<T> {
private:
std::unique_ptr<T[]> _host_data;
-
+
public:
SharedVector(std::size_t size):
DeviceBuffer<T>(size),
@@ -57,11 +56,11 @@ public:
}
void syncHostFromDevice() {
- cudaMemcpy(_host_data.get(), this->_data, this->_size*sizeof(T), cudaMemcpyDeviceToHost);
+ cuda::memory::copy(_host_data.get(), this->_data.get(), this->_size*sizeof(T));
}
void syncDeviceFromHost() {
- cudaMemcpy(this->_data, _host_data.get(), this->_size*sizeof(T), cudaMemcpyHostToDevice);
+ cuda::memory::copy(this->_data.get(), _host_data.get(), this->_size*sizeof(T));
}
};
@@ -103,7 +102,7 @@ public:
DeviceTexture(descriptor::CuboidD<3> c):
DeviceTexture(c.nX, c.nY, c.nZ) { }
-
+
~DeviceTexture() {
cudaFreeArray(_array);
}
diff --git a/tangle/LLBM/propagate.h b/tangle/LLBM/propagate.h
index acb1d6c..91b1131 100644
--- a/tangle/LLBM/propagate.h
+++ b/tangle/LLBM/propagate.h
@@ -5,6 +5,7 @@
#include "kernel/propagate.h"
#include <cuda.h>
+#include <cuda/runtime_api.hpp>
template <typename DESCRIPTOR, typename S>
struct LatticeView {
@@ -37,6 +38,7 @@ protected:
public:
CyclicPopulationBuffer(descriptor::Cuboid<DESCRIPTOR> cuboid);
+ ~CyclicPopulationBuffer();
LatticeView<DESCRIPTOR,S> view() {
return LatticeView<DESCRIPTOR,S>{ _cuboid, _population.device() };
@@ -46,15 +48,13 @@ public:
};
-std::size_t getDevicePageSize(int device_id=-1) {
- if (device_id == -1) {
- cudaGetDevice(&device_id);
- }
+std::size_t getDevicePageSize() {
+ auto device = cuda::device::current::get();
std::size_t granularity = 0;
CUmemAllocationProp prop = {};
prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
- prop.location.id = device_id;
+ prop.location.id = device.id();
cuMemGetAllocationGranularity(&granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM);
return granularity;
}
@@ -69,12 +69,11 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer(
_population(DESCRIPTOR::q)
{
- int device_id = -1;
- cudaGetDevice(&device_id);
+ auto device = cuda::device::current::get();
_prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
_prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
- _prop.location.id = device_id;
+ _prop.location.id = device.id();
cuMemAddressReserve(&_ptr, 2 * _volume * DESCRIPTOR::q, 0, 0, 0);
for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) {
@@ -85,7 +84,7 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer(
}
_access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
- _access.location.id = 0;
+ _access.location.id = device.id();
_access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
cuMemSetAccess(_ptr, 2 * _volume * DESCRIPTOR::q, &_access, 1);
@@ -95,7 +94,7 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer(
}
for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) {
- _base[iPop] = device() + iPop * 2 * (_volume / sizeof(S));
+ _base[iPop] = this->device() + iPop * 2 * (_volume / sizeof(S));
_population[iPop] = _base[iPop] + iPop * ((_volume / sizeof(S)) / DESCRIPTOR::q);
}
@@ -104,6 +103,15 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer(
}
template <typename DESCRIPTOR, typename S>
+CyclicPopulationBuffer<DESCRIPTOR,S>::~CyclicPopulationBuffer() {
+ cuMemUnmap(_ptr, 2 * _volume * DESCRIPTOR::q);
+ for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) {
+ cuMemRelease(_handle[iPop]);
+ }
+ cuMemAddressFree(_ptr, 2 * _volume * DESCRIPTOR::q);
+}
+
+template <typename DESCRIPTOR, typename S>
__device__ __forceinline__
S* LatticeView<DESCRIPTOR,S>::pop(pop_index_t iPop, std::size_t gid) const {
return population[iPop] + gid;
@@ -111,5 +119,7 @@ S* LatticeView<DESCRIPTOR,S>::pop(pop_index_t iPop, std::size_t gid) const {
template <typename DESCRIPTOR, typename S>
void CyclicPopulationBuffer<DESCRIPTOR,S>::stream() {
- propagate<DESCRIPTOR,S><<<1,1>>>(view(), _base.device(), _volume / sizeof(S));
+ cuda::launch(propagate<DESCRIPTOR,S>,
+ cuda::launch_configuration_t(1,1),
+ view(), _base.device(), _volume / sizeof(S));
}
diff --git a/tangle/benchmark-ldc.cu b/tangle/benchmark-ldc.cu
index 1400547..18b30d1 100644
--- a/tangle/benchmark-ldc.cu
+++ b/tangle/benchmark-ldc.cu
@@ -12,7 +12,7 @@ using T = float;
using DESCRIPTOR = descriptor::D3Q19;
void simulate(descriptor::Cuboid<DESCRIPTOR> cuboid, std::size_t nStep) {
- cudaSetDevice(0);
+ auto current = cuda::device::current::get();
Lattice<DESCRIPTOR,T> lattice(cuboid);
@@ -30,7 +30,7 @@ void simulate(descriptor::Cuboid<DESCRIPTOR> cuboid, std::size_t nStep) {
auto box_mask = materials.mask_of_material(2);
auto lid_mask = materials.mask_of_material(3);
- cudaDeviceSynchronize();
+ cuda::synchronize(current);
for (std::size_t iStep=0; iStep < 100; ++iStep) {
lattice.apply(Operator(BgkCollideO(), bulk_mask, 0.56),
@@ -39,7 +39,7 @@ void simulate(descriptor::Cuboid<DESCRIPTOR> cuboid, std::size_t nStep) {
lattice.stream();
}
- cudaDeviceSynchronize();
+ cuda::synchronize(current);
auto start = timer::now();
@@ -50,7 +50,7 @@ void simulate(descriptor::Cuboid<DESCRIPTOR> cuboid, std::size_t nStep) {
lattice.stream();
}
- cudaDeviceSynchronize();
+ cuda::synchronize(current);
auto mlups = timer::mlups(cuboid.volume, nStep, start);
@@ -58,6 +58,10 @@ void simulate(descriptor::Cuboid<DESCRIPTOR> cuboid, std::size_t nStep) {
}
int main(int argc, char* argv[]) {
+ if (cuda::device::count() == 0) {
+ std::cerr << "No CUDA devices on this system" << std::endl;
+ return -1;
+ }
if (argc != 3) {
std::cerr << "Invalid parameter count" << std::endl;
return -1;
diff --git a/tangle/channel-with-sphere.cu b/tangle/channel-with-sphere.cu
index 29cc7de..280a142 100644
--- a/tangle/channel-with-sphere.cu
+++ b/tangle/channel-with-sphere.cu
@@ -15,7 +15,11 @@ using T = float;
using DESCRIPTOR = descriptor::D3Q19;
int main() {
-cudaSetDevice(0);
+if (cuda::device::count() == 0) {
+ std::cerr << "No CUDA devices on this system" << std::endl;
+ return -1;
+}
+auto current = cuda::device::current::get();
const descriptor::Cuboid<DESCRIPTOR> cuboid(448, 64, 64);
Lattice<DESCRIPTOR,T> lattice(cuboid);
@@ -55,7 +59,7 @@ auto inflow_mask = materials.mask_of_material(4);
auto outflow_mask = materials.mask_of_material(5);
auto edge_mask = materials.mask_of_material(6);
-cudaDeviceSynchronize();
+cuda::synchronize(current);
VolumetricExample renderer(cuboid);
renderer.add<QCriterionS>(lattice, bulk_mask, obstacle);
diff --git a/tangle/ldc-2d.cu b/tangle/ldc-2d.cu
index acba98f..5fa36f9 100644
--- a/tangle/ldc-2d.cu
+++ b/tangle/ldc-2d.cu
@@ -13,7 +13,11 @@ using T = float;
using DESCRIPTOR = descriptor::D2Q9;
int main() {
-cudaSetDevice(0);
+if (cuda::device::count() == 0) {
+ std::cerr << "No CUDA devices on this system" << std::endl;
+ return -1;
+}
+auto current = cuda::device::current::get();
const descriptor::Cuboid<DESCRIPTOR> cuboid(500, 500);
Lattice<DESCRIPTOR,T> lattice(cuboid);
@@ -52,7 +56,7 @@ while (window.isOpen()) {
Operator(BounceBackMovingWallO(), lid_mask, std::min(iStep*1e-3, 1.0)*u_lid, 0.f));
lattice.stream();
if (iStep % 100 == 0) {
- cudaDeviceSynchronize();
+ cuda::synchronize(current);
lattice.inspect<CollectMomentsF>(bulk_mask, moments_rho.device(), moments_u.device());
renderSliceViewToTexture<<<
dim3(cuboid.nX / 32 + 1, cuboid.nY / 32 + 1),
diff --git a/tangle/ldc-3d.cu b/tangle/ldc-3d.cu
index e9b42f2..c70b12e 100644
--- a/tangle/ldc-3d.cu
+++ b/tangle/ldc-3d.cu
@@ -15,7 +15,11 @@ using T = float;
using DESCRIPTOR = descriptor::D3Q19;
int main() {
-cudaSetDevice(0);
+if (cuda::device::count() == 0) {
+ std::cerr << "No CUDA devices on this system" << std::endl;
+ return -1;
+}
+auto current = cuda::device::current::get();
const descriptor::Cuboid<DESCRIPTOR> cuboid(100, 100, 100);
Lattice<DESCRIPTOR,T> lattice(cuboid);
@@ -34,7 +38,7 @@ auto bulk_mask = materials.mask_of_material(1);
auto wall_mask = materials.mask_of_material(2);
auto lid_mask = materials.mask_of_material(3);
-cudaDeviceSynchronize();
+cuda::synchronize(current);
auto none = [] __device__ (float3) -> float { return 1; };
VolumetricExample renderer(cuboid);
diff --git a/tangle/magnus.cu b/tangle/magnus.cu
index 08a4515..a98bcf3 100644
--- a/tangle/magnus.cu
+++ b/tangle/magnus.cu
@@ -13,7 +13,11 @@ using T = float;
using DESCRIPTOR = descriptor::D2Q9;
int main() {
-cudaSetDevice(0);
+if (cuda::device::count() == 0) {
+ std::cerr << "No CUDA devices on this system" << std::endl;
+ return -1;
+}
+auto current = cuda::device::current::get();
const descriptor::Cuboid<DESCRIPTOR> cuboid(1200, 500);
Lattice<DESCRIPTOR,T> lattice(cuboid);
@@ -64,7 +68,7 @@ auto inflow_mask = materials.mask_of_material(3);
auto outflow_mask = materials.mask_of_material(4);
auto edge_mask = materials.mask_of_material(5);
-cudaDeviceSynchronize();
+cuda::synchronize(current);
RenderWindow window("Magnus");
cudaSurfaceObject_t colormap;
@@ -83,7 +87,7 @@ while (window.isOpen()) {
lattice.apply<BouzidiO>(bouzidi.getCount(), bouzidi.getConfig());
lattice.stream();
if (iStep % 200 == 0) {
- cudaDeviceSynchronize();
+ cuda::synchronize(current);
lattice.inspect<CollectMomentsF>(bulk_mask, moments_rho.device(), moments_u.device());
renderSliceViewToTexture<<<
dim3(cuboid.nX / 32 + 1, cuboid.nY / 32 + 1),
diff --git a/tangle/nozzle.cu b/tangle/nozzle.cu
index 03c18f9..5278096 100644
--- a/tangle/nozzle.cu
+++ b/tangle/nozzle.cu
@@ -15,7 +15,11 @@ using T = float;
using DESCRIPTOR = descriptor::D3Q19;
int main() {
-cudaSetDevice(0);
+if (cuda::device::count() == 0) {
+ std::cerr << "No CUDA devices on this system" << std::endl;
+ return -1;
+}
+auto current = cuda::device::current::get();
const descriptor::Cuboid<DESCRIPTOR> cuboid(500, 80, 80);
Lattice<DESCRIPTOR,T> lattice(cuboid);
@@ -46,7 +50,7 @@ auto boundary_mask = materials.mask_of_material(2);
auto inflow_mask = materials.mask_of_material(3);
auto outflow_mask = materials.mask_of_material(4);
-cudaDeviceSynchronize();
+cuda::synchronize(current);
VolumetricExample renderer(cuboid);
renderer.add<CurlNormS>(lattice, bulk_mask, obstacle);
diff --git a/tangle/taylor-couette.cu b/tangle/taylor-couette.cu
index 2e69bfb..6b39afa 100644
--- a/tangle/taylor-couette.cu
+++ b/tangle/taylor-couette.cu
@@ -15,7 +15,11 @@ using T = float;
using DESCRIPTOR = descriptor::D3Q19;
int main() {
-cudaSetDevice(0);
+if (cuda::device::count() == 0) {
+ std::cerr << "No CUDA devices on this system" << std::endl;
+ return -1;
+}
+auto current = cuda::device::current::get();
const descriptor::Cuboid<DESCRIPTOR> cuboid(500, 96, 96);
Lattice<DESCRIPTOR,T> lattice(cuboid);
@@ -55,7 +59,7 @@ auto bulk_list = materials.list_of_material(1);
auto wall_mask = materials.mask_of_material(2);
auto wall_list = materials.list_of_material(2);
-cudaDeviceSynchronize();
+cuda::synchronize(current);
VolumetricExample renderer(cuboid);
renderer.add<VelocityNormS>(lattice, bulk_mask, inner_cylinder);