Start using C++ cuda-api-wrapper instead of raw CUDA

author: Adrian Kummerlaender 2021-09-12 14:01:55 +0200
committer: Adrian Kummerlaender 2021-09-12 14:01:55 +0200
commit: 32dd41a728ce10113032e20955ba08f8de449857 (patch)
tree: 8a5f851a342590d05137b2c900a7d3c4e8c545b6 /tangle/LLBM/propagate.h
parent: 8bca21a550e0ef134d51c4c4c007720885d76791 (diff)
download: LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar
LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.gz
LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.bz2
LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.lz
LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.xz
LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.zst
LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.zip
1 files changed, 21 insertions, 11 deletions
diff --git a/tangle/LLBM/propagate.h b/tangle/LLBM/propagate.h
index acb1d6c..91b1131 100644
--- a/tangle/LLBM/propagate.h
+++ b/tangle/LLBM/propagate.h
@@ -5,6 +5,7 @@
 #include "kernel/propagate.h"
 
 #include <cuda.h>
+#include <cuda/runtime_api.hpp>
 
 template <typename DESCRIPTOR, typename S>
 struct LatticeView {
@@ -37,6 +38,7 @@ protected:
 
 public:
   CyclicPopulationBuffer(descriptor::Cuboid<DESCRIPTOR> cuboid);
+  ~CyclicPopulationBuffer();
 
   LatticeView<DESCRIPTOR,S> view() {
     return LatticeView<DESCRIPTOR,S>{ _cuboid, _population.device() };
@@ -46,15 +48,13 @@ public:
 
 };
 
-std::size_t getDevicePageSize(int device_id=-1) {
-  if (device_id == -1) {
-    cudaGetDevice(&device_id);
-  }
+std::size_t getDevicePageSize() {
+  auto device = cuda::device::current::get();
   std::size_t granularity = 0;
   CUmemAllocationProp prop = {};
   prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
   prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
-  prop.location.id = device_id;
+  prop.location.id = device.id();
   cuMemGetAllocationGranularity(&granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM);
   return granularity;
 }
@@ -69,12 +69,11 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer(
   _population(DESCRIPTOR::q)
 {
 
-  int device_id = -1;
-  cudaGetDevice(&device_id);
+  auto device = cuda::device::current::get();
 
   _prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
   _prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
-  _prop.location.id = device_id;
+  _prop.location.id = device.id();
   cuMemAddressReserve(&_ptr, 2 * _volume * DESCRIPTOR::q, 0, 0, 0);
 
   for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) {
@@ -85,7 +84,7 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer(
   }
 
   _access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
-  _access.location.id = 0;
+  _access.location.id = device.id();
   _access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
   cuMemSetAccess(_ptr, 2 * _volume * DESCRIPTOR::q, &_access, 1);
 
@@ -95,7 +94,7 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer(
   }
 
   for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) {
-    _base[iPop] = device() + iPop * 2 * (_volume / sizeof(S));
+    _base[iPop] = this->device() + iPop * 2 * (_volume / sizeof(S));
     _population[iPop] = _base[iPop] + iPop * ((_volume / sizeof(S)) / DESCRIPTOR::q);
   }
 
@@ -104,6 +103,15 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer(
 }
 
 template <typename DESCRIPTOR, typename S>
+CyclicPopulationBuffer<DESCRIPTOR,S>::~CyclicPopulationBuffer() {
+  cuMemUnmap(_ptr, 2 * _volume * DESCRIPTOR::q);
+  for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) {
+    cuMemRelease(_handle[iPop]);
+  }
+  cuMemAddressFree(_ptr, 2 * _volume * DESCRIPTOR::q);
+}
+
+template <typename DESCRIPTOR, typename S>
 __device__ __forceinline__
 S* LatticeView<DESCRIPTOR,S>::pop(pop_index_t iPop, std::size_t gid) const {
   return population[iPop] + gid;
@@ -111,5 +119,7 @@ S* LatticeView<DESCRIPTOR,S>::pop(pop_index_t iPop, std::size_t gid) const {
 
 template <typename DESCRIPTOR, typename S>
 void CyclicPopulationBuffer<DESCRIPTOR,S>::stream() {
-  propagate<DESCRIPTOR,S><<<1,1>>>(view(), _base.device(), _volume / sizeof(S));
+  cuda::launch(propagate<DESCRIPTOR,S>,
+               cuda::launch_configuration_t(1,1),
+               view(), _base.device(), _volume / sizeof(S));
 }
author	Adrian Kummerlaender	2021-09-12 14:01:55 +0200
committer	Adrian Kummerlaender	2021-09-12 14:01:55 +0200
commit	32dd41a728ce10113032e20955ba08f8de449857 (patch)
tree	8a5f851a342590d05137b2c900a7d3c4e8c545b6 /tangle/LLBM/propagate.h
parent	8bca21a550e0ef134d51c4c4c007720885d76791 (diff)
download	LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.gz LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.bz2 LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.lz LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.xz LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.zst LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.zip