diff options
| author | Adrian Kummerlaender | 2021-09-12 14:01:55 +0200 | 
|---|---|---|
| committer | Adrian Kummerlaender | 2021-09-12 14:01:55 +0200 | 
| commit | 32dd41a728ce10113032e20955ba08f8de449857 (patch) | |
| tree | 8a5f851a342590d05137b2c900a7d3c4e8c545b6 /tangle/LLBM/propagate.h | |
| parent | 8bca21a550e0ef134d51c4c4c007720885d76791 (diff) | |
| download | LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.gz LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.bz2 LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.lz LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.xz LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.tar.zst LiterateLB-32dd41a728ce10113032e20955ba08f8de449857.zip  | |
Start using C++ cuda-api-wrapper instead of raw CUDA
Diffstat (limited to 'tangle/LLBM/propagate.h')
| -rw-r--r-- | tangle/LLBM/propagate.h | 32 | 
1 files changed, 21 insertions, 11 deletions
diff --git a/tangle/LLBM/propagate.h b/tangle/LLBM/propagate.h index acb1d6c..91b1131 100644 --- a/tangle/LLBM/propagate.h +++ b/tangle/LLBM/propagate.h @@ -5,6 +5,7 @@  #include "kernel/propagate.h"  #include <cuda.h> +#include <cuda/runtime_api.hpp>  template <typename DESCRIPTOR, typename S>  struct LatticeView { @@ -37,6 +38,7 @@ protected:  public:    CyclicPopulationBuffer(descriptor::Cuboid<DESCRIPTOR> cuboid); +  ~CyclicPopulationBuffer();    LatticeView<DESCRIPTOR,S> view() {      return LatticeView<DESCRIPTOR,S>{ _cuboid, _population.device() }; @@ -46,15 +48,13 @@ public:  }; -std::size_t getDevicePageSize(int device_id=-1) { -  if (device_id == -1) { -    cudaGetDevice(&device_id); -  } +std::size_t getDevicePageSize() { +  auto device = cuda::device::current::get();    std::size_t granularity = 0;    CUmemAllocationProp prop = {};    prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;    prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE; -  prop.location.id = device_id; +  prop.location.id = device.id();    cuMemGetAllocationGranularity(&granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM);    return granularity;  } @@ -69,12 +69,11 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer(    _population(DESCRIPTOR::q)  { -  int device_id = -1; -  cudaGetDevice(&device_id); +  auto device = cuda::device::current::get();    _prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;    _prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE; -  _prop.location.id = device_id; +  _prop.location.id = device.id();    cuMemAddressReserve(&_ptr, 2 * _volume * DESCRIPTOR::q, 0, 0, 0);    for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { @@ -85,7 +84,7 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer(    }    _access.location.type = CU_MEM_LOCATION_TYPE_DEVICE; -  _access.location.id = 0; +  _access.location.id = device.id();    _access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;    cuMemSetAccess(_ptr, 2 * _volume * DESCRIPTOR::q, &_access, 1); @@ -95,7 +94,7 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer(    }    for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { -    _base[iPop] = device() + iPop * 2 * (_volume / sizeof(S)); +    _base[iPop] = this->device() + iPop * 2 * (_volume / sizeof(S));      _population[iPop] = _base[iPop] + iPop * ((_volume / sizeof(S)) / DESCRIPTOR::q);    } @@ -104,6 +103,15 @@ CyclicPopulationBuffer<DESCRIPTOR,S>::CyclicPopulationBuffer(  }  template <typename DESCRIPTOR, typename S> +CyclicPopulationBuffer<DESCRIPTOR,S>::~CyclicPopulationBuffer() { +  cuMemUnmap(_ptr, 2 * _volume * DESCRIPTOR::q); +  for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { +    cuMemRelease(_handle[iPop]); +  } +  cuMemAddressFree(_ptr, 2 * _volume * DESCRIPTOR::q); +} + +template <typename DESCRIPTOR, typename S>  __device__ __forceinline__  S* LatticeView<DESCRIPTOR,S>::pop(pop_index_t iPop, std::size_t gid) const {    return population[iPop] + gid; @@ -111,5 +119,7 @@ S* LatticeView<DESCRIPTOR,S>::pop(pop_index_t iPop, std::size_t gid) const {  template <typename DESCRIPTOR, typename S>  void CyclicPopulationBuffer<DESCRIPTOR,S>::stream() { -  propagate<DESCRIPTOR,S><<<1,1>>>(view(), _base.device(), _volume / sizeof(S)); +  cuda::launch(propagate<DESCRIPTOR,S>, +               cuda::launch_configuration_t(1,1), +               view(), _base.device(), _volume / sizeof(S));  }  | 
