From 4ec94c97879aafef15f7663135745e4ba61e62cf Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Mon, 17 May 2021 00:15:33 +0200 Subject: Extract first public LiterateLB version --- tangle/LLBM/propagate.h | 111 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 tangle/LLBM/propagate.h (limited to 'tangle/LLBM/propagate.h') diff --git a/tangle/LLBM/propagate.h b/tangle/LLBM/propagate.h new file mode 100644 index 0000000..d63ccd8 --- /dev/null +++ b/tangle/LLBM/propagate.h @@ -0,0 +1,111 @@ +#pragma once + +#include "memory.h" +#include "descriptor.h" +#include "kernel/propagate.h" + +#include + +template +struct LatticeView { + const descriptor::Cuboid cuboid; + S** population; + + __device__ __forceinline__ + S* pop(pop_index_t iPop, std::size_t gid) const; +}; + +template +class CyclicPopulationBuffer { +protected: + const descriptor::Cuboid _cuboid; + + const std::size_t _page_size; + const std::size_t _volume; + + CUmemGenericAllocationHandle _handle[DESCRIPTOR::q]; + CUmemAllocationProp _prop{}; + CUmemAccessDesc _access{}; + CUdeviceptr _ptr; + + SharedVector _base; + SharedVector _population; + + S* device() { + return reinterpret_cast(_ptr); + } + +public: + CyclicPopulationBuffer(descriptor::Cuboid cuboid); + + LatticeView view() { + return LatticeView{ _cuboid, _population.device() }; + } + + void stream(); + +}; + +std::size_t getDevicePageSize(int device_id=-1) { + if (device_id == -1) { + cudaGetDevice(&device_id); + } + std::size_t granularity = 0; + CUmemAllocationProp prop = {}; + prop.type = CU_MEM_ALLOCATION_TYPE_PINNED; + prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE; + prop.location.id = device_id; + cuMemGetAllocationGranularity(&granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM); + return granularity; +} + +template +CyclicPopulationBuffer::CyclicPopulationBuffer( + descriptor::Cuboid cuboid): + _cuboid(cuboid), + _page_size{getDevicePageSize()}, + _volume{((cuboid.volume * sizeof(S) - 1) / _page_size + 1) * _page_size}, + _base(DESCRIPTOR::q), + _population(DESCRIPTOR::q) +{ + + int device_id = -1; + cudaGetDevice(&device_id); + + _prop.type = CU_MEM_ALLOCATION_TYPE_PINNED; + _prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE; + _prop.location.id = device_id; + cuMemAddressReserve(&_ptr, 2 * _volume * DESCRIPTOR::q, 0, 0, 0); + + for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { + // per-population handle until cuMemMap accepts non-zero offset + cuMemCreate(&_handle[iPop], _volume, &_prop, 0); + cuMemMap(_ptr + iPop * 2 * _volume, _volume, 0, _handle[iPop], 0); + cuMemMap(_ptr + iPop * 2 * _volume + _volume, _volume, 0, _handle[iPop], 0); + } + + _access.location.type = CU_MEM_LOCATION_TYPE_DEVICE; + _access.location.id = 0; + _access.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE; + cuMemSetAccess(_ptr, 2 * _volume * DESCRIPTOR::q, &_access, 1); + cuMemsetD8(_ptr, 0, 2 * _volume * DESCRIPTOR::q); + + for (unsigned iPop=0; iPop < DESCRIPTOR::q; ++iPop) { + _base[iPop] = device() + iPop * 2 * (_volume / sizeof(S)); + _population[iPop] = _base[iPop] + iPop * ((_volume / sizeof(S)) / DESCRIPTOR::q); + } + + _base.syncDeviceFromHost(); + _population.syncDeviceFromHost(); +} + +template +__device__ __forceinline__ +S* LatticeView::pop(pop_index_t iPop, std::size_t gid) const { + return population[iPop] + gid; +} + +template +void CyclicPopulationBuffer::stream() { + propagate<<<1,1>>>(view(), _base.device(), _volume / sizeof(S)); +} -- cgit v1.2.3