summaryrefslogtreecommitdiff
path: root/tangle/LLBM/memory.h
blob: 149034baa813ebfeb50a7a207792de5ecf01b5dc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#pragma once

#include <memory>
#include <vector>
#include <cstring>

#include <cuda/runtime_api.hpp>

template <typename T>
class DeviceBuffer {
protected:
  const std::size_t _size;
  cuda::device_t _device;
  cuda::memory::device::unique_ptr<T[]> _data;

public:
  DeviceBuffer(std::size_t size):
    _size(size),
    _device(cuda::device::current::get()),
    _data(cuda::memory::device::make_unique<T[]>(_device, size))
  { }
  DeviceBuffer(const T* data, std::size_t size):
    DeviceBuffer(size) {
    cuda::memory::copy(_data.get(), data, size*sizeof(T));
  }
  DeviceBuffer(const std::vector<T>& data):
    DeviceBuffer(data.data(), data.size()) { }

  T* device() {
    return _data.get();
  }

  std::size_t size() const {
    return _size;
  }
};

template <typename T>
class SharedVector : public DeviceBuffer<T> {
private:
  std::unique_ptr<T[]> _host_data;

public:
  SharedVector(std::size_t size):
    DeviceBuffer<T>(size),
    _host_data(new T[size]{}) {
    syncDeviceFromHost();
  }

  T* host() {
    return _host_data.get();
  }

  T& operator[](unsigned i) {
    return host()[i];
  }

  void syncHostFromDevice() {
    cuda::memory::copy(_host_data.get(), this->_data.get(), this->_size*sizeof(T));
  }

  void syncDeviceFromHost() {
    cuda::memory::copy(this->_data.get(), _host_data.get(), this->_size*sizeof(T));
  }

};

template <typename T>
class DeviceTexture {
protected:
  cudaExtent _extent;
  cudaArray_t _array;

  cudaChannelFormatDesc _channel_desc;
  cudaResourceDesc _res_desc;
  cudaTextureDesc  _tex_desc;

  cudaTextureObject_t _texture;
  cudaSurfaceObject_t _surface;

public:
  DeviceTexture(std::size_t nX, std::size_t nY, std::size_t nZ=0):
    _extent(make_cudaExtent(nX,nY,nZ)),
    _channel_desc(cudaCreateChannelDesc<float>()) {
    cudaMalloc3DArray(&_array, &_channel_desc, _extent);

    std::memset(&_res_desc, 0, sizeof(_res_desc));
    _res_desc.resType = cudaResourceTypeArray;
    _res_desc.res.array.array = _array;

    std::memset(&_tex_desc, 0, sizeof(_tex_desc));
    _res_desc.resType = cudaResourceTypeArray;
    _tex_desc.addressMode[0]   = cudaAddressModeClamp;
    _tex_desc.addressMode[1]   = cudaAddressModeClamp;
    _tex_desc.addressMode[2]   = cudaAddressModeClamp;
    _tex_desc.filterMode       = cudaFilterModeLinear;
    _tex_desc.normalizedCoords = 0;

    cudaCreateTextureObject(&_texture, &_res_desc, &_tex_desc, NULL);
    cudaCreateSurfaceObject(&_surface, &_res_desc);
  }

  DeviceTexture(descriptor::CuboidD<3> c):
    DeviceTexture(c.nX, c.nY, c.nZ) { }

  ~DeviceTexture() {
    cudaFreeArray(_array);
  }

  cudaTextureObject_t getTexture() const {
    return _texture;
  }

  cudaSurfaceObject_t getSurface() const {
    return _surface;
  }

};

__device__ float3 colorFromTexture(cudaSurfaceObject_t colormap, float value) {
  uchar4 color{};
  value = clamp(value, 0.f, 1.f);
  surf2Dread(&color, colormap, unsigned(value * 999)*sizeof(uchar4), 0);
  return make_float3(color.x / 255.f,
                     color.y / 255.f,
                     color.z / 255.f);
}

__device__ float noiseFromTexture(cudaSurfaceObject_t noisemap, int x, int y) {
  uchar4 color{};
  surf2Dread(&color, noisemap, x*sizeof(uchar4), y);
  return color.x / 255.f;
}