1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
#pragma once
#include "population.h"
#include "propagation.h"

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include <cerrno>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <utility>
/// Combined mmap protection flags for pages that are both readable and writable.
constexpr int PROT_RW = PROT_READ | PROT_WRITE;
/// Integer base-2 logarithm of `x`, rounded down (index of the highest set bit).
///
/// Precondition: `x != 0`, as `__builtin_clz` is undefined for a zero argument.
constexpr std::uint32_t log2(std::uint32_t x) {
  constexpr int highest_bit_index = 31;
  const int leading_zeros = __builtin_clz(x);
  return static_cast<std::uint32_t>(highest_bit_index - leading_zeros);
}
namespace pattern {

/// Population storage whose streaming step only shifts pointers.
///
/// For each of the `population::q` populations one page-rounded array of `T`
/// lives in a POSIX shared memory object. Every array is mapped twice,
/// back-to-back, into a reserved virtual address range, so an access that runs
/// past the end of the first mapping lands at the beginning of the same
/// physical memory. `stream()` moves the per-population access pointer by
/// `population::offset` and wraps it back into the first mapping.
///
/// The object owns the mappings, the descriptor and the shared memory name;
/// it is therefore not copyable.
template <concepts::Arithmetic T>
class PS {
private:
  Cuboid _cuboid;
  /// Number of `T` elements in one (page-rounded) population array
  std::ptrdiff_t _volume = 0;

  /// Name of the shared memory object; empty while no object is linked
  std::string _shm_path;
  /// Descriptor of the shared memory object; -1 while not open
  int _shm_file = -1;

  /// Size in bytes of the address range reserved at `_base_buffer`
  std::size_t _mapped_bytes = 0;
  /// Start of the reserved address range; nullptr while nothing is mapped
  std::uint8_t* _base_buffer = nullptr;
  /// Start of the double mapping of each population
  std::uint8_t* _buffer[population::q] = {};
  /// Same as `_buffer`, typed; lower bound for the shiftable pointers
  T* _base[population::q] = {};
  /// Shiftable access pointers handed out by `get()`
  T* _f[population::q] = {};

  /// Creates a fresh shared memory object and stores its name and descriptor.
  ///
  /// Candidate names are tried with O_CREAT | O_EXCL until one is free, so
  /// uniqueness is decided atomically by shm_open itself.
  ///
  /// @throws std::runtime_error if creation fails or no free name is found
  void create_shm() {
    constexpr unsigned max_attempts = 1u << 16;
    const std::string prefix = "/lbm_" + std::to_string(getpid()) + "_";
    for (unsigned attempt = 0; attempt < max_attempts; ++attempt) {
      std::string candidate = prefix + std::to_string(attempt);
      const int fd = shm_open(candidate.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
      if (fd != -1) {
        _shm_file = fd;
        _shm_path = std::move(candidate);
        return;
      }
      // Only a name collision justifies trying the next candidate
      if (errno != EEXIST) {
        throw std::runtime_error("Failed to create shared memory object");
      }
    }
    throw std::runtime_error("Could not generate unique shared memory object name");
  }

  /// Releases every resource acquired so far; safe to call repeatedly and on
  /// a partially constructed object.
  void release() noexcept {
    if (_base_buffer != nullptr) {
      // Unmapping the reserved range also removes the fixed mappings inside it
      munmap(_base_buffer, _mapped_bytes);
      _base_buffer = nullptr;
    }
    if (_shm_file != -1) {
      close(_shm_file);
      _shm_file = -1;
    }
    if (!_shm_path.empty()) {
      shm_unlink(_shm_path.c_str());
      _shm_path.clear();
    }
  }

public:
  using value_t = T;

  /// Allocates and maps the storage for all populations of `cuboid`.
  ///
  /// @param cuboid  Lattice extent; its `volume()` determines the array size
  /// @throws std::invalid_argument if the cuboid is empty or the page-rounded
  ///         array size is not a multiple of `sizeof(T)`
  /// @throws std::runtime_error if a system call fails
  PS(Cuboid cuboid):
    _cuboid(cuboid)
  {
    const long page_size_query = sysconf(_SC_PAGESIZE);
    if (page_size_query <= 0) {
      throw std::runtime_error("Failed to query page size");
    }
    const std::size_t page_size = static_cast<std::size_t>(page_size_query);

    // The cache line size only steers the heuristic pre-shift below, so an
    // unsupported query (0 or -1) falls back to a common value instead of failing.
    const long line_size_query = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
    const std::size_t line_size = line_size_query > 0 ? static_cast<std::size_t>(line_size_query) : 64;

    const std::size_t cell_count = static_cast<std::size_t>(_cuboid.volume());
    if (cell_count == 0) {
      throw std::invalid_argument("Cuboid volume must be positive");
    }

    // Round the array size up to whole pages
    const std::size_t size = ((cell_count * sizeof(T) - 1) / page_size + 1) * page_size;
    // The mirror mapping starts `size` bytes after the first one while stream()
    // wraps by `_volume` elements; both must describe the same distance.
    if (size % sizeof(T) != 0) {
      throw std::invalid_argument("Array size must be multiple of sizeof(T)");
    }
    _volume = size / sizeof(T);
    _mapped_bytes = population::q * 2 * size;

    try {
      // Open shared memory object as physical lattice memory
      create_shm();

      // Resize to fit lattice populations
      if (ftruncate(_shm_file, population::q * size) == -1) {
        throw std::runtime_error("Failed to resize shared memory object");
      }

      // Allocate virtual address space for q times two consecutive lattices
      void* const reserved = mmap(nullptr, _mapped_bytes, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (reserved == MAP_FAILED) {
        throw std::runtime_error("Failed to reserve virtual address space");
      }
      _base_buffer = static_cast<std::uint8_t*>(reserved);

      for (unsigned iPop=0; iPop < population::q; ++iPop) {
        _buffer[iPop] = _base_buffer + iPop * 2 * size;

        // Map single physical lattice twice into virtual address space
        for (unsigned iHalf=0; iHalf < 2; ++iHalf) {
          void* const mapped = mmap(_buffer[iPop] + iHalf * size, size, PROT_RW,
                                    MAP_SHARED | MAP_FIXED, _shm_file, iPop * size);
          if (mapped == MAP_FAILED) {
            throw std::runtime_error("Failed to map shared memory object");
          }
        }

        // Store base pointer for reference
        _base[iPop] = reinterpret_cast<T*>(_buffer[iPop]);
        // Initialize shiftable f pointer to be used for lattice access
        _f[iPop] = _base[iPop];
      }
    } catch (...) {
      // The destructor does not run for a partially constructed object
      release();
      throw;
    }

    // Pre-shift to reduce cache and TLB conflict misses
    // due to alignment of start address and page-boundaries every `page_size/sizeof(T)`-steps
    const std::ptrdiff_t line_step = std::ptrdiff_t{1} << log2(static_cast<std::uint32_t>(line_size));
    const std::ptrdiff_t page_step = std::ptrdiff_t{1} << log2(static_cast<std::uint32_t>(page_size));
    for (unsigned iPop=0; iPop < population::q; ++iPop) {
      const std::ptrdiff_t shift = static_cast<std::ptrdiff_t>(iPop) * (line_step + page_step)
                                 / static_cast<std::ptrdiff_t>(sizeof(T));
      if (shift < _volume-1) {
        _f[iPop] += shift;
      }
    }
  }

  // Copies would share and then double-release the mappings and descriptor
  PS(const PS&) = delete;
  PS& operator=(const PS&) = delete;

  ~PS() {
    release();
  }

  /// Returns the current access pointer of population `iPop`.
  /// Both stage overloads hand out the same pointer.
  T* get(unsigned iPop, stage::pre_collision) {
    return _f[iPop];
  }

  /// Returns the current access pointer of population `iPop`.
  /// Both stage overloads hand out the same pointer.
  T* get(unsigned iPop, stage::post_collision) {
    return _f[iPop];
  }

  /// Shifts every access pointer by `-population::offset(_cuboid, iPop)` and
  /// wraps it back into `[_base, _base + _volume)`.
  ///
  /// A single wrap is applied, which assumes the offset magnitude is smaller
  /// than `_volume` (not verifiable from this file).
  void stream() {
    for (unsigned iPop=0; iPop < population::q; ++iPop) {
      _f[iPop] -= population::offset(_cuboid, iPop);
    }
    for (unsigned iPop=0; iPop < population::q; ++iPop) {
      if (_f[iPop] - _base[iPop] >= _volume) {
        _f[iPop] -= _volume;
      } else if (_f[iPop] < _base[iPop]) {
        _f[iPop] += _volume;
      }
    }
  }
};

}
|