path: root/src/pattern/ps.h
#pragma once

#include "population.h"
#include "propagation.h"

#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>

#include <cstdint>
#include <cstddef>
#include <cstdlib>
#include <string>
#include <stdexcept>

const int PROT_RW = PROT_READ | PROT_WRITE;

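// Floor of the base-2 logarithm; used below to round the cache line and page
// sizes down to powers of two when computing the per-population pre-shift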
constexpr std::uint32_t log2(std::uint32_t x) {
	return 31 - __builtin_clz(x);
}

namespace pattern {

template <concepts::Arithmetic T>
class PS {
private:
	Cuboid _cuboid;
	std::ptrdiff_t _volume;

	std::string _shm_path;
	int _shm_name;
	int _shm_file;

	std::uint8_t* _base_buffer;
	std::uint8_t* _buffer[population::q];

	T* _base[population::q];
	T* _f[population::q];

public:
	using value_t = T;

	PS(Cuboid cuboid):
		_cuboid(cuboid)
	{
		const std::size_t page_size = sysconf(_SC_PAGESIZE);
		const std::size_t line_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
		const std::size_t size = ((_cuboid.volume() * sizeof(T) - 1) / page_size + 1) * page_size;
		_volume = size / sizeof(T);

		if (size % page_size != 0) {
			throw std::invalid_argument("Array size must be multiple of PAGE_SIZE");
		}

		_shm_path = "/lbm_XXXXXX";
		_shm_name = mkstemp(const_cast<char*>(_shm_path.data()));
		if (_shm_name != -1) {
			throw std::runtime_error("Could not generate unique shared memory object name");
		}
		// Open shared memory object as physical lattice memory
		_shm_file = shm_open(_shm_path.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
		if (_shm_file == -1) {
			throw std::runtime_error("Failed to create shared memory object");
		}
		// Resize to fit lattice populations
		if (ftruncate(_shm_file, population::q * size) == -1) {
			throw std::runtime_error("Failed to resize shared memory object");
		}

		// Allocate virtual address space for q times two consecutive lattices
		_base_buffer = static_cast<std::uint8_t*>(
		  mmap(NULL, population::q * 2 * size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
		if (_base_buffer == MAP_FAILED) {
			throw std::runtime_error("Failed to allocate virtual address space");
		}

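		// Each population's physical lattice is mapped twice back-to-back into
		// this reservation, so any access past the end of the first mapping
		// transparently lands at the start of the same physical memory. This is
		// what allows stream() to propagate by shifting the _f pointers only.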
		for (unsigned iPop=0; iPop < population::q; ++iPop) {
			_buffer[iPop] = _base_buffer + iPop * 2 * size;
			// Map single physical lattice into virtual address space
			mmap(_buffer[iPop]       , size, PROT_RW, MAP_SHARED | MAP_FIXED, _shm_file, iPop * size);
			mmap(_buffer[iPop] + size, size, PROT_RW, MAP_SHARED | MAP_FIXED, _shm_file, iPop * size);

			// Store base pointer for reference
			_base[iPop] = reinterpret_cast<T*>(_buffer[iPop]);
			// Initialize shiftable f pointer to be used for lattice access
			_f[iPop] = _base[iPop];
		}

		// Pre-shift to reduce cache and TLB conflict misses
		// due to alignment of start address and page-boundaries every `page_size/sizeof(T)`-steps
		for (unsigned iPop=0; iPop < population::q; ++iPop) {
			std::ptrdiff_t shift = iPop * (1 << log2(line_size))
			                     + iPop * (1 << log2(page_size));
			shift /= sizeof(T);
			if (shift < _volume-1) {
				_f[iPop] += shift;
			}
		}
	}

	~PS() {
		munmap(_base_buffer, population::q * 2 * _volume * sizeof(T));
		shm_unlink(_shm_path.c_str());
		close(_shm_file);
	}

	T* get(unsigned iPop, stage::pre_collision) {
		return _f[iPop];
	}

	T* get(unsigned iPop, stage::post_collision) {
		return _f[iPop];
	}

	void stream() {
		for (unsigned iPop=0; iPop < population::q; ++iPop) {
			_f[iPop] -= population::offset(_cuboid, iPop);
		}

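		// Normalize the shifted pointers back into [_base, _base + _volume);
		// due to the second mapping, accesses of up to one full lattice beyond
		// _f remain backed by the same physical memory.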
		for (unsigned iPop=0; iPop < population::q; ++iPop) {
			if (_f[iPop] - _base[iPop] >= _volume) {
				_f[iPop] -= _volume;
			} else if (_f[iPop] < _base[iPop]) {
				_f[iPop] += _volume;
			}
		}
	}

};

}
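
// Hedged usage sketch of how a lattice update loop could drive the
// pointer-shift pattern. The lattice extents, the Cuboid constructor
// arguments, default-constructible stage tags and the collision step are
// assumptions for illustration only; just the PS interface above is given.
//
//   pattern::PS<double> lattice(Cuboid{128, 128, 128});
//   for (std::size_t step = 0; step < 1000; ++step) {
//     for (unsigned iPop = 0; iPop < population::q; ++iPop) {
//       double* f = lattice.get(iPop, stage::post_collision{});
//       // ... collide in place on the values reachable via f ...
//     }
//     lattice.stream(); // propagate by shifting the population pointers
//   }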