#pragma once #include "population.h" #include "propagation.h" #include #include #include const int PROT_RW = PROT_READ | PROT_WRITE; constexpr std::uint32_t log2(std::uint32_t x) { return 31 - __builtin_clz(x); } namespace pattern { template class PS { private: Cuboid _cuboid; std::ptrdiff_t _volume; std::string _shm_path; int _shm_name; int _shm_file; std::uint8_t* _base_buffer; std::uint8_t* _buffer[population::q]; T* _base[population::q]; T* _f[population::q]; public: using value_t = T; PS(Cuboid cuboid): _cuboid(cuboid) { const std::size_t page_size = sysconf(_SC_PAGESIZE); const std::size_t line_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); const std::size_t size = ((_cuboid.volume() * sizeof(T) - 1) / page_size + 1) * page_size; _volume = size / sizeof(T); if (size % page_size != 0) { throw std::invalid_argument("Array size must be multiple of PAGE_SIZE"); } _shm_path = "/lbm_XXXXXX"; _shm_name = mkstemp(const_cast(_shm_path.data())); if (_shm_name != -1) { throw std::runtime_error("Could not generate unique shared memory object name"); } // Open shared memory object as physical lattice memory _shm_file = shm_open(_shm_path.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); if (_shm_file == -1) { throw std::runtime_error("Failed to create shared memory object"); } // Resize to fit lattice populations if (ftruncate(_shm_file, population::q * size) == -1) { throw std::runtime_error("Failed to resize shared memory object"); } // Allocate virtual address space for q times two consecutive lattices _base_buffer = static_cast( mmap(NULL, population::q * 2 * size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); for (unsigned iPop=0; iPop < population::q; ++iPop) { _buffer[iPop] = _base_buffer + iPop * 2 * size; // Map single physical lattice into virtual address space mmap(_buffer[iPop] , size, PROT_RW, MAP_SHARED | MAP_FIXED, _shm_file, iPop * size); mmap(_buffer[iPop] + size, size, PROT_RW, MAP_SHARED | MAP_FIXED, _shm_file, iPop * size); // Store base pointer for reference _base[iPop] = reinterpret_cast(_buffer[iPop]); // Initialize shiftable f pointer to be used for lattice access _f[iPop] = _base[iPop]; } // Pre-shift to reduce cache and TLB conflict misses // due to alignment of start address and page-boundaries every `page_size/sizeof(T)`-steps for (unsigned iPop=0; iPop < population::q; ++iPop) { std::ptrdiff_t shift = iPop * (1 << log2(line_size)) + iPop * (1 << log2(page_size)); shift /= sizeof(T); if (shift < _volume-1) { _f[iPop] += shift; } } } ~PS() { munmap(_base_buffer, population::q * 2 * _volume * sizeof(T)); shm_unlink(_shm_path.c_str()); close(_shm_name); unlink(_shm_path.c_str()); } T* get(unsigned iPop, stage::pre_collision) { return _f[iPop]; } T* get(unsigned iPop, stage::post_collision) { return _f[iPop]; } void stream() { for (unsigned iPop=0; iPop < population::q; ++iPop) { _f[iPop] -= population::offset(_cuboid, iPop); } for (unsigned iPop=0; iPop < population::q; ++iPop) { if (_f[iPop] - _base[iPop] >= _volume) { _f[iPop] -= _volume; } else if (_f[iPop] < _base[iPop]) { _f[iPop] += _volume; } } } }; }