aboutsummaryrefslogtreecommitdiff
path: root/template/standalone.mako
blob: 39f0da0d3ea0f6ddd2367a9188be7183c98c1e96 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#include <array>
#include <cstdint>
#include <memory>
#include <chrono>
#include <iostream>

<%
def pop_offset(i):
    return i * geometry.volume
%>

void equilibrilize(${float_type}*    f_next,
                   ${float_type}*    f_prev,
                   const std::size_t gid)
{
    ${float_type}* preshifted_f_next = f_next + gid;
    ${float_type}* preshifted_f_prev = f_prev + gid;

% for i, w_i in enumerate(descriptor.w):
    preshifted_f_next[${pop_offset(i)}] = ${w_i.evalf()};
    preshifted_f_prev[${pop_offset(i)}] = ${w_i.evalf()};
% endfor
}

<%
def neighbor_offset(c_i):
    return {
        2: lambda:                                          c_i[1]*geometry.size_x + c_i[0],
        3: lambda: c_i[2]*geometry.size_x*geometry.size_y + c_i[1]*geometry.size_x + c_i[0]
    }.get(descriptor.d)()

def padding():
    return {
        2: lambda:                                     1*geometry.size_x + 1,
        3: lambda: 1*geometry.size_x*geometry.size_y + 1*geometry.size_x + 1
    }.get(descriptor.d)()
%>

void collide_and_stream(      ${float_type}* f_next,
                        const ${float_type}* f_prev,
                        const int*           material,
                        const std::size_t    gid)
{
    const int m = material[gid];

          ${float_type}* preshifted_f_next = f_next + gid;
    const ${float_type}* preshifted_f_prev = f_prev + gid;

% for i, c_i in enumerate(descriptor.c):
    const ${float_type} f_curr_${i} = preshifted_f_prev[${pop_offset(i) + neighbor_offset(-c_i)}];
% endfor

% for i, expr in enumerate(moments_subexpr):
    const ${float_type} ${expr[0]} = ${ccode(expr[1])};
% endfor

% for i, expr in enumerate(moments_assignment):
    ${float_type} ${ccode(expr)}
% endfor

% for i, expr in enumerate(collide_subexpr):
    const ${float_type} ${expr[0]} = ${ccode(expr[1])};
% endfor

% for i, expr in enumerate(collide_assignment):
    const ${float_type} ${ccode(expr)}
% endfor

% for i, expr in enumerate(collide_assignment):
    preshifted_f_next[${pop_offset(i)}] = m*f_next_${i} + (1.0-m)*${descriptor.w[i].evalf()};
% endfor
}

int main()
{
    auto f_a = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*padding()});
    auto f_b = std::make_unique<${float_type}[]>(${geometry.volume*descriptor.q + 2*padding()});
    auto material = std::make_unique<int[]>(${geometry.volume});

    // buffers are padded by maximum neighbor overreach to prevent invalid memory access
    ${float_type}* f_prev = f_a.get() + ${padding()};
    ${float_type}* f_next = f_b.get() + ${padding()};

    for (int iX = 0; iX < ${geometry.size_x}; ++iX) {
        for (int iY = 0; iY < ${geometry.size_y}; ++iY) {
            for (int iZ = 0; iZ < ${geometry.size_z}; ++iZ) {
                if (iX == 0 || iY == 0 || iZ == 0 || iX == ${geometry.size_x-1} || iY == ${geometry.size_y-1} || iZ == ${geometry.size_z-1}) {
                    material[iZ*${geometry.size_x*geometry.size_y} + iY*${geometry.size_x} + iX] = 0;
                } else {
                    material[iZ*${geometry.size_x*geometry.size_y} + iY*${geometry.size_x} + iX] = 1;
                }
            }
        }
    }

    for (std::size_t iCell = 0; iCell < ${geometry.volume}; ++iCell) {
        equilibrilize(f_prev, f_next, iCell);
    }

    const auto start = std::chrono::high_resolution_clock::now();

    for (std::size_t iStep = 0; iStep < ${steps}; ++iStep) {
        if (iStep % 2 == 0) {
            f_next = f_a.get();
            f_prev = f_b.get();
        } else {
            f_next = f_b.get();
            f_prev = f_a.get();
        }

% if enable_omp_simd:
#pragma omp simd
% endif
        for (std::size_t iCell = 0; iCell < ${geometry.volume}; ++iCell) {
            collide_and_stream(f_next, f_prev, material.get(), iCell);
        }
    }

    auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(
        std::chrono::high_resolution_clock::now() - start);

    std::cout << "MLUPS: " << ${steps*geometry.volume}/(1e6*duration.count()) << std::endl;

    // calculate average rho as a basic quality check
    double rho_sum = 0.0;

    for (std::size_t i = 0; i < ${geometry.volume*descriptor.q}; ++i) {
        rho_sum += f_next[i];
    }

    std::cout << "avg rho: " << rho_sum/${geometry.volume} << std::endl;

    return 0;
}