From ad2f632fb355c9bc91246552d97c7c1a4304ec99 Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Thu, 10 Jun 2021 18:47:13 +0200 Subject: Update GPU results of new system --- lbm.org | 104 +++++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 57 insertions(+), 47 deletions(-) (limited to 'lbm.org') diff --git a/lbm.org b/lbm.org index 6df9a50..7732c3a 100644 --- a/lbm.org +++ b/lbm.org @@ -270,6 +270,7 @@ a custom =ReplaceOptim= structure during the CSE optimization step that conditio #+BEGIN_SRC python :session :results none from sympy.codegen.rewriting import ReplaceOptim +from sympy.simplify import cse_main expand_pos_square = ReplaceOptim( lambda e: e.is_Pow and e.exp.is_integer and e.exp == 2, @@ -3666,44 +3667,42 @@ T x1 = f_curr[3] + f_curr[6]; T x2 = x0 + x1 + f_curr[0] + f_curr[4] + f_curr[5] + f_curr[7] + f_curr[8]; T x3 = f_curr[0] - f_curr[8]; T x4 = T{1} / (x2); -T x10 = T{72.0000000000000}*f_curr[2]; -T x11 = T{72.0000000000000}*f_curr[6]; +T x9 = T{72.0000000000000}*f_curr[2]; +T x10 = T{72.0000000000000}*f_curr[6]; T rho = x2; -T x31 = T{4.00000000000000}*rho; +T x29 = T{4.00000000000000}*rho; T u_0 = -x4*(x0 + x3 - f_curr[6] - f_curr[7]); -T x6 = u_0*u_0; -T x13 = -T{3.00000000000000}*x6; -T x16 = T{6.00000000000000}*u_0; -T x17 = -x16; -T x30 = T{0.0277777777777778}*u_0; +T x5 = u_0*u_0; +T x12 = -T{3.00000000000000}*x5; +T x15 = T{6.00000000000000}*u_0; +T x16 = -x15; T u_1 = -x4*(x1 + x3 - f_curr[2] - f_curr[5]); -T x5 = T{0.0277777777777778}*u_1; -T x7 = u_1*u_1; -T x8 = x6 + x7; -T x9 = pow(x8, T{-0.500000000000000}); -T x12 = -u_0 + u_1; -T x14 = T{6.00000000000000}*u_1; -T x15 = x13 + x14; -T x18 = T{2.00000000000000} - T{3.00000000000000}*x7; -T x19 = x17 + x18; -T x20 = rho*(x15 + x19 + T{9.00000000000000}*(x12*x12)); -T x21 = u_0 - u_1; -T x22 = x13 - x14; -T x23 = x16 + x18; -T x24 = rho*(x22 + x23 + T{9.00000000000000}*(x21*x21)); -T x25 = u_0 + u_1; -T x26 = T{9.00000000000000}*(x25*x25); -T x27 = rho*(x15 + x23 + x26) + rho*(x19 + x22 + x26) - T{72.0000000000000}*f_curr[0] - T{72.0000000000000}*f_curr[8]; -T x28 = x10 + x11 - x20 - x24 + x27; -T x29 = x28*x9; -T x32 = x18 + T{6.00000000000000}*x6; -T x33 = -x10 - x11 + x20 + x24 + x27; -T x34 = x31*(x16 + x32) + x31*(x17 + x32) + x33 - T{72.0000000000000}*f_curr[1] - T{72.0000000000000}*f_curr[7]; -T x35 = T{6.00000000000000}*x7 + T{2.00000000000000}; -T x36 = x31*(x15 + x35) + x31*(x22 + x35) + x33 - T{72.0000000000000}*f_curr[3] - T{72.0000000000000}*f_curr[5]; -T x37 = ((x28*u_0 + x36*u_1)*u_1 + (x28*u_1 + x34*u_0)*u_0)/x8; -T n_0 = -x29*x5 - x30*x34*x9 + x30*x37; -T n_1 = -x29*x30 - x36*x5*x9 + x37*x5; +T x6 = u_1*u_1; +T x7 = x5 + x6; +T x8 = pow(x7, T{-0.500000000000000}); +T x11 = -u_0 + u_1; +T x13 = T{6.00000000000000}*u_1; +T x14 = x12 + x13; +T x17 = T{2.00000000000000} - T{3.00000000000000}*x6; +T x18 = x16 + x17; +T x19 = rho*(x14 + x18 + T{9.00000000000000}*(x11*x11)); +T x20 = u_0 - u_1; +T x21 = x12 - x13; +T x22 = x15 + x17; +T x23 = rho*(x21 + x22 + T{9.00000000000000}*(x20*x20)); +T x24 = u_0 + u_1; +T x25 = T{9.00000000000000}*(x24*x24); +T x26 = rho*(x14 + x22 + x25) + rho*(x18 + x21 + x25) - T{72.0000000000000}*f_curr[0] - T{72.0000000000000}*f_curr[8]; +T x27 = x10 - x19 - x23 + x26 + x9; +T x28 = x27*x8; +T x30 = x17 + T{6.00000000000000}*x5; +T x31 = -x10 + x19 + x23 + x26 - x9; +T x32 = x29*(x15 + x30) + x29*(x16 + x30) + x31 - T{72.0000000000000}*f_curr[1] - T{72.0000000000000}*f_curr[7]; +T x33 = T{6.00000000000000}*x6 + T{2.00000000000000}; +T x34 = x29*(x14 + x33) + x29*(x21 + x33) + x31 - T{72.0000000000000}*f_curr[3] - T{72.0000000000000}*f_curr[5]; +T x35 = ((x27*u_0 + x34*u_1)*u_1 + (x27*u_1 + x32*u_0)*u_0)/x7; +T n_0 = -T{0.0277777777777778}*x28*u_1 - T{0.0277777777777778}*x32*x8*u_0 + T{0.0277777777777778}*x35*u_0; +T n_1 = -T{0.0277777777777778}*x28*u_0 - T{0.0277777777777778}*x34*x8*u_1 + T{0.0277777777777778}*x35*u_1; #+end_example *** Determine shear layer visibility @@ -3877,11 +3876,14 @@ T x17 = u_0 + u_1; T x18 = T{9.00000000000000}*(x17*x17); T x19 = x11 + x9 + T{-2.00000000000000}; T x20 = rho*(x14 + x18 + x2) - rho*(-x18 + x19 + x2 + x7) - T{72.0000000000000}*f_curr[0] - T{72.0000000000000}*f_curr[8]; -T x21 = T{4.00000000000000}*rho; -T x22 = T{6.00000000000000}*x10 + x8; -T x23 = -x0 - x1 + x15 + x16 + x20; -T x24 = T{6.00000000000000}*x6; -T strain = T{0.0277777777777778}*sqrt((x0 + x1 - x15 - x16 + x20)*(x0 + x1 - x15 - x16 + x20) + T{0.500000000000000}*((-x21*(x19 - x24) + x21*(x13 + x24 + 2) + x23 - 72*f_curr[3] - 72*f_curr[5])*(-x21*(x19 - x24) + x21*(x13 + x24 + 2) + x23 - 72*f_curr[3] - 72*f_curr[5])) + T{0.500000000000000}*((x21*(x2 + x22) + x21*(x22 + x3) + x23 - 72*f_curr[1] - 72*f_curr[7])*(x21*(x2 + x22) + x21*(x22 + x3) + x23 - 72*f_curr[1] - 72*f_curr[7]))); +T x21 = x0 + x1 - x15 - x16 + x20; +T x22 = T{4.00000000000000}*rho; +T x23 = T{6.00000000000000}*x10 + x8; +T x24 = -x0 - x1 + x15 + x16 + x20; +T x25 = x22*(x2 + x23) + x22*(x23 + x3) + x24 - T{72.0000000000000}*f_curr[1] - T{72.0000000000000}*f_curr[7]; +T x26 = T{6.00000000000000}*x6; +T x27 = -x22*(x19 - x26) + x22*(x13 + x26 + T{2.00000000000000}) + x24 - T{72.0000000000000}*f_curr[3] - T{72.0000000000000}*f_curr[5]; +T strain = T{0.0277777777777778}*sqrt(x21*x21 + T{0.500000000000000}*(x25*x25) + T{0.500000000000000}*(x27*x27)); #+end_example #+BEGIN_SRC cpp :tangle tangle/LLBM/kernel/collect_q_criterion.h @@ -5788,21 +5790,29 @@ nvidia-smi --query-gpu=name --format=csv,noheader #+END_SRC #+RESULTS: -: GeForce RTX 2070 +: GeForce RTX 3070 #+NAME: benchmark-ldc -#+BEGIN_SRC bash :dir build :eval query :var min=64 :var max=128 :var step=16 :var nSteps=1000 :async t +#+BEGIN_SRC bash :dir build :eval query :var min=64 :var max=256 :var step=16 :var nSteps=1000 :async t for n in $(seq $min $step $max); do ./benchmark-ldc $n $nSteps done #+END_SRC #+RESULTS: benchmark-ldc -| 4 | 64 | 1000 | 2299.85 | -| 4 | 80 | 1000 | 2321.52 | -| 4 | 96 | 1000 | 2456.87 | -| 4 | 112 | 1000 | 2456.11 | -| 4 | 128 | 1000 | 2462.46 | +| 4 | 64 | 1000 | 2416.56 | +| 4 | 80 | 1000 | 2471.92 | +| 4 | 96 | 1000 | 2534.33 | +| 4 | 112 | 1000 | 2512.18 | +| 4 | 128 | 1000 | 2569.58 | +| 4 | 144 | 1000 | 2541.29 | +| 4 | 160 | 1000 | 2599.92 | +| 4 | 176 | 1000 | 2499.82 | +| 4 | 192 | 1000 | 2513.63 | +| 4 | 208 | 1000 | 2492.54 | +| 4 | 224 | 1000 | 2533.04 | +| 4 | 240 | 1000 | 2561.12 | +| 4 | 256 | 1000 | 2511.97 | * Open tasks :properties: -- cgit v1.2.3