summary | refs | log | tree | commit | diff
path: root/lbm.org
diff options
context:
space:
mode:
author: Adrian Kummerlaender, 2021-06-10 18:47:13 +0200
committer: Adrian Kummerlaender, 2021-06-10 18:47:13 +0200
commit: ad2f632fb355c9bc91246552d97c7c1a4304ec99 (patch)
tree: b499810b669b18dd07c4ed4451df81adb0bce120 /lbm.org
parent: 4ec94c97879aafef15f7663135745e4ba61e62cf (diff)
download: LiterateLB-ad2f632fb355c9bc91246552d97c7c1a4304ec99.tar
LiterateLB-ad2f632fb355c9bc91246552d97c7c1a4304ec99.tar.gz
LiterateLB-ad2f632fb355c9bc91246552d97c7c1a4304ec99.tar.bz2
LiterateLB-ad2f632fb355c9bc91246552d97c7c1a4304ec99.tar.lz
LiterateLB-ad2f632fb355c9bc91246552d97c7c1a4304ec99.tar.xz
LiterateLB-ad2f632fb355c9bc91246552d97c7c1a4304ec99.tar.zst
LiterateLB-ad2f632fb355c9bc91246552d97c7c1a4304ec99.zip
Update GPU results of new system
Diffstat (limited to 'lbm.org')
-rw-r--r-- lbm.org 104
1 files changed, 57 insertions, 47 deletions
diff --git a/lbm.org b/lbm.org
index 6df9a50..7732c3a 100644
--- a/lbm.org
+++ b/lbm.org
@@ -270,6 +270,7 @@ a custom =ReplaceOptim= structure during the CSE optimization step that conditio
#+BEGIN_SRC python :session :results none
from sympy.codegen.rewriting import ReplaceOptim
+from sympy.simplify import cse_main
expand_pos_square = ReplaceOptim(
lambda e: e.is_Pow and e.exp.is_integer and e.exp == 2,
@@ -3666,44 +3667,42 @@ T x1 = f_curr[3] + f_curr[6];
T x2 = x0 + x1 + f_curr[0] + f_curr[4] + f_curr[5] + f_curr[7] + f_curr[8];
T x3 = f_curr[0] - f_curr[8];
T x4 = T{1} / (x2);
-T x10 = T{72.0000000000000}*f_curr[2];
-T x11 = T{72.0000000000000}*f_curr[6];
+T x9 = T{72.0000000000000}*f_curr[2];
+T x10 = T{72.0000000000000}*f_curr[6];
T rho = x2;
-T x31 = T{4.00000000000000}*rho;
+T x29 = T{4.00000000000000}*rho;
T u_0 = -x4*(x0 + x3 - f_curr[6] - f_curr[7]);
-T x6 = u_0*u_0;
-T x13 = -T{3.00000000000000}*x6;
-T x16 = T{6.00000000000000}*u_0;
-T x17 = -x16;
-T x30 = T{0.0277777777777778}*u_0;
+T x5 = u_0*u_0;
+T x12 = -T{3.00000000000000}*x5;
+T x15 = T{6.00000000000000}*u_0;
+T x16 = -x15;
T u_1 = -x4*(x1 + x3 - f_curr[2] - f_curr[5]);
-T x5 = T{0.0277777777777778}*u_1;
-T x7 = u_1*u_1;
-T x8 = x6 + x7;
-T x9 = pow(x8, T{-0.500000000000000});
-T x12 = -u_0 + u_1;
-T x14 = T{6.00000000000000}*u_1;
-T x15 = x13 + x14;
-T x18 = T{2.00000000000000} - T{3.00000000000000}*x7;
-T x19 = x17 + x18;
-T x20 = rho*(x15 + x19 + T{9.00000000000000}*(x12*x12));
-T x21 = u_0 - u_1;
-T x22 = x13 - x14;
-T x23 = x16 + x18;
-T x24 = rho*(x22 + x23 + T{9.00000000000000}*(x21*x21));
-T x25 = u_0 + u_1;
-T x26 = T{9.00000000000000}*(x25*x25);
-T x27 = rho*(x15 + x23 + x26) + rho*(x19 + x22 + x26) - T{72.0000000000000}*f_curr[0] - T{72.0000000000000}*f_curr[8];
-T x28 = x10 + x11 - x20 - x24 + x27;
-T x29 = x28*x9;
-T x32 = x18 + T{6.00000000000000}*x6;
-T x33 = -x10 - x11 + x20 + x24 + x27;
-T x34 = x31*(x16 + x32) + x31*(x17 + x32) + x33 - T{72.0000000000000}*f_curr[1] - T{72.0000000000000}*f_curr[7];
-T x35 = T{6.00000000000000}*x7 + T{2.00000000000000};
-T x36 = x31*(x15 + x35) + x31*(x22 + x35) + x33 - T{72.0000000000000}*f_curr[3] - T{72.0000000000000}*f_curr[5];
-T x37 = ((x28*u_0 + x36*u_1)*u_1 + (x28*u_1 + x34*u_0)*u_0)/x8;
-T n_0 = -x29*x5 - x30*x34*x9 + x30*x37;
-T n_1 = -x29*x30 - x36*x5*x9 + x37*x5;
+T x6 = u_1*u_1;
+T x7 = x5 + x6;
+T x8 = pow(x7, T{-0.500000000000000});
+T x11 = -u_0 + u_1;
+T x13 = T{6.00000000000000}*u_1;
+T x14 = x12 + x13;
+T x17 = T{2.00000000000000} - T{3.00000000000000}*x6;
+T x18 = x16 + x17;
+T x19 = rho*(x14 + x18 + T{9.00000000000000}*(x11*x11));
+T x20 = u_0 - u_1;
+T x21 = x12 - x13;
+T x22 = x15 + x17;
+T x23 = rho*(x21 + x22 + T{9.00000000000000}*(x20*x20));
+T x24 = u_0 + u_1;
+T x25 = T{9.00000000000000}*(x24*x24);
+T x26 = rho*(x14 + x22 + x25) + rho*(x18 + x21 + x25) - T{72.0000000000000}*f_curr[0] - T{72.0000000000000}*f_curr[8];
+T x27 = x10 - x19 - x23 + x26 + x9;
+T x28 = x27*x8;
+T x30 = x17 + T{6.00000000000000}*x5;
+T x31 = -x10 + x19 + x23 + x26 - x9;
+T x32 = x29*(x15 + x30) + x29*(x16 + x30) + x31 - T{72.0000000000000}*f_curr[1] - T{72.0000000000000}*f_curr[7];
+T x33 = T{6.00000000000000}*x6 + T{2.00000000000000};
+T x34 = x29*(x14 + x33) + x29*(x21 + x33) + x31 - T{72.0000000000000}*f_curr[3] - T{72.0000000000000}*f_curr[5];
+T x35 = ((x27*u_0 + x34*u_1)*u_1 + (x27*u_1 + x32*u_0)*u_0)/x7;
+T n_0 = -T{0.0277777777777778}*x28*u_1 - T{0.0277777777777778}*x32*x8*u_0 + T{0.0277777777777778}*x35*u_0;
+T n_1 = -T{0.0277777777777778}*x28*u_0 - T{0.0277777777777778}*x34*x8*u_1 + T{0.0277777777777778}*x35*u_1;
#+end_example
*** Determine shear layer visibility
@@ -3877,11 +3876,14 @@ T x17 = u_0 + u_1;
T x18 = T{9.00000000000000}*(x17*x17);
T x19 = x11 + x9 + T{-2.00000000000000};
T x20 = rho*(x14 + x18 + x2) - rho*(-x18 + x19 + x2 + x7) - T{72.0000000000000}*f_curr[0] - T{72.0000000000000}*f_curr[8];
-T x21 = T{4.00000000000000}*rho;
-T x22 = T{6.00000000000000}*x10 + x8;
-T x23 = -x0 - x1 + x15 + x16 + x20;
-T x24 = T{6.00000000000000}*x6;
-T strain = T{0.0277777777777778}*sqrt((x0 + x1 - x15 - x16 + x20)*(x0 + x1 - x15 - x16 + x20) + T{0.500000000000000}*((-x21*(x19 - x24) + x21*(x13 + x24 + 2) + x23 - 72*f_curr[3] - 72*f_curr[5])*(-x21*(x19 - x24) + x21*(x13 + x24 + 2) + x23 - 72*f_curr[3] - 72*f_curr[5])) + T{0.500000000000000}*((x21*(x2 + x22) + x21*(x22 + x3) + x23 - 72*f_curr[1] - 72*f_curr[7])*(x21*(x2 + x22) + x21*(x22 + x3) + x23 - 72*f_curr[1] - 72*f_curr[7])));
+T x21 = x0 + x1 - x15 - x16 + x20;
+T x22 = T{4.00000000000000}*rho;
+T x23 = T{6.00000000000000}*x10 + x8;
+T x24 = -x0 - x1 + x15 + x16 + x20;
+T x25 = x22*(x2 + x23) + x22*(x23 + x3) + x24 - T{72.0000000000000}*f_curr[1] - T{72.0000000000000}*f_curr[7];
+T x26 = T{6.00000000000000}*x6;
+T x27 = -x22*(x19 - x26) + x22*(x13 + x26 + T{2.00000000000000}) + x24 - T{72.0000000000000}*f_curr[3] - T{72.0000000000000}*f_curr[5];
+T strain = T{0.0277777777777778}*sqrt(x21*x21 + T{0.500000000000000}*(x25*x25) + T{0.500000000000000}*(x27*x27));
#+end_example
#+BEGIN_SRC cpp :tangle tangle/LLBM/kernel/collect_q_criterion.h
@@ -5788,21 +5790,29 @@ nvidia-smi --query-gpu=name --format=csv,noheader
#+END_SRC
#+RESULTS:
-: GeForce RTX 2070
+: GeForce RTX 3070
#+NAME: benchmark-ldc
-#+BEGIN_SRC bash :dir build :eval query :var min=64 :var max=128 :var step=16 :var nSteps=1000 :async t
+#+BEGIN_SRC bash :dir build :eval query :var min=64 :var max=256 :var step=16 :var nSteps=1000 :async t
for n in $(seq $min $step $max); do
./benchmark-ldc $n $nSteps
done
#+END_SRC
#+RESULTS: benchmark-ldc
-| 4 | 64 | 1000 | 2299.85 |
-| 4 | 80 | 1000 | 2321.52 |
-| 4 | 96 | 1000 | 2456.87 |
-| 4 | 112 | 1000 | 2456.11 |
-| 4 | 128 | 1000 | 2462.46 |
+| 4 | 64 | 1000 | 2416.56 |
+| 4 | 80 | 1000 | 2471.92 |
+| 4 | 96 | 1000 | 2534.33 |
+| 4 | 112 | 1000 | 2512.18 |
+| 4 | 128 | 1000 | 2569.58 |
+| 4 | 144 | 1000 | 2541.29 |
+| 4 | 160 | 1000 | 2599.92 |
+| 4 | 176 | 1000 | 2499.82 |
+| 4 | 192 | 1000 | 2513.63 |
+| 4 | 208 | 1000 | 2492.54 |
+| 4 | 224 | 1000 | 2533.04 |
+| 4 | 240 | 1000 | 2561.12 |
+| 4 | 256 | 1000 | 2511.97 |
* Open tasks
:properties: