From ad2f632fb355c9bc91246552d97c7c1a4304ec99 Mon Sep 17 00:00:00 2001 From: Adrian Kummerlaender Date: Thu, 10 Jun 2021 18:47:13 +0200 Subject: Update GPU results of new system --- tangle/LLBM/kernel/smagorinsky_collide.h | 140 ++++++++++++++++--------------- 1 file changed, 73 insertions(+), 67 deletions(-) (limited to 'tangle/LLBM/kernel/smagorinsky_collide.h') diff --git a/tangle/LLBM/kernel/smagorinsky_collide.h b/tangle/LLBM/kernel/smagorinsky_collide.h index 3489479..a7355c5 100644 --- a/tangle/LLBM/kernel/smagorinsky_collide.h +++ b/tangle/LLBM/kernel/smagorinsky_collide.h @@ -75,13 +75,13 @@ __device__ static void apply(descriptor::D3Q19, S f_curr[19], S f_next[19], std: T x7 = f_curr[0] - f_curr[18]; T x8 = T{72.0000000000000}*f_curr[5]; T x9 = T{72.0000000000000}*f_curr[13]; - T x42 = T{72.0000000000000}*f_curr[1]; - T x43 = T{72.0000000000000}*f_curr[17]; - T x61 = T{72.0000000000000}*f_curr[4]; - T x62 = T{72.0000000000000}*f_curr[14]; + T x43 = T{72.0000000000000}*f_curr[1]; + T x44 = T{72.0000000000000}*f_curr[17]; + T x63 = T{72.0000000000000}*f_curr[4]; + T x64 = T{72.0000000000000}*f_curr[14]; T rho = x3; - T x74 = T{2.00000000000000}*rho; - T x89 = T{2.00000000000000}*rho; + T x77 = T{2.00000000000000}*rho; + T x95 = T{2.00000000000000}*rho; T u_0 = x6*(x0 + x4 + x5 - f_curr[1] - f_curr[5] - f_curr[8]); T x12 = -u_0; T x14 = T{6.00000000000000}*u_0; @@ -89,7 +89,7 @@ __device__ static void apply(descriptor::D3Q19, S f_curr[19], S f_next[19], std: T x18 = T{3.00000000000000}*x17; T x26 = -x14; T x35 = T{2.00000000000000} - x18; - T x75 = T{6.00000000000000}*x17; + T x78 = T{6.00000000000000}*x17; T u_1 = x6*(x1 + x4 + x7 - f_curr[12] - f_curr[13] - f_curr[4]); T x10 = T{6.00000000000000}*u_1; T x11 = -x10; @@ -103,7 +103,7 @@ __device__ static void apply(descriptor::D3Q19, S f_curr[19], S f_next[19], std: T x33 = T{9.00000000000000}*(x32*x32); T x34 = -x20; T x36 = x34 + x35; - T x81 = T{6.00000000000000}*x19; + T x85 = T{6.00000000000000}*x19; T u_2 = x6*(x2 + x5 + x7 - f_curr[14] - f_curr[16] - f_curr[17]); T x15 = u_2*u_2; T x16 = T{3.00000000000000}*x15; @@ -119,65 +119,71 @@ __device__ static void apply(descriptor::D3Q19, S f_curr[19], S f_next[19], std: T x39 = x14 + x33 + x36 + x38; T x40 = x14 + x29 - x33; T x41 = rho*x39 - rho*x40 - T{72.0000000000000}*f_curr[11] - T{72.0000000000000}*f_curr[7]; - T x44 = T{6.00000000000000}*u_2; - T x45 = -x44; - T x46 = x12 + u_2; - T x47 = x23 + x45 - T{9.00000000000000}*x46*x46; - T x48 = rho*x47; - T x49 = -u_2; - T x50 = x49 + u_0; - T x51 = x22 + x44; - T x52 = x26 + x51 - T{9.00000000000000}*x50*x50; - T x53 = rho*x52; - T x54 = u_0 + u_2; - T x55 = T{9.00000000000000}*(x54*x54); - T x56 = x36 + x44; - T x57 = x14 + x37; - T x58 = x55 + x56 + x57; - T x59 = x14 + x51 - x55; - T x60 = rho*x58 - rho*x59 - T{72.0000000000000}*f_curr[15] - T{72.0000000000000}*f_curr[3]; - T x63 = x27 + u_2; - T x64 = x29 + x45 - T{9.00000000000000}*x63*x63; - T x65 = rho*x64; - T x66 = x49 + u_1; - T x67 = x11 + x51 - T{9.00000000000000}*x66*x66; - T x68 = rho*x67; - T x69 = u_1 + u_2; - T x70 = T{9.00000000000000}*(x69*x69); - T x71 = x38 + x56 + x70; - T x72 = x29 + x44 - x70; - T x73 = rho*x71 - rho*x72 - T{72.0000000000000}*f_curr[0] - T{72.0000000000000}*f_curr[18]; - T x76 = x16 + T{-2.00000000000000}; - T x77 = x14 + x20 - x75 + x76; - T x78 = x34 + x57 + x75 + T{2.00000000000000}; - T x79 = -x42 - x43 - x48 - x53 + x60; - T x80 = -x25 - x31 + x41 - x8 - x9; - T x82 = x10 + x18 + x76 - x81; - T x83 = x35 + x38 + x81; - T x84 = -x61 - x62 - x65 - x68 + x73; - T x85 = T{6.00000000000000}*x15; - T x86 = x21 + x44 - x85; - T x87 = x56 + x85; - T x88 = T{1} / (tau + sqrt(T{0.707106781186548}*(smagorinsky*smagorinsky)*sqrt((x25 + x31 + x41 + x8 + x9)*(x25 + x31 + x41 + x8 + x9) + (x42 + x43 + x48 + x53 + x60)*(x42 + x43 + x48 + x53 + x60) + (x61 + x62 + x65 + x68 + x73)*(x61 + x62 + x65 + x68 + x73) + T{0.500000000000000}*((-x74*x77 + x74*x78 + x79 + x80 - 72*f_curr[10] - 72*f_curr[8])*(-x74*x77 + x74*x78 + x79 + x80 - 72*f_curr[10] - 72*f_curr[8])) + T{0.500000000000000}*((-x74*x82 + x74*x83 + x80 + x84 - 72*f_curr[12] - 72*f_curr[6])*(-x74*x82 + x74*x83 + x80 + x84 - 72*f_curr[12] - 72*f_curr[6])) + T{0.500000000000000}*((-x74*x86 + x74*x87 + x79 + x84 - 72*f_curr[16] - 72*f_curr[2])*(-x74*x86 + x74*x87 + x79 + x84 - 72*f_curr[16] - 72*f_curr[2]))) + tau*tau)); - f_next[0] = T{0.0138888888888889}*x88*(x71*x89 - T{144.000000000000}*f_curr[0]) + f_curr[0]; - f_next[1] = -T{0.0138888888888889}*x88*(x47*x89 + T{144.000000000000}*f_curr[1]) + f_curr[1]; - f_next[2] = T{0.0277777777777778}*x88*(x87*x89 - T{72.0000000000000}*f_curr[2]) + f_curr[2]; - f_next[3] = T{0.0138888888888889}*x88*(x58*x89 - T{144.000000000000}*f_curr[3]) + f_curr[3]; - f_next[4] = -T{0.0138888888888889}*x88*(x64*x89 + T{144.000000000000}*f_curr[4]) + f_curr[4]; - f_next[5] = -T{0.0138888888888889}*x88*(x24*x89 + T{144.000000000000}*f_curr[5]) + f_curr[5]; - f_next[6] = T{0.0277777777777778}*x88*(x83*x89 - T{72.0000000000000}*f_curr[6]) + f_curr[6]; - f_next[7] = T{0.0138888888888889}*x88*(x39*x89 - T{144.000000000000}*f_curr[7]) + f_curr[7]; - f_next[8] = -T{0.0277777777777778}*x88*(x77*x89 + T{72.0000000000000}*f_curr[8]) + f_curr[8]; - f_next[9] = -T{0.166666666666667}*x88*(x22*x89 + T{12.0000000000000}*f_curr[9]) + f_curr[9]; - f_next[10] = T{0.0277777777777778}*x88*(x78*x89 - T{72.0000000000000}*f_curr[10]) + f_curr[10]; - f_next[11] = -T{0.0138888888888889}*x88*(x40*x89 + T{144.000000000000}*f_curr[11]) + f_curr[11]; - f_next[12] = -T{0.0277777777777778}*x88*(x82*x89 + T{72.0000000000000}*f_curr[12]) + f_curr[12]; - f_next[13] = -T{0.0138888888888889}*x88*(x30*x89 + T{144.000000000000}*f_curr[13]) + f_curr[13]; - f_next[14] = -T{0.0138888888888889}*x88*(x67*x89 + T{144.000000000000}*f_curr[14]) + f_curr[14]; - f_next[15] = -T{0.0138888888888889}*x88*(x59*x89 + T{144.000000000000}*f_curr[15]) + f_curr[15]; - f_next[16] = -T{0.0277777777777778}*x88*(x86*x89 + T{72.0000000000000}*f_curr[16]) + f_curr[16]; - f_next[17] = -T{0.0138888888888889}*x88*(x52*x89 + T{144.000000000000}*f_curr[17]) + f_curr[17]; - f_next[18] = -T{0.0138888888888889}*x88*(x72*x89 + T{144.000000000000}*f_curr[18]) + f_curr[18]; + T x42 = x25 + x31 + x41 + x8 + x9; + T x45 = T{6.00000000000000}*u_2; + T x46 = -x45; + T x47 = x12 + u_2; + T x48 = x23 + x46 - T{9.00000000000000}*x47*x47; + T x49 = rho*x48; + T x50 = -u_2; + T x51 = x50 + u_0; + T x52 = x22 + x45; + T x53 = x26 + x52 - T{9.00000000000000}*x51*x51; + T x54 = rho*x53; + T x55 = u_0 + u_2; + T x56 = T{9.00000000000000}*(x55*x55); + T x57 = x36 + x45; + T x58 = x14 + x37; + T x59 = x56 + x57 + x58; + T x60 = x14 + x52 - x56; + T x61 = rho*x59 - rho*x60 - T{72.0000000000000}*f_curr[15] - T{72.0000000000000}*f_curr[3]; + T x62 = x43 + x44 + x49 + x54 + x61; + T x65 = x27 + u_2; + T x66 = x29 + x46 - T{9.00000000000000}*x65*x65; + T x67 = rho*x66; + T x68 = x50 + u_1; + T x69 = x11 + x52 - T{9.00000000000000}*x68*x68; + T x70 = rho*x69; + T x71 = u_1 + u_2; + T x72 = T{9.00000000000000}*(x71*x71); + T x73 = x38 + x57 + x72; + T x74 = x29 + x45 - x72; + T x75 = rho*x73 - rho*x74 - T{72.0000000000000}*f_curr[0] - T{72.0000000000000}*f_curr[18]; + T x76 = x63 + x64 + x67 + x70 + x75; + T x79 = x16 + T{-2.00000000000000}; + T x80 = x14 + x20 - x78 + x79; + T x81 = x34 + x58 + x78 + T{2.00000000000000}; + T x82 = -x43 - x44 - x49 - x54 + x61; + T x83 = -x25 - x31 + x41 - x8 - x9; + T x84 = -x77*x80 + x77*x81 + x82 + x83 - T{72.0000000000000}*f_curr[10] - T{72.0000000000000}*f_curr[8]; + T x86 = x10 + x18 + x79 - x85; + T x87 = x35 + x38 + x85; + T x88 = -x63 - x64 - x67 - x70 + x75; + T x89 = -x77*x86 + x77*x87 + x83 + x88 - T{72.0000000000000}*f_curr[12] - T{72.0000000000000}*f_curr[6]; + T x90 = T{6.00000000000000}*x15; + T x91 = x21 + x45 - x90; + T x92 = x57 + x90; + T x93 = -x77*x91 + x77*x92 + x82 + x88 - T{72.0000000000000}*f_curr[16] - T{72.0000000000000}*f_curr[2]; + T x94 = T{1} / (tau + sqrt(T{0.707106781186548}*(smagorinsky*smagorinsky)*sqrt(x42*x42 + x62*x62 + x76*x76 + T{0.500000000000000}*(x84*x84) + T{0.500000000000000}*(x89*x89) + T{0.500000000000000}*(x93*x93)) + tau*tau)); + f_next[0] = T{0.0138888888888889}*x94*(x73*x95 - T{144.000000000000}*f_curr[0]) + f_curr[0]; + f_next[1] = -T{0.0138888888888889}*x94*(x48*x95 + T{144.000000000000}*f_curr[1]) + f_curr[1]; + f_next[2] = T{0.0277777777777778}*x94*(x92*x95 - T{72.0000000000000}*f_curr[2]) + f_curr[2]; + f_next[3] = T{0.0138888888888889}*x94*(x59*x95 - T{144.000000000000}*f_curr[3]) + f_curr[3]; + f_next[4] = -T{0.0138888888888889}*x94*(x66*x95 + T{144.000000000000}*f_curr[4]) + f_curr[4]; + f_next[5] = -T{0.0138888888888889}*x94*(x24*x95 + T{144.000000000000}*f_curr[5]) + f_curr[5]; + f_next[6] = T{0.0277777777777778}*x94*(x87*x95 - T{72.0000000000000}*f_curr[6]) + f_curr[6]; + f_next[7] = T{0.0138888888888889}*x94*(x39*x95 - T{144.000000000000}*f_curr[7]) + f_curr[7]; + f_next[8] = -T{0.0277777777777778}*x94*(x80*x95 + T{72.0000000000000}*f_curr[8]) + f_curr[8]; + f_next[9] = -T{0.166666666666667}*x94*(x22*x95 + T{12.0000000000000}*f_curr[9]) + f_curr[9]; + f_next[10] = T{0.0277777777777778}*x94*(x81*x95 - T{72.0000000000000}*f_curr[10]) + f_curr[10]; + f_next[11] = -T{0.0138888888888889}*x94*(x40*x95 + T{144.000000000000}*f_curr[11]) + f_curr[11]; + f_next[12] = -T{0.0277777777777778}*x94*(x86*x95 + T{72.0000000000000}*f_curr[12]) + f_curr[12]; + f_next[13] = -T{0.0138888888888889}*x94*(x30*x95 + T{144.000000000000}*f_curr[13]) + f_curr[13]; + f_next[14] = -T{0.0138888888888889}*x94*(x69*x95 + T{144.000000000000}*f_curr[14]) + f_curr[14]; + f_next[15] = -T{0.0138888888888889}*x94*(x60*x95 + T{144.000000000000}*f_curr[15]) + f_curr[15]; + f_next[16] = -T{0.0277777777777778}*x94*(x91*x95 + T{72.0000000000000}*f_curr[16]) + f_curr[16]; + f_next[17] = -T{0.0138888888888889}*x94*(x53*x95 + T{144.000000000000}*f_curr[17]) + f_curr[17]; + f_next[18] = -T{0.0138888888888889}*x94*(x74*x95 + T{144.000000000000}*f_curr[18]) + f_curr[18]; } }; -- cgit v1.2.3