diff options
Diffstat (limited to 'boltzgas/kernel.cl')
-rw-r--r-- | boltzgas/kernel.cl | 120 |
1 files changed, 86 insertions, 34 deletions
diff --git a/boltzgas/kernel.cl b/boltzgas/kernel.cl index ad984bb..aabd1af 100644 --- a/boltzgas/kernel.cl +++ b/boltzgas/kernel.cl @@ -2,22 +2,44 @@ #define N_PARTICLES $n_particles #define DELTA_T $delta_t +#define ENABLE_3D + typedef float scalar_t; + +#ifdef ENABLE_3D +typedef float3 vec_t; +typedef float4 data_vec_t; +#define N_WALLS 6 +#else typedef float2 vec_t; +typedef float2 data_vec_t; +#define N_WALLS 4 +#endif #define SCALAR_MAX FLT_MAX -__constant vec_t wall_normals[4] = { - (vec_t)(-1.,0.), - (vec_t)( 1.,0.), - (vec_t)(0., 1.), - (vec_t)(0.,-1.), +__constant float4 wall_normals[N_WALLS] = { + (float4)(-1., 0., 0., 0.), + (float4)( 1., 0., 0., 0.), + (float4)( 0., 1., 0., 0.), + (float4)( 0.,-1., 0., 0.), + (float4)( 0., 0.,-1., 0.), + (float4)( 0., 0., 1., 0.), }; -__constant scalar_t wall_loc[4] = { - -RADIUS, 1.-RADIUS, 1.-RADIUS, -RADIUS + +__constant scalar_t wall_loc[N_WALLS] = { + -RADIUS, 1.-RADIUS, 1.-RADIUS, -RADIUS, -RADIUS, 1.-RADIUS }; +vec_t wall_normal(unsigned int iWall) { +#ifdef ENABLE_3D + return wall_normals[iWall].xyz; +#else + return wall_normals[iWall].xy; +#endif +} + scalar_t solve_wall_collision(vec_t n, scalar_t loc, vec_t p, vec_t v) { if (dot(n,v) > 0.) { vec_t wall_v = dot(n,v) * n; @@ -59,24 +81,59 @@ scalar_t solve_collision(vec_t p, vec_t v, vec_t p_, vec_t v_) { } } -__kernel void evolve(__global vec_t* pos_a, - __global vec_t* vel_a, - __global vec_t* pos_b, - __global vec_t* vel_b, +vec_t getPosition(__global data_vec_t* pos, + unsigned int iParticle) +{ +#ifdef ENABLE_3D + return pos[iParticle].xyz; +#else + return pos[iParticle]; +#endif +} + +vec_t getVelocity(__global data_vec_t* vel, + unsigned int iParticle) +{ +#ifdef ENABLE_3D + return vel[iParticle].xyz; +#else + return vel[iParticle]; +#endif +} + +void set(__global data_vec_t* pos, + __global data_vec_t* vel, + unsigned int iParticle, + vec_t p, + vec_t v) +{ +#ifdef ENABLE_3D + pos[iParticle].xyz = p; + vel[iParticle].xyz = v; +#else + pos[iParticle] = p; + vel[iParticle] = v; +#endif +} + +__kernel void evolve(__global data_vec_t* pos_a, + __global data_vec_t* vel_a, + __global data_vec_t* pos_b, + __global data_vec_t* vel_b, __global volatile unsigned int* last_collide) { unsigned int i = get_global_id(0); - vec_t p = pos_a[i]; - vec_t v = vel_a[i]; + vec_t p = getPosition(pos_a, i); + vec_t v = getVelocity(vel_a, i); unsigned int jParticle = N_PARTICLES; scalar_t min2intersect = SCALAR_MAX; for (unsigned int iParticle=0; iParticle < N_PARTICLES; ++iParticle) { if (iParticle != i && !(last_collide[i] == iParticle && last_collide[iParticle] == i)) { - vec_t p_ = pos_a[iParticle]; - vec_t v_ = vel_a[iParticle]; + vec_t p_ = getPosition(pos_a, iParticle); + vec_t v_ = getVelocity(vel_a, iParticle); scalar_t time2intersect = solve_collision(p, v, p_, v_); @@ -90,8 +147,8 @@ __kernel void evolve(__global vec_t* pos_a, unsigned int jWall = N_PARTICLES; scalar_t min2wall = SCALAR_MAX; - for (unsigned int iWall=0; iWall < 4; ++iWall) { - scalar_t time2wall = solve_wall_collision(wall_normals[iWall], wall_loc[iWall], p, v); + for (unsigned int iWall=0; iWall < N_WALLS; ++iWall) { + scalar_t time2wall = solve_wall_collision(wall_normal(iWall), wall_loc[iWall], p, v); if (time2wall < min2wall) { min2wall = time2wall; jWall = iWall; @@ -101,33 +158,29 @@ __kernel void evolve(__global vec_t* pos_a, if (min2intersect < DELTA_T) { if (min2wall < min2intersect) { p += min2wall * v; - v -= 2*dot(v,wall_normals[jWall])*wall_normals[jWall]; + v -= 2*dot(v,wall_normal(jWall)) * wall_normal(jWall); p += (DELTA_T - min2wall) * v; last_collide[i] = N_PARTICLES; - pos_b[i] = p; - vel_b[i] = v; + set(pos_b, vel_b, i, p, v); } else { if (i < jParticle) { - vec_t p_ = pos_a[jParticle]; - vec_t v_ = vel_a[jParticle]; + vec_t p_ = getPosition(pos_a, jParticle); + vec_t v_ = getVelocity(vel_a, jParticle); p += min2intersect * v; p_ += min2intersect * v_; vec_t omega = normalize(p - p_); - v -= dot(vel_a[i] - vel_a[jParticle], omega) * omega; - v_ -= dot(vel_a[jParticle] - vel_a[i], omega) * omega; + v -= dot(getVelocity(vel_a, i) - getVelocity(vel_a, jParticle), omega) * omega; + v_ -= dot(getVelocity(vel_a, jParticle) - getVelocity(vel_a, i), omega) * omega; p += (DELTA_T - min2intersect) * v; p_ += (DELTA_T - min2intersect) * v_; - pos_b[i] = p; - vel_b[i] = v; - - pos_b[jParticle] = p_; - vel_b[jParticle] = v_; + set(pos_b, vel_b, i, p, v); + set(pos_b, vel_b, jParticle, p_, v_); last_collide[i] = jParticle; last_collide[jParticle] = i; @@ -136,21 +189,20 @@ __kernel void evolve(__global vec_t* pos_a, } else { if (min2wall < DELTA_T) { p += min2wall * v; - v -= 2*dot(v,wall_normals[jWall])*wall_normals[jWall]; + v -= 2*dot(v,wall_normal(jWall)) * wall_normal(jWall); p += (DELTA_T - min2wall) * v; last_collide[i] = N_PARTICLES; } else { p += DELTA_T * v; } - pos_b[i] = p; - vel_b[i] = v; + set(pos_b, vel_b, i, p, v); } } -__kernel void get_velocity_norms(__global vec_t* velocities, __global scalar_t* norms) +__kernel void get_velocity_norms(__global data_vec_t* velocities, __global scalar_t* norms) { unsigned int i = get_global_id(0); - norms[i] = length(velocities[i]); + norms[i] = length(getVelocity(velocities, i)); } |