From a75906a940e6eed15718f8513afd2413e771dc56 Mon Sep 17 00:00:00 2001 From: Jean-Matthieu Etancelin <jean-matthieu.etancelin@univ-reims.fr> Date: Thu, 31 Jul 2014 18:07:17 +0200 Subject: [PATCH] Fix use of mesh structure for opencl kernels --- HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl | 2 +- HySoP/hysop/gpu/cl_src/advection/basic_rk4_noVec.cl | 2 +- HySoP/hysop/gpu/cl_src/advection/builtin_rk2.cl | 2 +- HySoP/hysop/gpu/cl_src/advection/builtin_rk4.cl | 2 +- .../hysop/gpu/cl_src/advection/builtin_rk4_noVec.cl | 2 +- .../gpu/cl_src/advection/comm_builtin_rk2_noVec.cl | 2 +- HySoP/hysop/gpu/cl_src/common.cl | 12 ++++++------ HySoP/hysop/gpu/cl_src/kernels/advection.cl | 5 +---- .../gpu/cl_src/kernels/advection_and_remeshing.cl | 4 +--- .../cl_src/kernels/advection_and_remeshing_noVec.cl | 7 ++----- HySoP/hysop/gpu/cl_src/kernels/advection_noVec.cl | 5 +---- .../gpu/cl_src/kernels/comm_MS_advection_noVec.cl | 7 +------ .../hysop/gpu/cl_src/kernels/comm_advection_noVec.cl | 5 ++--- .../hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl | 1 - HySoP/hysop/gpu/cl_src/kernels/remeshing_noVec.cl | 1 - HySoP/hysop/gpu/multi_gpu_particle_advection.py | 2 ++ HySoP/setup.py.in | 2 +- 17 files changed, 23 insertions(+), 40 deletions(-) diff --git a/HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl b/HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl index 05760d25b..7bdcab7f8 100644 --- a/HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl +++ b/HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl @@ -105,5 +105,5 @@ float__N__ advection(uint i, float dt, __local float* velocity_cache, __constant k += kn; - return c + (float__N__)(dt * 0.16666666666666666) * k; // 1./6. = 0.16666666666666666 + return c + (float__N__)(dt / 6.0) * k; } diff --git a/HySoP/hysop/gpu/cl_src/advection/basic_rk4_noVec.cl b/HySoP/hysop/gpu/cl_src/advection/basic_rk4_noVec.cl index b5a790649..c60ef1c55 100644 --- a/HySoP/hysop/gpu/cl_src/advection/basic_rk4_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/advection/basic_rk4_noVec.cl @@ -84,7 +84,7 @@ float advection(uint i, float dt, __local float* velocity_cache, __constant stru k += kn; /* k = k1 + 2*k2 + 2*k3 + k4 */ - return c + dt * k * 0.16666666666666666; // 1./6. = 0.16666666666666666 + return c + dt * k / 6.0; } /* Operations number : */ /* - 4 positions = 4 * 2 + 3 */ diff --git a/HySoP/hysop/gpu/cl_src/advection/builtin_rk2.cl b/HySoP/hysop/gpu/cl_src/advection/builtin_rk2.cl index f51fe57fb..f5fca5392 100644 --- a/HySoP/hysop/gpu/cl_src/advection/builtin_rk2.cl +++ b/HySoP/hysop/gpu/cl_src/advection/builtin_rk2.cl @@ -52,7 +52,7 @@ float__N__ advection(uint i, float dt, __local float* velocity_cache, __constant ); vp = (float__N__)(velocity_cache[noBC_id(i_ind_p.s__NN__)], ); - p = fma(hdt, mix(v,vp,p), c) * mesh->v_invdx; + p = fma(hdt, mix(v,vp,p), c) * v_invdx; #endif i_ind = convert_int__N___rtn(p); diff --git a/HySoP/hysop/gpu/cl_src/advection/builtin_rk4.cl b/HySoP/hysop/gpu/cl_src/advection/builtin_rk4.cl index 1f6dcdab0..633fd4929 100644 --- a/HySoP/hysop/gpu/cl_src/advection/builtin_rk4.cl +++ b/HySoP/hysop/gpu/cl_src/advection/builtin_rk4.cl @@ -99,5 +99,5 @@ float__N__ advection(uint i, float dt, __local float* velocity_cache, __constant k += kn; - return fma(k,(float__N__)(dt * 0.16666666666666666),c); // 1./6. = 0.16666666666666666 + return fma(k,(float__N__)(dt/6.0),c); } diff --git a/HySoP/hysop/gpu/cl_src/advection/builtin_rk4_noVec.cl b/HySoP/hysop/gpu/cl_src/advection/builtin_rk4_noVec.cl index 9bcce06df..75a3aa3c8 100644 --- a/HySoP/hysop/gpu/cl_src/advection/builtin_rk4_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/advection/builtin_rk4_noVec.cl @@ -79,7 +79,7 @@ float advection(uint i, float dt, __local float* velocity_cache, __constant stru k += kn; /* k = k1 + 2*k2 + 2*k3 + k4 */ - return fma(k, dt * 0.16666666666666666, c); // 1./6. = 0.16666666666666666 + return fma(k, dt/6.0, c); } /* Operations number : */ diff --git a/HySoP/hysop/gpu/cl_src/advection/comm_builtin_rk2_noVec.cl b/HySoP/hysop/gpu/cl_src/advection/comm_builtin_rk2_noVec.cl index 32ba60fe7..a9a717f80 100644 --- a/HySoP/hysop/gpu/cl_src/advection/comm_builtin_rk2_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/advection/comm_builtin_rk2_noVec.cl @@ -26,7 +26,7 @@ float advection(uint i, float dt, __local float* velocity_cache, __constant stru { float v, /* Velocity at point */ p, /* Intermediary position */ - c = i * mesh->dx.x + mesh->min_position, /* initial coordinate */ + c = i * dx + min_position, /* initial coordinate */ hdt = 0.5 * dt; /* half time step */ int i_ind, /* Interpolation left point */ i_ind_p; /* Interpolation right point */ diff --git a/HySoP/hysop/gpu/cl_src/common.cl b/HySoP/hysop/gpu/cl_src/common.cl index 08ddc80fe..009872163 100644 --- a/HySoP/hysop/gpu/cl_src/common.cl +++ b/HySoP/hysop/gpu/cl_src/common.cl @@ -148,10 +148,10 @@ a minmax element is a 12 int defined as follows: /* Structure to store __constants advection parameters */ typedef struct AdvectionMeshInfo { - float4 dx; /* Mesh step (advected grid) */ - float4 v_dx; /* Mesh step (velocity) */ + float4 dx; /* Mesh step (advected grid) */ + float4 v_dx; /* Mesh step (velocity) */ float min_position; /* Domain minimum coordinate in current direction */ - float invdx; /* Store 1./dx.x */ - float v_invdx; /* Store 1./v_dx.x */ - float x; /* Padding */ -} AdvectionMeshInfo __attribute__ ((aligned)); + float invdx; /* Store 1./dx.x */ + float v_invdx; /* Store 1./v_dx.x */ + float x; /* Padding */ +} AdvectionMeshInfo; diff --git a/HySoP/hysop/gpu/cl_src/kernels/advection.cl b/HySoP/hysop/gpu/cl_src/kernels/advection.cl index 5eeeb9e96..8b8cbfbb8 100644 --- a/HySoP/hysop/gpu/cl_src/kernels/advection.cl +++ b/HySoP/hysop/gpu/cl_src/kernels/advection.cl @@ -25,14 +25,11 @@ __kernel void advection_kernel(__global const float* gvelo, __global float* ppos, __local float* velocity_cache, - float dt, - __constant struct AdvectionMeshInfo* mesh) + float dt, __constant struct AdvectionMeshInfo* mesh) { uint gidX = get_global_id(0); /* OpenCL work-itme global index (X) */ uint gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */ uint gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */ - //float invdx = 1.0/dx.x; /* Space step inverse */ - //float v_invdx = 1.0/v_dx.x; /* Space step inverse */ uint i; /* Particle index in 1D problem */ float__N__ p; /* Particle position */ uint line_index = gidY*NB_I+gidZ*NB_I*NB_II; /* Current 1D problem index */ diff --git a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing.cl b/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing.cl index 2d79e8eee..e763b60b8 100644 --- a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing.cl +++ b/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing.cl @@ -31,13 +31,11 @@ __kernel void advection_and_remeshing(__global const float* gvelo, __RCOMP_P__global float* gscal__ID__, __local float* velocity_cache, __RCOMP_P__local float* gscal_loc__ID__, - float dt, float min_position, float4 dx, float4 v_dx) + float dt, __constant struct AdvectionMeshInfo* mesh) { uint gidX = get_global_id(0); /* OpenCL work-itme global index (X) */ uint gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */ uint gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */ - // float invdx = 1.0/dx.x; /* Space step inverse */ - // float v_invdx = 1.0/v_dx.x; /* Space step inverse */ uint i; /* Particle index in 1D problem */ float__N__ p; /* Particle position */ __RCOMP_I float__N__ s__ID__; /* Particle scalar */ diff --git a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_noVec.cl index cc4a3872d..17a83113d 100644 --- a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_noVec.cl @@ -31,14 +31,11 @@ __kernel void advection_and_remeshing(__global const float* gvelo, __RCOMP_P__global float* gscal__ID__, __local float* gvelo_loc, __RCOMP_P__local float* gscal_loc__ID__, - float dt, - __constant struct AdvectionMeshInfo* mesh) + float dt, __constant struct AdvectionMeshInfo* mesh) { uint gidX = get_global_id(0); /* OpenCL work-itme global index (X) */ uint gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */ uint gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */ - //float invdx = 1.0/dx.x; /* Space step inverse */ - //float v_invdx = 1.0/v_dx.x; /* Space step inverse */ uint i; /* Particle index in 1D problem */ float p; /* Particle position */ __RCOMP_I float s__ID__; /* Particle scalar */ @@ -64,7 +61,7 @@ __kernel void advection_and_remeshing(__global const float* gvelo, /* Compute particle position */ p = advection(i, dt, gvelo_loc, mesh); /* Remesh particle */ - remesh(i, dx.x, invdx, __RCOMP_Ps__ID__, p, min_position, __RCOMP_Pgscal_loc__ID__); + remesh(i, __RCOMP_Ps__ID__, p, __RCOMP_Pgscal_loc__ID__, mesh); } /* Synchronize work-group */ diff --git a/HySoP/hysop/gpu/cl_src/kernels/advection_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/advection_noVec.cl index 243163876..4b3fa67e5 100644 --- a/HySoP/hysop/gpu/cl_src/kernels/advection_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/kernels/advection_noVec.cl @@ -20,14 +20,11 @@ __kernel void advection_kernel(__global const float* gvelo, __global float* ppos, __local float* velocity_cache, - float dt, - __constant struct AdvectionMeshInfo* mesh) + float dt, __constant struct AdvectionMeshInfo* mesh) { uint gidX = get_global_id(0); /* OpenCL work-itme global index (X) */ uint gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */ uint gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */ - // float invdx = 1.0/dx.x; /* Space step inverse */ - // float v_invdx = 1.0/v_dx.x; /* Space step inverse */ uint i; /* Particle index in 1D problem */ uint line_index = gidY*NB_I+gidZ*NB_I*NB_II; /* Current 1D problem index */ diff --git a/HySoP/hysop/gpu/cl_src/kernels/comm_MS_advection_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/comm_MS_advection_noVec.cl index f3737883d..71ca92157 100644 --- a/HySoP/hysop/gpu/cl_src/kernels/comm_MS_advection_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/kernels/comm_MS_advection_noVec.cl @@ -17,15 +17,12 @@ __kernel void buff_advec(__global const float* gvelo, __local float* velocity_cache, __local float* buff_l_loc, __local float* buff_r_loc, - float dt, - __constant struct AdvectionMeshInfo* mesh, + float dt, __constant struct AdvectionMeshInfo* mesh, int4 l_nb, int4 r_nb) { int gidX = get_global_id(0); /* OpenCL work-itme global index (X) */ int gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */ int gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */ - // float invdx = 1.0/dx.x; /* Space step inverse */ - // float v_invdx = 1.0/v_dx.x; /* Space step inverse */ int i; /* Particle index in 1D problem */ int line_index = gidY*NB_I+gidZ*NB_I*NB_II; /* Current 1D problem index */ float p,v,c, hY, hZ; @@ -118,8 +115,6 @@ __kernel void reduce_stage1_advec(__global const float* gvelo, { int lid = get_global_id(0); /* OpenCL work-itme global index (X) */ int gidY, gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */ - // float invdx = 1.0/dx.x; /* Space step inverse */ - // float v_invdx = 1.0/v_dx.x; /* Space step inverse */ int i; /* Particle index in 1D problem */ int line_index; /* Current 1D problem index */ int my_minmax[12] = {1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30}; diff --git a/HySoP/hysop/gpu/cl_src/kernels/comm_advection_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/comm_advection_noVec.cl index 61c02fadd..4d1c202ba 100644 --- a/HySoP/hysop/gpu/cl_src/kernels/comm_advection_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/kernels/comm_advection_noVec.cl @@ -18,8 +18,6 @@ __kernel void buff_advec(__global const float* gvelo, int gidX = get_global_id(0); /* OpenCL work-itme global index (X) */ int gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */ int gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */ - // float invdx = 1.0/dx.x; /* Space step inverse */ - // float v_invdx = 1.0/v_dx.x; /* Space step inverse */ int i; /* Particle index in 1D problem */ int line_index ; /* Current 1D problem index */ @@ -90,7 +88,8 @@ __kernel void reduce_stage1_advec(__global const float* gvelo, __global int* minmax_buffer, __local float* velocity_cache, __local int* minmax, - float dt, __constant struct AdvectionMeshInfo* mesh) + float dt, + __constant struct AdvectionMeshInfo* mesh) { int gidY, gidZ = get_global_id(2); int lid = get_global_id(0); diff --git a/HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl index e1691bb87..ff6839efb 100644 --- a/HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl @@ -109,7 +109,6 @@ __kernel void buff_remesh(__global const float* ppos, int lid = get_local_id(0); /* OpenCL work-itme global index (X) */ int gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */ int gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */ - // float invdx = 1.0/dx; /* Space step inverse */ int i; /* Particle index in 1D problem */ float p; /* Particle position */ float s; /* Particle scalar */ diff --git a/HySoP/hysop/gpu/cl_src/kernels/remeshing_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/remeshing_noVec.cl index b83eaec35..71fa58453 100644 --- a/HySoP/hysop/gpu/cl_src/kernels/remeshing_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/kernels/remeshing_noVec.cl @@ -34,7 +34,6 @@ __kernel void remeshing_kernel(__global const float* ppos, uint gidX = get_global_id(0); /* OpenCL work-itme global index (X) */ uint gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */ uint gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */ - //float invdx = 1.0/dx; /* Space step inverse */ uint i; /* Particle index in 1D problem */ float p; /* Particle position */ __RCOMP_I float s__ID__; /* Particle scalar */ diff --git a/HySoP/hysop/gpu/multi_gpu_particle_advection.py b/HySoP/hysop/gpu/multi_gpu_particle_advection.py index 954c52165..f7acfbdff 100644 --- a/HySoP/hysop/gpu/multi_gpu_particle_advection.py +++ b/HySoP/hysop/gpu/multi_gpu_particle_advection.py @@ -248,6 +248,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection): src = ['common.cl', 'kernels/comm_advection_noVec.cl', 'kernels/minmax_buffers.cl'] + print src prg = self.cl_env.build_src( src, build_options, 1) self.num_reduce_stage1_advec = KernelLauncher( @@ -280,6 +281,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection): src = ['common.cl', 'remeshing/weights_noVec.cl', 'kernels/comm_remeshing_noVec.cl', 'kernels/minmax_buffers.cl'] + print src prg = self.cl_env.build_src( src, build_options, 1) self.num_reduce_stage1_rmsh = KernelLauncher( diff --git a/HySoP/setup.py.in b/HySoP/setup.py.in index f771350fc..a2e61e470 100644 --- a/HySoP/setup.py.in +++ b/HySoP/setup.py.in @@ -112,7 +112,7 @@ if("@WITH_GPU@" is "ON"): data_files.append(('./parmepy/gpu/'+cl_dir, ['@CMAKE_SOURCE_DIR@/parmepy/gpu/'+cl_dir+'/' + cl_file for cl_file in os.listdir('@CMAKE_SOURCE_DIR@/parmepy/gpu/'+cl_dir+'/') - if cl_file[0]!='.' and cl_file[-3:]=='.cl'])) + if cl_file[0]!='.' and cl_file[0]!='#' and cl_file[-3:]=='.cl'])) config = Configuration(name=name, version='@PYPACKAGE_VERSION@', -- GitLab