From a75906a940e6eed15718f8513afd2413e771dc56 Mon Sep 17 00:00:00 2001
From: Jean-Matthieu Etancelin <jean-matthieu.etancelin@univ-reims.fr>
Date: Thu, 31 Jul 2014 18:07:17 +0200
Subject: [PATCH] Fix use of mesh structure for opencl kernels

---
 HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl        |  2 +-
 HySoP/hysop/gpu/cl_src/advection/basic_rk4_noVec.cl  |  2 +-
 HySoP/hysop/gpu/cl_src/advection/builtin_rk2.cl      |  2 +-
 HySoP/hysop/gpu/cl_src/advection/builtin_rk4.cl      |  2 +-
 .../hysop/gpu/cl_src/advection/builtin_rk4_noVec.cl  |  2 +-
 .../gpu/cl_src/advection/comm_builtin_rk2_noVec.cl   |  2 +-
 HySoP/hysop/gpu/cl_src/common.cl                     | 12 ++++++------
 HySoP/hysop/gpu/cl_src/kernels/advection.cl          |  5 +----
 .../gpu/cl_src/kernels/advection_and_remeshing.cl    |  4 +---
 .../cl_src/kernels/advection_and_remeshing_noVec.cl  |  7 ++-----
 HySoP/hysop/gpu/cl_src/kernels/advection_noVec.cl    |  5 +----
 .../gpu/cl_src/kernels/comm_MS_advection_noVec.cl    |  7 +------
 .../hysop/gpu/cl_src/kernels/comm_advection_noVec.cl |  5 ++---
 .../hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl |  1 -
 HySoP/hysop/gpu/cl_src/kernels/remeshing_noVec.cl    |  1 -
 HySoP/hysop/gpu/multi_gpu_particle_advection.py      |  2 ++
 HySoP/setup.py.in                                    |  2 +-
 17 files changed, 23 insertions(+), 40 deletions(-)

diff --git a/HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl b/HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl
index 05760d25b..7bdcab7f8 100644
--- a/HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl
@@ -105,5 +105,5 @@ float__N__ advection(uint i, float dt, __local float* velocity_cache, __constant
   k += kn;
 
 
-  return c + (float__N__)(dt * 0.16666666666666666) * k; // 1./6. = 0.16666666666666666
+  return c + (float__N__)(dt / 6.0) * k;
 }
diff --git a/HySoP/hysop/gpu/cl_src/advection/basic_rk4_noVec.cl b/HySoP/hysop/gpu/cl_src/advection/basic_rk4_noVec.cl
index b5a790649..c60ef1c55 100644
--- a/HySoP/hysop/gpu/cl_src/advection/basic_rk4_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/basic_rk4_noVec.cl
@@ -84,7 +84,7 @@ float advection(uint i, float dt, __local float* velocity_cache, __constant stru
 
   k += kn;			/* k = k1 + 2*k2 + 2*k3 + k4 */
 
-  return c + dt * k * 0.16666666666666666; // 1./6. = 0.16666666666666666
+  return c + dt * k / 6.0;
 }
 /* Operations number :  */
 /*   - 4 positions = 4 * 2 + 3 */
diff --git a/HySoP/hysop/gpu/cl_src/advection/builtin_rk2.cl b/HySoP/hysop/gpu/cl_src/advection/builtin_rk2.cl
index f51fe57fb..f5fca5392 100644
--- a/HySoP/hysop/gpu/cl_src/advection/builtin_rk2.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/builtin_rk2.cl
@@ -52,7 +52,7 @@ float__N__ advection(uint i, float dt, __local float* velocity_cache, __constant
 		   );
   vp = (float__N__)(velocity_cache[noBC_id(i_ind_p.s__NN__)],
 		    );
-  p = fma(hdt, mix(v,vp,p), c) * mesh->v_invdx;
+  p = fma(hdt, mix(v,vp,p), c) * v_invdx;
 #endif
 
   i_ind = convert_int__N___rtn(p);
diff --git a/HySoP/hysop/gpu/cl_src/advection/builtin_rk4.cl b/HySoP/hysop/gpu/cl_src/advection/builtin_rk4.cl
index 1f6dcdab0..633fd4929 100644
--- a/HySoP/hysop/gpu/cl_src/advection/builtin_rk4.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/builtin_rk4.cl
@@ -99,5 +99,5 @@ float__N__ advection(uint i, float dt, __local float* velocity_cache, __constant
   k += kn;
 
 
-  return fma(k,(float__N__)(dt * 0.16666666666666666),c); // 1./6. = 0.16666666666666666
+  return fma(k,(float__N__)(dt/6.0),c);
 }
diff --git a/HySoP/hysop/gpu/cl_src/advection/builtin_rk4_noVec.cl b/HySoP/hysop/gpu/cl_src/advection/builtin_rk4_noVec.cl
index 9bcce06df..75a3aa3c8 100644
--- a/HySoP/hysop/gpu/cl_src/advection/builtin_rk4_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/builtin_rk4_noVec.cl
@@ -79,7 +79,7 @@ float advection(uint i, float dt, __local float* velocity_cache, __constant stru
 
   k += kn;			/* k = k1 + 2*k2 + 2*k3 + k4 */
 
-  return fma(k, dt * 0.16666666666666666, c); // 1./6. = 0.16666666666666666
+  return fma(k, dt/6.0, c);
 }
 
 /* Operations number :  */
diff --git a/HySoP/hysop/gpu/cl_src/advection/comm_builtin_rk2_noVec.cl b/HySoP/hysop/gpu/cl_src/advection/comm_builtin_rk2_noVec.cl
index 32ba60fe7..a9a717f80 100644
--- a/HySoP/hysop/gpu/cl_src/advection/comm_builtin_rk2_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/comm_builtin_rk2_noVec.cl
@@ -26,7 +26,7 @@ float advection(uint i, float dt, __local float* velocity_cache, __constant stru
 {
   float v, 			/* Velocity at point */
     p,				/* Intermediary position */
-    c = i * mesh->dx.x + mesh->min_position,  /* initial coordinate */
+    c = i * dx + min_position,  /* initial coordinate */
     hdt = 0.5 * dt;		/* half time step */
   int i_ind,			/* Interpolation left point */
     i_ind_p;			/* Interpolation right point */
diff --git a/HySoP/hysop/gpu/cl_src/common.cl b/HySoP/hysop/gpu/cl_src/common.cl
index 08ddc80fe..009872163 100644
--- a/HySoP/hysop/gpu/cl_src/common.cl
+++ b/HySoP/hysop/gpu/cl_src/common.cl
@@ -148,10 +148,10 @@ a minmax element is a 12 int defined as follows:
 /* Structure to store __constants advection parameters */
 typedef struct AdvectionMeshInfo
 {
-  float4 dx;			/* Mesh step (advected grid) */
-  float4 v_dx;			/* Mesh step (velocity) */
+  float4 dx;                   /* Mesh step (advected grid) */
+  float4 v_dx;                 /* Mesh step (velocity) */
   float min_position;           /* Domain minimum coordinate in current direction */
-  float invdx;			/* Store 1./dx.x */
-  float v_invdx;		/* Store 1./v_dx.x */
-  float x; 			/* Padding */
-} AdvectionMeshInfo __attribute__ ((aligned));
+  float invdx;                 /* Store 1./dx.x */
+  float v_invdx;               /* Store 1./v_dx.x */
+  float x;                     /* Padding */
+} AdvectionMeshInfo;
diff --git a/HySoP/hysop/gpu/cl_src/kernels/advection.cl b/HySoP/hysop/gpu/cl_src/kernels/advection.cl
index 5eeeb9e96..8b8cbfbb8 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/advection.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/advection.cl
@@ -25,14 +25,11 @@
 __kernel void advection_kernel(__global const float* gvelo,
 			       __global float* ppos,
 			       __local float* velocity_cache,
-			       float dt,
-			       __constant struct AdvectionMeshInfo* mesh)
+			       float dt, __constant struct AdvectionMeshInfo* mesh)
 {
   uint gidX = get_global_id(0);	/* OpenCL work-itme global index (X) */
   uint gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */
   uint gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */
-  //float invdx = 1.0/dx.x;		/* Space step inverse */
-  //float v_invdx = 1.0/v_dx.x;		/* Space step inverse */
   uint i;			/* Particle index in 1D problem */
   float__N__ p;				/* Particle position */
   uint line_index = gidY*NB_I+gidZ*NB_I*NB_II; /* Current 1D problem index */
diff --git a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing.cl b/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing.cl
index 2d79e8eee..e763b60b8 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing.cl
@@ -31,13 +31,11 @@ __kernel void advection_and_remeshing(__global const float* gvelo,
 				      __RCOMP_P__global float* gscal__ID__,
 				      __local float* velocity_cache,
 				      __RCOMP_P__local float* gscal_loc__ID__,
-				      float dt, float min_position, float4 dx, float4 v_dx)
+				      float dt, __constant struct AdvectionMeshInfo* mesh)
 {
   uint gidX = get_global_id(0);	/* OpenCL work-itme global index (X) */
   uint gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */
   uint gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */
-  //  float invdx = 1.0/dx.x;		/* Space step inverse */
-  //  float v_invdx = 1.0/v_dx.x;		/* Space step inverse */
   uint i;			/* Particle index in 1D problem */
   float__N__ p;			/* Particle position */
   __RCOMP_I float__N__ s__ID__; /* Particle scalar */
diff --git a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_noVec.cl
index cc4a3872d..17a83113d 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_noVec.cl
@@ -31,14 +31,11 @@ __kernel void advection_and_remeshing(__global const float* gvelo,
 				      __RCOMP_P__global float* gscal__ID__,
 				      __local float* gvelo_loc,
 				      __RCOMP_P__local float* gscal_loc__ID__,
-				      float dt,
-				      __constant struct AdvectionMeshInfo* mesh)
+				      float dt, __constant struct AdvectionMeshInfo* mesh)
 {
   uint gidX = get_global_id(0);	/* OpenCL work-itme global index (X) */
   uint gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */
   uint gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */
-  //float invdx = 1.0/dx.x;		/* Space step inverse */
-  //float v_invdx = 1.0/v_dx.x;		/* Space step inverse */
   uint i;			/* Particle index in 1D problem */
   float p;			/* Particle position */
   __RCOMP_I float s__ID__;	/* Particle scalar */
@@ -64,7 +61,7 @@ __kernel void advection_and_remeshing(__global const float* gvelo,
       /* Compute particle position */
       p = advection(i, dt, gvelo_loc, mesh);
       /* Remesh particle */
-      remesh(i, dx.x, invdx, __RCOMP_Ps__ID__, p, min_position, __RCOMP_Pgscal_loc__ID__);
+      remesh(i, __RCOMP_Ps__ID__, p, __RCOMP_Pgscal_loc__ID__, mesh);
     }
 
   /* Synchronize work-group */
diff --git a/HySoP/hysop/gpu/cl_src/kernels/advection_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/advection_noVec.cl
index 243163876..4b3fa67e5 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/advection_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/advection_noVec.cl
@@ -20,14 +20,11 @@
 __kernel void advection_kernel(__global const float* gvelo,
 			       __global float* ppos,
 			       __local float* velocity_cache,
-			       float dt,
-			       __constant struct AdvectionMeshInfo* mesh)
+			       float dt, __constant struct AdvectionMeshInfo* mesh)
 {
   uint gidX = get_global_id(0);	/* OpenCL work-itme global index (X) */
   uint gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */
   uint gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */
-  //  float invdx = 1.0/dx.x;		/* Space step inverse */
-  //  float v_invdx = 1.0/v_dx.x;		/* Space step inverse */
   uint i;			/* Particle index in 1D problem */
   uint line_index = gidY*NB_I+gidZ*NB_I*NB_II; /* Current 1D problem index */
 
diff --git a/HySoP/hysop/gpu/cl_src/kernels/comm_MS_advection_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/comm_MS_advection_noVec.cl
index f3737883d..71ca92157 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/comm_MS_advection_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/comm_MS_advection_noVec.cl
@@ -17,15 +17,12 @@ __kernel void buff_advec(__global const float* gvelo,
 			 __local float* velocity_cache,
 			 __local float* buff_l_loc,
 			 __local float* buff_r_loc,
-			 float dt,
-			 __constant struct AdvectionMeshInfo* mesh,
+			 float dt, __constant struct AdvectionMeshInfo* mesh,
 			 int4 l_nb, int4 r_nb)
 {
   int gidX = get_global_id(0);	/* OpenCL work-itme global index (X) */
   int gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */
   int gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */
-  //  float invdx = 1.0/dx.x;		/* Space step inverse */
-  //  float v_invdx = 1.0/v_dx.x;		/* Space step inverse */
   int i;			/* Particle index in 1D problem */
   int line_index = gidY*NB_I+gidZ*NB_I*NB_II; /* Current 1D problem index */
   float p,v,c, hY, hZ;
@@ -118,8 +115,6 @@ __kernel void reduce_stage1_advec(__global const float* gvelo,
 {
   int lid = get_global_id(0);	/* OpenCL work-itme global index (X) */
   int gidY, gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */
-  //  float invdx = 1.0/dx.x;		/* Space step inverse */
-  //  float v_invdx = 1.0/v_dx.x;		/* Space step inverse */
   int i;			/* Particle index in 1D problem */
   int line_index; /* Current 1D problem index */
   int my_minmax[12] = {1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30};
diff --git a/HySoP/hysop/gpu/cl_src/kernels/comm_advection_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/comm_advection_noVec.cl
index 61c02fadd..4d1c202ba 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/comm_advection_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/comm_advection_noVec.cl
@@ -18,8 +18,6 @@ __kernel void buff_advec(__global const float* gvelo,
   int gidX = get_global_id(0);	/* OpenCL work-itme global index (X) */
   int gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */
   int gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */
-  //  float invdx = 1.0/dx.x;		/* Space step inverse */
-  //  float v_invdx = 1.0/v_dx.x;		/* Space step inverse */
   int i;			/* Particle index in 1D problem */
   int line_index ; /* Current 1D problem index */
 
@@ -90,7 +88,8 @@ __kernel void reduce_stage1_advec(__global const float* gvelo,
 				  __global int* minmax_buffer,
 				  __local float* velocity_cache,
 				  __local int* minmax,
-				  float dt, __constant struct AdvectionMeshInfo* mesh)
+				  float dt,
+				  __constant struct AdvectionMeshInfo* mesh)
 {
   int gidY, gidZ = get_global_id(2);
   int lid = get_global_id(0);
diff --git a/HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl
index e1691bb87..ff6839efb 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl
@@ -109,7 +109,6 @@ __kernel void buff_remesh(__global const float* ppos,
   int lid = get_local_id(0);	/* OpenCL work-itme global index (X) */
   int gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */
   int gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */
-  //  float invdx = 1.0/dx;         /* Space step inverse */
   int i;			/* Particle index in 1D problem */
   float p;			/* Particle position */
   float s;      /* Particle scalar */
diff --git a/HySoP/hysop/gpu/cl_src/kernels/remeshing_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/remeshing_noVec.cl
index b83eaec35..71fa58453 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/remeshing_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/remeshing_noVec.cl
@@ -34,7 +34,6 @@ __kernel void remeshing_kernel(__global const float* ppos,
   uint gidX = get_global_id(0);	/* OpenCL work-itme global index (X) */
   uint gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */
   uint gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */
-  //float invdx = 1.0/dx;         /* Space step inverse */
   uint i;			/* Particle index in 1D problem */
   float p;			/* Particle position */
   __RCOMP_I float s__ID__;      /* Particle scalar */
diff --git a/HySoP/hysop/gpu/multi_gpu_particle_advection.py b/HySoP/hysop/gpu/multi_gpu_particle_advection.py
index 954c52165..f7acfbdff 100644
--- a/HySoP/hysop/gpu/multi_gpu_particle_advection.py
+++ b/HySoP/hysop/gpu/multi_gpu_particle_advection.py
@@ -248,6 +248,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
             src = ['common.cl',
                    'kernels/comm_advection_noVec.cl',
                    'kernels/minmax_buffers.cl']
+        print src
         prg = self.cl_env.build_src(
             src, build_options, 1)
         self.num_reduce_stage1_advec = KernelLauncher(
@@ -280,6 +281,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
         src = ['common.cl', 'remeshing/weights_noVec.cl',
                'kernels/comm_remeshing_noVec.cl',
                'kernels/minmax_buffers.cl']
+        print src
         prg = self.cl_env.build_src(
             src, build_options, 1)
         self.num_reduce_stage1_rmsh = KernelLauncher(
diff --git a/HySoP/setup.py.in b/HySoP/setup.py.in
index f771350fc..a2e61e470 100644
--- a/HySoP/setup.py.in
+++ b/HySoP/setup.py.in
@@ -112,7 +112,7 @@ if("@WITH_GPU@" is "ON"):
         data_files.append(('./parmepy/gpu/'+cl_dir,
                        ['@CMAKE_SOURCE_DIR@/parmepy/gpu/'+cl_dir+'/' + cl_file
                         for cl_file in os.listdir('@CMAKE_SOURCE_DIR@/parmepy/gpu/'+cl_dir+'/')
-                        if cl_file[0]!='.' and cl_file[-3:]=='.cl']))
+                        if cl_file[0]!='.' and cl_file[0]!='#' and cl_file[-3:]=='.cl']))
 
 config = Configuration(name=name,
       version='@PYPACKAGE_VERSION@',
-- 
GitLab