From 75018cdbea82f21774ecbc9126c6efa30e3a24ad Mon Sep 17 00:00:00 2001
From: Jean-Matthieu Etancelin <jean-matthieu.etancelin@univ-reims.fr>
Date: Fri, 25 Jul 2014 15:25:19 +0200
Subject: [PATCH] Bring some optimisations for single GPU

---
 .gitignore                                    |   3 +-
 HySoP/hysop/.pyflymakercc                     | Bin 267 -> 0 bytes
 HySoP/hysop/gpu/cl_src/advection/basic_rk2.cl |   5 +-
 .../gpu/cl_src/advection/basic_rk2_noVec.cl   |   6 +-
 HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl |   5 +-
 .../gpu/cl_src/advection/basic_rk4_noVec.cl   |   7 +-
 .../hysop/gpu/cl_src/advection/builtin_rk2.cl |   5 +-
 .../gpu/cl_src/advection/builtin_rk2_noVec.cl |   7 +-
 .../hysop/gpu/cl_src/advection/builtin_rk4.cl |   5 +-
 .../gpu/cl_src/advection/builtin_rk4_noVec.cl |   6 +-
 .../cl_src/advection/comm_basic_rk2_noVec.cl  |  74 ++++
 .../advection/comm_builtin_rk2_noVec.cl       |  73 ++++
 .../gpu/cl_src/advection/velocity_cache.cl    | 388 +++++-------------
 .../cl_src/advection/velocity_cache_noVec.cl  |   7 +-
 HySoP/hysop/gpu/cl_src/common.cl              |  19 +
 HySoP/hysop/gpu/cl_src/kernels/advection.cl   |   2 +-
 .../cl_src/kernels/advection_and_remeshing.cl |   6 +-
 .../kernels/advection_and_remeshing_noVec.cl  |   6 +-
 .../advection_and_remeshing_vector_2d.cl      |  83 ----
 .../advection_and_remeshing_vector_3d.cl      |  90 ----
 .../gpu/cl_src/kernels/advection_noVec.cl     |   2 +-
 .../cl_src/kernels/comm_MS_advection_noVec.cl | 261 ++++++++++++
 .../cl_src/kernels/comm_advection_noVec.cl    | 161 ++++++++
 .../gpu/cl_src/kernels/comm_diffusion.cl      | 150 +++++++
 .../cl_src/kernels/comm_remeshing_noVec.cl    | 253 ++++++++++++
 HySoP/hysop/gpu/cl_src/kernels/diffusion.cl   | 119 ++++++
 .../gpu/cl_src/kernels/minmax_buffers.cl      |  84 ++++
 HySoP/hysop/gpu/cl_src/kernels/remeshing.cl   |   4 +-
 .../gpu/cl_src/kernels/remeshing_noVec.cl     |   4 +-
 HySoP/hysop/gpu/cl_src/remeshing/basic.cl     |   5 +-
 .../hysop/gpu/cl_src/remeshing/basic_noVec.cl |   5 +-
 .../cl_src/remeshing/basic_noVec_vector_2d.cl | 111 -----
 .../cl_src/remeshing/basic_noVec_vector_3d.cl | 121 ------
 .../gpu/cl_src/remeshing/basic_vector_2d.cl   | 111 -----
 .../gpu/cl_src/remeshing/basic_vector_3d.cl   | 121 ------
 .../gpu/cl_src/remeshing/comm_basic_noVec.cl  | 124 ++++++
 HySoP/hysop/gpu/cl_src/remeshing/private.cl   |   5 +-
 .../gpu/cl_src/remeshing/private_noVec.cl     |   5 +-
 .../gpu/cl_src/remeshing/private_vector_2d.cl | 112 -----
 .../gpu/cl_src/remeshing/private_vector_3d.cl | 122 ------
 HySoP/hysop/gpu/cl_src/remeshing/weights.cl   | 168 ++++----
 .../gpu/cl_src/remeshing/weights_builtin.cl   | 171 ++++----
 .../gpu/cl_src/remeshing/weights_noVec.cl     | 171 ++++----
 .../cl_src/remeshing/weights_noVec_builtin.cl | 169 ++++----
 HySoP/hysop/gpu/config_default.py             |   5 +
 HySoP/hysop/gpu/config_k20m.py                |   5 +
 HySoP/hysop/gpu/gpu_operator.py               |   2 +-
 HySoP/hysop/gpu/tools.py                      |  78 +++-
 HySoP/hysop/operator/monitors/printer.py      |   2 +-
 49 files changed, 1880 insertions(+), 1568 deletions(-)
 delete mode 100644 HySoP/hysop/.pyflymakercc
 create mode 100644 HySoP/hysop/gpu/cl_src/advection/comm_basic_rk2_noVec.cl
 create mode 100644 HySoP/hysop/gpu/cl_src/advection/comm_builtin_rk2_noVec.cl
 delete mode 100644 HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_vector_2d.cl
 delete mode 100644 HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_vector_3d.cl
 create mode 100644 HySoP/hysop/gpu/cl_src/kernels/comm_MS_advection_noVec.cl
 create mode 100644 HySoP/hysop/gpu/cl_src/kernels/comm_advection_noVec.cl
 create mode 100644 HySoP/hysop/gpu/cl_src/kernels/comm_diffusion.cl
 create mode 100644 HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl
 create mode 100644 HySoP/hysop/gpu/cl_src/kernels/diffusion.cl
 create mode 100644 HySoP/hysop/gpu/cl_src/kernels/minmax_buffers.cl
 delete mode 100644 HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_2d.cl
 delete mode 100644 HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_3d.cl
 delete mode 100644 HySoP/hysop/gpu/cl_src/remeshing/basic_vector_2d.cl
 delete mode 100644 HySoP/hysop/gpu/cl_src/remeshing/basic_vector_3d.cl
 create mode 100644 HySoP/hysop/gpu/cl_src/remeshing/comm_basic_noVec.cl
 delete mode 100644 HySoP/hysop/gpu/cl_src/remeshing/private_vector_2d.cl
 delete mode 100644 HySoP/hysop/gpu/cl_src/remeshing/private_vector_3d.cl

diff --git a/.gitignore b/.gitignore
index 2af93fbc7..97b084e5e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 *.pyc
-parmepy/__init__.py
\ No newline at end of file
+parmepy/__init__.py
+parmepy/.pyflymakercc
diff --git a/HySoP/hysop/.pyflymakercc b/HySoP/hysop/.pyflymakercc
deleted file mode 100644
index e53a15c69251dff9ade2f58f5c149de02b3225f2..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 267
zcmZSn%**v=^Tgm}1}I<#(hfjeECVD`7#PwS8KQs;Murq7h7@LobY_MqAe)6DSc4U)
zpo9fTxEh(6mVkIc29}1#FqVN4jAdl#r@;hLEC?h#-TnN7T;rYnU0j3X16%_vN`x^)
zBYiymLNtKF3<#pw70A^OElw>e)^{sP%uCMJ56(|3Dc1K*Ey>7FNi7aYEGo%NF3m~I
yEJ`ib2l8@(Q~`)8sMOahs7%YL%uUQrElMuo0Geczo1apelWGTYb}`6toQwb$-8%*V

diff --git a/HySoP/hysop/gpu/cl_src/advection/basic_rk2.cl b/HySoP/hysop/gpu/cl_src/advection/basic_rk2.cl
index 2068ac88c..39b7639b7 100644
--- a/HySoP/hysop/gpu/cl_src/advection/basic_rk2.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/basic_rk2.cl
@@ -3,7 +3,7 @@
  * Advection function, vectorized version, no use of builtins functions.
  */
 
-float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache);
+float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position);
 
 
 /**
@@ -22,7 +22,7 @@ float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __l
  * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component.
  * @see parmepy.gpu.tools.parse_file
  */
-float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache)
+float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position)
 {
   float__N__ v,        		/* Velocity at point */
     vp,				/* Velocity at right point */
@@ -34,6 +34,7 @@ float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __l
 
   c = (float__N__)((i+__NN__)*dx,
 		   );
+  c = c + min_position;
 
 #if V_NB_I == NB_I
   // single-scale:
diff --git a/HySoP/hysop/gpu/cl_src/advection/basic_rk2_noVec.cl b/HySoP/hysop/gpu/cl_src/advection/basic_rk2_noVec.cl
index 9f19c4c74..d805563e2 100644
--- a/HySoP/hysop/gpu/cl_src/advection/basic_rk2_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/basic_rk2_noVec.cl
@@ -3,7 +3,7 @@
  * Advection function, basic version
  */
 
-float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache);
+float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position);
 
 
 /**
@@ -19,12 +19,12 @@ float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local
  *
  * @remark NB_I, NB_II, NB_III : points number in directions from 1st varying index to last.
  */
-float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache)
+float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position)
 {
   float v, 			/* Velocity at point */
     vp,				/* Velocity at right point */
     p,				/* Normalized intermediary position */
-    c = i * dx,			/* initial coordinate */
+    c = i * dx + min_position,  /* initial coordinate */
     hdt = 0.5 * dt;		/* half time step */
   int i_ind,			/* Interpolation left point */
     i_ind_p;			/* Interpolation right point */
diff --git a/HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl b/HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl
index 10a19921d..cc50ed3ce 100644
--- a/HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/basic_rk4.cl
@@ -3,7 +3,7 @@
  * Advection function (RK4 scheme), vectorized version, no use of builtins functions.
  */
 
-float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache);
+float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position);
 
 
 /**
@@ -22,7 +22,7 @@ float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __l
  * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component.
  * @see parmepy.gpu.tools.parse_file
  */
-float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache)
+float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position)
 {
   float__N__ v,        		/* Velocity at point */
     vp,				/* Velocity at right point */
@@ -36,6 +36,7 @@ float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __l
 
   c = (float__N__)((i+__NN__)*dx,
 		   );
+  c = c + min_position;
 
   //k1 = f(t,y)
   //k2 = f(t + dt/2, y + dt/2 * k1)
diff --git a/HySoP/hysop/gpu/cl_src/advection/basic_rk4_noVec.cl b/HySoP/hysop/gpu/cl_src/advection/basic_rk4_noVec.cl
index 1314370e8..dbbe1f395 100644
--- a/HySoP/hysop/gpu/cl_src/advection/basic_rk4_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/basic_rk4_noVec.cl
@@ -3,7 +3,7 @@
  * Advection function (RK4 scheme), basic version
  */
 
-float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache);
+float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position);
 
 
 /**
@@ -19,14 +19,14 @@ float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local
  *
  * @remark NB_I, NB_II, NB_III : points number in directions from 1st varying index to last.
  */
-float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache)
+float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position)
 {
   float v, 			/* Velocity at point */
     vp,				/* Velocity at right point */
     p,				/* Intermediary position */
     k,				/* rk averaged velocity */
     kn,				/* rk intermediate velocity */
-    c = i * dx,			/* initial coordinate */
+    c = i * dx + min_position,  /* initial coordinate */
     hdt = 0.5 * dt;		/* half time step */
   int i_ind,			/* Interpolation left point */
     i_ind_p;			/* Interpolation right point */
@@ -91,4 +91,3 @@ float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local
 /*   - 3 iterpolation = 3 * 9 */
 /*   - velocity weights = 5*/
 /* Total = 41 */
-
diff --git a/HySoP/hysop/gpu/cl_src/advection/builtin_rk2.cl b/HySoP/hysop/gpu/cl_src/advection/builtin_rk2.cl
index 958cdd3d8..0a7ac4ac5 100644
--- a/HySoP/hysop/gpu/cl_src/advection/builtin_rk2.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/builtin_rk2.cl
@@ -3,7 +3,7 @@
  * Advection function, vectorized version.
  */
 
-float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache);
+float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position);
 
 
 /**
@@ -22,7 +22,7 @@ float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __l
  * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component.
  * @see parmepy.gpu.tools.parse_file
  */
-float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache)
+float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position)
 {
   float__N__ v,        		/* Velocity at point */
     vp,				/* Velocity at right point */
@@ -34,6 +34,7 @@ float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __l
 
   c = (float__N__)((i+__NN__)*dx,
 		       );
+  c = c + min_position;
 
 #if V_NB_I == NB_I
   // single-scale:
diff --git a/HySoP/hysop/gpu/cl_src/advection/builtin_rk2_noVec.cl b/HySoP/hysop/gpu/cl_src/advection/builtin_rk2_noVec.cl
index 76712a015..5e4e8dcd1 100644
--- a/HySoP/hysop/gpu/cl_src/advection/builtin_rk2_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/builtin_rk2_noVec.cl
@@ -3,7 +3,7 @@
  * Advection function, basic version
  */
 
-float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache);
+float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position);
 
 
 /**
@@ -19,11 +19,11 @@ float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local
  *
  * @remark NB_I, NB_II, NB_III : points number in directions from 1st varying index to last.
  */
-float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache)
+float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position)
 {
   float v, 			/* Velocity at point */
     p,				/* Intermediary position */
-    c = i * dx,			/* initial coordinate */
+    c = i * dx + min_position,	/* initial coordinate */
     hdt = 0.5 * dt;		/* half time step */
   int i_ind,			/* Interpolation left point */
     i_ind_p;			/* Interpolation right point */
@@ -58,4 +58,3 @@ float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local
 /*   - 2 positions = 2 * fma */
 /*   - 1 iterpolation = 6 + 1 * mix */
 /* Total = 2 fma + 1 mix + 6 */
-
diff --git a/HySoP/hysop/gpu/cl_src/advection/builtin_rk4.cl b/HySoP/hysop/gpu/cl_src/advection/builtin_rk4.cl
index 0725c4148..18b5366cb 100644
--- a/HySoP/hysop/gpu/cl_src/advection/builtin_rk4.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/builtin_rk4.cl
@@ -3,7 +3,7 @@
  * Advection function, vectorized version.
  */
 
-float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache);
+float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position);
 
 
 /**
@@ -22,7 +22,7 @@ float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __l
  * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component.
  * @see parmepy.gpu.tools.parse_file
  */
-float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache)
+float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position)
 {
   float__N__ v,        		/* Velocity at point */
     vp,				/* Velocity at right point */
@@ -36,6 +36,7 @@ float__N__ advection(uint i, float dt, float dx, float invdx, float v_invdx, __l
 
   c = (float__N__)((i+__NN__)*dx,
 		       );
+  c = c + min_position;
 
 #if V_NB_I == NB_I
   // single-scale:
diff --git a/HySoP/hysop/gpu/cl_src/advection/builtin_rk4_noVec.cl b/HySoP/hysop/gpu/cl_src/advection/builtin_rk4_noVec.cl
index c5b786c55..bd92b34a5 100644
--- a/HySoP/hysop/gpu/cl_src/advection/builtin_rk4_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/builtin_rk4_noVec.cl
@@ -3,7 +3,7 @@
  * Advection function, basic version
  */
 
-float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache);
+float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position);
 
 
 /**
@@ -19,12 +19,12 @@ float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local
  *
  * @remark NB_I, NB_II, NB_III : points number in directions from 1st varying index to last.
  */
-float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache)
+float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position)
 {
   float p,		       /* Intermediary position */
     k,			       /* rk averaged velocity */
     kn,			       /* rk intermediate velocity */
-    c = i * dx,		       /* initial coordinate */
+    c = i * dx + min_position, /* initial coordinate */
     hdt = 0.5 * dt;	       /* half time step */
   int i_ind,		       /* Interpolation left point */
     i_ind_p;		       /* Interpolation right point */
diff --git a/HySoP/hysop/gpu/cl_src/advection/comm_basic_rk2_noVec.cl b/HySoP/hysop/gpu/cl_src/advection/comm_basic_rk2_noVec.cl
new file mode 100644
index 000000000..fcb2dff72
--- /dev/null
+++ b/HySoP/hysop/gpu/cl_src/advection/comm_basic_rk2_noVec.cl
@@ -0,0 +1,74 @@
+/**
+ * @file advection/comm_basic_rk2_noVec.cl
+ * Advection function, basic version, MPI communications on the host side
+ */
+
+float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position);
+
+
+/**
+ * Compute the position of a particle with a RK2 integration scheme. Velocity is linearly interpolated from the local velocity cache.
+ * The RK2 stages use plain arithmetic; the builtin OpenCL function mix is only used for the multi-scale initial velocity.
+ *
+ * @param i Particle index.
+ * @param dt Time step.
+ * @param dx Space step.
+ * @param invdx 1/dx (not used by this function).
+ * @param v_invdx Factor applied to positions before taking velocity indices (presumably 1/dx of the velocity grid -- TODO confirm).
+ * @param velocity_cache Local velocity field.
+ * @param min_position Offset added to i*dx to build the initial coordinate.
+ * @return Particle position, or the scaled half-step position shifted by 1000*T_NB_I when its index fails the range test.
+ * @remark NB_I, NB_II, NB_III : points number in directions from 1st varying index to last.
+ * @remark T_NB_I: global points number in the 1st direction (direction split across MPI processes)
+ * @remark V_START_INDEX, V_STOP_INDEX : global starting and stop indices for computational points (the code uses the V_-prefixed names)
+ */
+float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position)
+{
+  float v, 			/* Velocity at point */
+    vp,				/* Velocity at right point */
+    p,				/* Normalized intermediary position */
+    c = i * dx + min_position, /* initial coordinate */
+    hdt = 0.5 * dt;		/* half time step */
+  int i_ind,			/* Interpolation left point */
+    i_ind_p;			/* Interpolation right point */
+
+#if (V_NB_I-2*V_GHOSTS_NB) == NB_I
+  // single-scale:
+  v = velocity_cache[noBC_id(i + V_GHOSTS_NB)]; 	/* k = k1 */
+#else
+  // multi-scale : interpolate v from velocity buffer (of length V_NB_I)
+  p = c * v_invdx;
+  i_ind = convert_int_rtn(p); /* conversion rounds toward -infinity */
+  p = p - convert_float(i_ind); /* fractional part, used as interpolation weight */
+  i_ind = i_ind - (V_START_INDEX-V_GHOSTS_NB); /* shift to a velocity_cache index */
+  i_ind_p = i_ind + 1;
+  v = mix(velocity_cache[noBC_id(i_ind)],
+	  velocity_cache[noBC_id(i_ind_p)],p);
+#endif
+  p = (c + hdt*v) * v_invdx; /* half-step position scaled by v_invdx */
+
+  i_ind = convert_int_rtn(p); /* conversion rounds toward -infinity */
+  if( i_ind>=(V_START_INDEX-MS_INTERPOL_SHIFT) && i_ind < (V_STOP_INDEX-V_GHOSTS_NB))
+    { /* index passes the range test: finish the RK2 step */
+      p = p - convert_float(i_ind); /* fractional part, used as interpolation weight */
+
+      i_ind = i_ind - (V_START_INDEX-V_GHOSTS_NB); /* shift to a velocity_cache index */
+      i_ind_p = i_ind + 1;
+
+      v = velocity_cache[noBC_id(i_ind)];
+      vp = velocity_cache[noBC_id(i_ind_p)];
+      v = (p*(vp-v) + v); /* linear interpolation between v and vp */
+
+      p = c + dt * v; /* full step from the initial coordinate */
+    }
+  else
+    { /* index fails the range test */
+      p = (1000*T_NB_I)*1.0 + p; /* shift scaled p by 1000*T_NB_I (presumably a marker for the caller -- TODO confirm) */
+    }
+
+  return p;
+}
+/* Operations number :  */
+/*   - 2 positions = 2 * 2 */
+/*   - 1 interpolation = 9 */
+/* Total = 13 */
diff --git a/HySoP/hysop/gpu/cl_src/advection/comm_builtin_rk2_noVec.cl b/HySoP/hysop/gpu/cl_src/advection/comm_builtin_rk2_noVec.cl
new file mode 100644
index 000000000..66e1eff53
--- /dev/null
+++ b/HySoP/hysop/gpu/cl_src/advection/comm_builtin_rk2_noVec.cl
@@ -0,0 +1,73 @@
+/**
+ * @file advection/comm_builtin_rk2_noVec.cl
+ * Advection function, builtin version, MPI communications on the host side
+ */
+
+float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position);
+
+
+/**
+ * Compute the position of a particle with a RK2 integration scheme. Velocity is linearly interpolated from the local velocity cache.
+ * Use of builtin OpenCL functions fma and mix.
+ *
+ * @param i Particle index (velocity ghost points not counted).
+ * @param dt Time step.
+ * @param dx Space step.
+ * @param invdx 1/dx (not used by this function).
+ * @param v_invdx Factor applied to positions before taking velocity indices (presumably 1/dx of the velocity grid -- TODO confirm).
+ * @param velocity_cache Local velocity field.
+ * @param min_position Offset added to i*dx to build the initial coordinate.
+ * @return Particle position, or the scaled half-step position shifted by 1000*T_NB_I when its index fails the range test.
+ * @remark NB_I, NB_II, NB_III : points number in directions from 1st varying index to last.
+ * @remark T_NB_I: global points number in the 1st direction (direction split across MPI processes)
+ * @remark V_START_INDEX, V_STOP_INDEX : global starting and stop indices for computational points (the code uses the V_-prefixed names)
+ */
+float advection(uint i, float dt, float dx, float invdx, float v_invdx, __local float* velocity_cache, float min_position)
+{
+  float v, 			/* Velocity at point */
+    p,				/* Intermediary position */
+    c = i * dx + min_position,  /* initial coordinate */
+    hdt = 0.5 * dt;		/* half time step */
+  int i_ind,			/* Interpolation left point */
+    i_ind_p;			/* Interpolation right point */
+
+#if (V_NB_I-2*V_GHOSTS_NB) == NB_I
+  // single scale:
+  v = velocity_cache[noBC_id(i + V_GHOSTS_NB)];
+#else
+  // multi-scale : interpolate v from velocity buffer (of length V_NB_I)
+  p = c * v_invdx;
+  i_ind = convert_int_rtn(p); /* conversion rounds toward -infinity */
+  p = p - convert_float(i_ind); /* fractional part, used as interpolation weight */
+  i_ind = i_ind - (V_START_INDEX-V_GHOSTS_NB); /* shift to a velocity_cache index */
+  i_ind_p = i_ind + 1;
+  v = mix(velocity_cache[noBC_id(i_ind)],
+  	  velocity_cache[noBC_id(i_ind_p)],p);
+#endif
+
+  p = fma(hdt, v, c) * v_invdx; /* half-step position scaled by v_invdx */
+  i_ind = convert_int_rtn(p); /* conversion rounds toward -infinity */
+  if( i_ind>=(V_START_INDEX-MS_INTERPOL_SHIFT) && i_ind < (V_STOP_INDEX-V_GHOSTS_NB))
+    { /* index passes the range test: finish the RK2 step */
+      p = p - convert_float(i_ind); /* fractional part, used as interpolation weight */
+
+      i_ind = i_ind - (V_START_INDEX-V_GHOSTS_NB); /* shift to a velocity_cache index */
+      i_ind_p = i_ind + 1;
+
+      v = mix(velocity_cache[noBC_id(i_ind)],
+      	      velocity_cache[noBC_id(i_ind_p)],p);
+
+      p = fma(dt, v, c); /* full step from the initial coordinate */
+    }
+  else
+    { /* index fails the range test */
+      p = (1000*T_NB_I)*1.0 + p; /* shift scaled p by 1000*T_NB_I (presumably a marker for the caller -- TODO confirm) */
+    }
+
+  return p;
+}
+/* Operations number :  */
+/*   - 2 positions = 2 * fma */
+/*   - 1 interpolation = 6 + 1 * mix */
+/* Total = 2 fma + 1 mix + 6 */
+
diff --git a/HySoP/hysop/gpu/cl_src/advection/velocity_cache.cl b/HySoP/hysop/gpu/cl_src/advection/velocity_cache.cl
index 9ecd87b49..bfe728c28 100644
--- a/HySoP/hysop/gpu/cl_src/advection/velocity_cache.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/velocity_cache.cl
@@ -3,46 +3,6 @@ void fill_velocity_cache(__global const float* gvelo,
 			 float4 dx, float4 v_dx,
 			 __local float* gvelo_loc);
 
-
-inline float alpha_l2_1(float y){
-  return ((y * (y * (-y + 2.0) - 1.0)) / 2.0);}
-inline float beta_l2_1(float y){
-  return ((y * y * (3.0 * y - 5.0) + 2.0) / 2.0);}
-inline float gamma_l2_1(float y){
-  return ((y * (y * (-3.0 * y + 4.0) + 1.0)) / 2.0);}
-inline float delta_l2_1(float y){
-  return ((y * y * (y - 1.0)) / 2.0);}
-
-
-inline float alpha_l4_2(float y){
-  return ((y * (y * (y * (y * (-5.0 * y + 13.0) - 9.0) - 1.0) + 2.0)) / 24.0);}
-inline float beta_l4_2(float y){
-  return ((y * (y * (y * (y * (25.0 * y - 64.0) + 39.0) + 16.0) - 16.0)) / 24.0);}
-inline float gamma_l4_2(float y){
-  return ((y * y * (y * (y * (-50.0 * y + 126.0) - 70.0) - 30.0) + 24.0) / 24.0);}
-inline float delta_l4_2(float y){
-  return ((y * (y * (y * (y * (50.0 * y - 124.0) + 66.0) + 16.0) + 16.0)) / 24.0);}
-inline float eta_l4_2(float y){
-  return ((y * (y * (y * (y * (-25.0 * y + 61.0) - 33.0) - 1.0) - 2.0)) / 24.0);}
-inline float zeta_l4_2(float y){
-  return ((y * y * y * (y * (5.0 * y - 12.0) + 7.0)) / 24.0);}
-
-
-inline float alpha_l4_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-46.0 * y + 207.0) - 354.0) + 273.0) - 80.0) + 1.0) - 2.0) - 1.0) + 2.0)) / 24.0);}
-inline float beta_l4_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (230.0 * y - 1035.0) + 1770.0) - 1365.0) + 400.0) - 4.0) + 4.0) + 16.0) - 16.0)) / 24.0);}
-inline float gamma_l4_4(float y){
-  return ((y * y * (y * y * (y * (y * (y * (y * (-460.0 * y + 2070.0) - 3540.0) + 2730.0) - 800.0) + 6.0) - 30.0) + 24.0) / 24.0);}
-inline float delta_l4_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (460.0 * y - 2070.0) + 3540.0) - 2730.0) + 800.0) - 4.0) - 4.0) + 16.0) + 16.0)) / 24.0);}
-inline float eta_l4_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-230.0 * y + 1035.0) - 1770.0) + 1365.0) - 400.0) + 1.0) + 2.0) - 1.0) - 2.0)) / 24.0);}
-inline float zeta_l4_4(float y){
-  return ((y * y * y * y * y * (y * (y * (y * (46.0 * y - 207.0) + 354.0) - 273.0) + 80.0)) / 24.0);}
-
-/*** TODO: correct this file to work properly with vector enable remehsing weights ***/
-
 void fill_velocity_cache(__global const float* gvelo,
 			 uint gidX, uint gidY, uint gidZ,
 			 float4 dx, float4 v_dx,
@@ -50,10 +10,10 @@ void fill_velocity_cache(__global const float* gvelo,
 {
   uint i;
   float__N__ v;
-#if V_NB_I == NB_I
+#if (V_NB_I-2*V_GHOSTS_NB) == NB_I
   // Single scale : Velocity and scalar grids are identical : cache is just read from global
-  uint line_index = gidY*NB_I + gidZ*NB_I*NB_II; /* Current 1D problem index */
-  for(i=gidX*__N__; i<NB_I; i+=(WI_NB*__N__))
+  uint line_index = gidY*V_NB_I + gidZ*V_NB_I*V_NB_II; /* Current 1D problem index */
+  for(i=gidX*__N__; i<V_NB_I; i+=(WI_NB*__N__))
     {
       /* Read velocity */
       v = vload__N__((i+line_index)/__N__, gvelo);
@@ -88,76 +48,41 @@ void fill_velocity_cache(__global const float* gvelo,
 #if MS_FORMULA == MS_LINEAR
   wY.s1 = hY;
   wY.s0 = 1.0 - wY.s1;
-#elif MS_FORMULA == MS_L2_1
-  wY.s0 = alpha_l2_1(hY);
-  wY.s1 = beta_l2_1(hY);
-  wY.s2 = gamma_l2_1(hY);
-  wY.s3 = 1.0 - wY.s0 - wY.s1 - wY.s2;
-#elif MS_FORMULA == MS_L4_2
-  wY.s0 = alpha_l4_2(hY);
-  wY.s1 = beta_l4_2(hY);
-  wY.s2 = gamma_l4_2(hY);
-  wY.s3 = delta_l4_2(hY);
-  wY.s4 = eta_l4_2(hY);
-  wY.s5 = 1.0 - wY.s0 - wY.s1 - wY.s2 - wY.s3 - wY.s4;
-#elif MS_FORMULA == MS_l4_4
-  wY.s0 = alpha_l4_4(hY);
-  wY.s1 = beta_l4_4(hY);
-  wY.s2 = gamma_l4_4(hY);
-  wY.s3 = delta_l4_4(hY);
-  wY.s4 = eta_l4_4(hY);
+#else
+  wY.s0 = MS_INTERPOL(alpha)(hY);
+  wY.s1 = MS_INTERPOL(beta)(hY);
+  wY.s2 = MS_INTERPOL(gamma)(hY);
+#if MS_INTERPOL_SHIFT > 1
+  wY.s3 = MS_INTERPOL(delta)(hY);
+  wY.s4 = MS_INTERPOL(eta)(hY);
   wY.s5 = 1.0 - wY.s0 - wY.s1 - wY.s2 - wY.s3 - wY.s4;
+#else
+  wY.s3 = 1.0 - wY.s0 - wY.s1 - wY.s2;
+#endif
 #endif
 
-  indY = indY + V_GHOSTS_NB;
+  indY = indY + V_GHOSTS_NB - MS_INTERPOL_SHIFT;
 
-#if MS_FORMULA == MS_LINEAR
   v_line_index.s0 = indY * V_NB_I;
   v_line_index.s1 = (indY + 1) * V_NB_I;
-#elif MS_FORMULA == MS_L2_1
-  v_line_index.s0 = (indY - 1) * V_NB_I;
-  v_line_index.s1 = (indY) * V_NB_I;
-  v_line_index.s2 = (indY + 1) * V_NB_I;
-  v_line_index.s3 = (indY + 2) * V_NB_I;
-#elif MS_FORMULA == MS_L4_2 || MS_FORMULA == MS_L4_4
-  v_line_index.s0 = (indY - 2) * V_NB_I;
-  v_line_index.s1 = (indY - 1) * V_NB_I;
-  v_line_index.s2 = (indY) * V_NB_I;
-  v_line_index.s3 = (indY + 1) * V_NB_I;
-  v_line_index.s4 = (indY + 2) * V_NB_I;
-  v_line_index.s5 = (indY + 3) * V_NB_I;
+#if MS_INTERPOL_SHIFT > 0
+  v_line_index.s2 = (indY + 2) * V_NB_I;
+  v_line_index.s3 = (indY + 3) * V_NB_I;
+#elif MS_INTERPOL_SHIFT > 1
+  v_line_index.s4 = (indY + 4) * V_NB_I;
+  v_line_index.s5 = (indY + 5) * V_NB_I;
 #endif
 
-
   for(i=gidX*__N__; i<V_NB_I; i+=(WI_NB*__N__))
     {
-#if MS_FORMULA == MS_LINEAR
-      v = vload__N__((i+v_line_index.s0)/__N__, gvelo) * wY.s0;
-      velocity_cache[noBC_id(i+__NN__)] = v.s__NN__;
-      v = vload__N__((i+v_line_index.s1)/__N__, gvelo) * wY.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-#elif MS_FORMULA == MS_L2_1
-      v = vload__N__((i+v_line_index.s0)/__N__, gvelo) * wY.s0;
-      velocity_cache[noBC_id(i+__NN__)] = v.s__NN__;
-      v = vload__N__((i+v_line_index.s1)/__N__, gvelo) * wY.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i+v_line_index.s2)/__N__, gvelo) * wY.s2;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i+v_line_index.s3)/__N__, gvelo) * wY.s3;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-#elif MS_FORMULA == MS_L4_2 || MS_FORMULA == MS_L4_4
-      v = vload__N__((i+v_line_index.s0)/__N__, gvelo) * wY.s0;
-      velocity_cache[noBC_id(i+__NN__)] = v.s__NN__;
-      v = vload__N__((i+v_line_index.s1)/__N__, gvelo) * wY.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i+v_line_index.s2)/__N__, gvelo) * wY.s2;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i+v_line_index.s3)/__N__, gvelo) * wY.s3;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i+v_line_index.s4)/__N__, gvelo) * wY.s4;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i+v_line_index.s5)/__N__, gvelo) * wY.s5;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
+    gvelo_loc[noBC_id(i)] = wY.s0 * gvelo[i + v_line_index.s0];
+    gvelo_loc[noBC_id(i)] += wY.s1 * gvelo[i + v_line_index.s1];
+#if MS_INTERPOL_SHIFT > 0
+    gvelo_loc[noBC_id(i)] += wY.s2 * gvelo[i + v_line_index.s2];
+    gvelo_loc[noBC_id(i)] += wY.s3 * gvelo[i + v_line_index.s3];
+#elif MS_INTERPOL_SHIFT > 1
+    gvelo_loc[noBC_id(i)] += wY.s4 * gvelo[i + v_line_index.s4];
+    gvelo_loc[noBC_id(i)] += wY.s5 * gvelo[i + v_line_index.s5];
 #endif
     }
 
@@ -191,203 +116,94 @@ void fill_velocity_cache(__global const float* gvelo,
   wY.s0 = 1.0 - wY.s1;
   wZ.s1 = hZ;
   wZ.s0 = 1.0 - wZ.s1;
-#elif MS_FORMULA == MS_L2_1
-  wY.s0 = alpha_l2_1(hY);
-  wY.s1 = beta_l2_1(hY);
-  wY.s2 = gamma_l2_1(hY);
-  wY.s3 = 1.0 - wY.s0 - wY.s1 - wY.s2;
-  wZ.s0 = alpha_l2_1(hZ);
-  wZ.s1 = beta_l2_1(hZ);
-  wZ.s2 = gamma_l2_1(hZ);
-  wZ.s3 = 1.0 - wZ.s0 - wZ.s1 - wZ.s2;
-#elif MS_FORMULA == MS_L4_2
-  wY.s0 = alpha_l4_2(hY);
-  wY.s1 = beta_l4_2(hY);
-  wY.s2 = gamma_l4_2(hY);
-  wY.s3 = delta_l4_2(hY);
-  wY.s4 = eta_l4_2(hY);
-  wY.s5 = 1.0 - wY.s0 - wY.s1 - wY.s2 - wY.s3 - wY.s4;
-  wZ.s0 = alpha_l4_2(hZ);
-  wZ.s1 = beta_l4_2(hZ);
-  wZ.s2 = gamma_l4_2(hZ);
-  wZ.s3 = delta_l4_2(hZ);
-  wZ.s4 = eta_l4_2(hZ);
-  wZ.s5 = 1.0 - wZ.s0 - wZ.s1 - wZ.s2 - wZ.s3 - wZ.s4;
-#elif MS_FORMULA == MS_L4_4
-  wY.s0 = alpha_l4_4(hY);
-  wY.s1 = beta_l4_4(hY);
-  wY.s2 = gamma_l4_4(hY);
-  wY.s3 = delta_l4_4(hY);
-  wY.s4 = eta_l4_4(hY);
+#else
+  wY.s0 = MS_INTERPOL(alpha)(hY);
+  wY.s1 = MS_INTERPOL(beta)(hY);
+  wY.s2 = MS_INTERPOL(gamma)(hY);
+  wZ.s0 = MS_INTERPOL(alpha)(hZ);
+  wZ.s1 = MS_INTERPOL(beta)(hZ);
+  wZ.s2 = MS_INTERPOL(gamma)(hZ);
+#if MS_INTERPOL_SHIFT > 1
+  wY.s3 = MS_INTERPOL(delta)(hY);
+  wY.s4 = MS_INTERPOL(eta)(hY);
   wY.s5 = 1.0 - wY.s0 - wY.s1 - wY.s2 - wY.s3 - wY.s4;
-  wZ.s0 = alpha_l4_4(hZ);
-  wZ.s1 = beta_l4_4(hZ);
-  wZ.s2 = gamma_l4_4(hZ);
-  wZ.s3 = delta_l4_4(hZ);
-  wZ.s4 = eta_l4_4(hZ);
+  wZ.s3 = MS_INTERPOL(delta)(hZ);
+  wZ.s4 = MS_INTERPOL(eta)(hZ);
   wZ.s5 = 1.0 - wZ.s0 - wZ.s1 - wZ.s2 - wZ.s3 - wZ.s4;
+#else
+  wY.s3 = 1.0 - wY.s0 - wY.s1 - wY.s2;
+  wZ.s3 = 1.0 - wZ.s0 - wZ.s1 - wZ.s2;
+#endif
 #endif
 
-  indY = indY + V_GHOSTS_NB;
-  indZ = indZ + V_GHOSTS_NB;
+ indY = indY + V_GHOSTS_NB - MS_INTERPOL_SHIFT;
+ indZ = indZ + V_GHOSTS_NB - MS_INTERPOL_SHIFT;
 
-#if MS_FORMULA == MS_LINEAR
   v_line_indexY.s0 = indY * V_NB_I;
   v_line_indexY.s1 = (indY + 1) * V_NB_I;
   v_line_indexZ.s0 = indZ * V_NB_I * V_NB_II;
   v_line_indexZ.s1 = (indZ + 1) * V_NB_I * V_NB_II;
-#elif MS_FORMULA == MS_L2_1
-  v_line_indexY.s0 = (indY - 1) * V_NB_I;
-  v_line_indexY.s1 = (indY) * V_NB_I;
-  v_line_indexY.s2 = (indY + 1) * V_NB_I;
-  v_line_indexY.s3 = (indY + 2) * V_NB_I;
-  v_line_indexZ.s0 = (indZ - 1) * V_NB_I * V_NB_II;
-  v_line_indexZ.s1 = (indZ) * V_NB_I * V_NB_II;
-  v_line_indexZ.s2 = (indZ + 1) * V_NB_I * V_NB_II;
-  v_line_indexZ.s3 = (indZ + 2) * V_NB_I * V_NB_II;
-#elif MS_FORMULA == MS_L4_2 || MS_FORMULA == MS_L4_4
-  v_line_indexY.s0 = (indY - 2) * V_NB_I;
-  v_line_indexY.s1 = (indY - 1) * V_NB_I;
-  v_line_indexY.s2 = (indY) * V_NB_I;
-  v_line_indexY.s3 = (indY + 1) * V_NB_I;
-  v_line_indexY.s4 = (indY + 2) * V_NB_I;
-  v_line_indexY.s5 = (indY + 3) * V_NB_I;
-  v_line_indexZ.s0 = (indZ - 2) * V_NB_I * V_NB_II;
-  v_line_indexZ.s1 = (indZ - 1) * V_NB_I * V_NB_II;
-  v_line_indexZ.s2 = (indZ) * V_NB_I * V_NB_II;
-  v_line_indexZ.s3 = (indZ + 1) * V_NB_I * V_NB_II;
-  v_line_indexZ.s4 = (indZ + 2) * V_NB_I * V_NB_II;
-  v_line_indexZ.s5 = (indZ + 3) * V_NB_I * V_NB_II;
+#if MS_INTERPOL_SHIFT > 0
+  v_line_indexY.s2 = (indY + 2) * V_NB_I;
+  v_line_indexY.s3 = (indY + 3) * V_NB_I;
+  v_line_indexZ.s2 = (indZ + 2) * V_NB_I * V_NB_II;
+  v_line_indexZ.s3 = (indZ + 3) * V_NB_I * V_NB_II;
+#endif
+/* Separate #if (not #elif): a shift above 1 needs offsets s4, s5 in addition to s2, s3. */
+#if MS_INTERPOL_SHIFT > 1
+  v_line_indexY.s4 = (indY + 4) * V_NB_I; v_line_indexY.s5 = (indY + 5) * V_NB_I;
+  v_line_indexZ.s4 = (indZ + 4) * V_NB_I * V_NB_II; v_line_indexZ.s5 = (indZ + 5) * V_NB_I * V_NB_II;
 #endif
 
 
   for(i=gidX*__N__; i<V_NB_I; i+=(WI_NB*__N__))
     {
-#if MS_FORMULA == MS_LINEAR
-      v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s0)/__N__, gvelo) * wY.s0 * wZ.s0;
-      velocity_cache[noBC_id(i+__NN__)] = v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s1)/__N__, gvelo) * wY.s0 * wZ.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s0)/__N__, gvelo) * wY.s1 * wZ.s0;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s1)/__N__, gvelo) * wY.s1 * wZ.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-
-#elif MS_FORMULA == MS_L2_1
-      v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s0)/__N__, gvelo) * wY.s0 * wZ.s0;
-      velocity_cache[noBC_id(i+__NN__)] = v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s1)/__N__, gvelo) * wY.s0 * wZ.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s2)/__N__, gvelo) * wY.s0 * wZ.s2;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s3)/__N__, gvelo) * wY.s0 * wZ.s3;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-
-      v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s0)/__N__, gvelo) * wY.s1 * wZ.s0;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s1)/__N__, gvelo) * wY.s1 * wZ.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s2)/__N__, gvelo) * wY.s1 * wZ.s2;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s3)/__N__, gvelo) * wY.s1 * wZ.s3;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-
-      v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s0)/__N__, gvelo) * wY.s2 * wZ.s0;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s1)/__N__, gvelo) * wY.s2 * wZ.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s2)/__N__, gvelo) * wY.s2 * wZ.s2;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s3)/__N__, gvelo) * wY.s2 * wZ.s3;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-
-      v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s0)/__N__, gvelo) * wY.s3 * wZ.s0;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s1)/__N__, gvelo) * wY.s3 * wZ.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s2)/__N__, gvelo) * wY.s3 * wZ.s2;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s3)/__N__, gvelo) * wY.s3 * wZ.s3;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-
-#elif MS_FORMULA == MS_L4_2 || MS_FORMULA == MS_L4_4
-      v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s0)/__N__, gvelo) * wY.s0 * wZ.s0;
-      velocity_cache[noBC_id(i+__NN__)] = v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s1)/__N__, gvelo) * wY.s0 * wZ.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s2)/__N__, gvelo) * wY.s0 * wZ.s2;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s3)/__N__, gvelo) * wY.s0 * wZ.s3;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s4)/__N__, gvelo) * wY.s0 * wZ.s4;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s5)/__N__, gvelo) * wY.s0 * wZ.s5;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-
-      v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s0)/__N__, gvelo) * wY.s1 * wZ.s0;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s1)/__N__, gvelo) * wY.s1 * wZ.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s2)/__N__, gvelo) * wY.s1 * wZ.s2;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s3)/__N__, gvelo) * wY.s1 * wZ.s3;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s4)/__N__, gvelo) * wY.s1 * wZ.s4;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s5)/__N__, gvelo) * wY.s1 * wZ.s5;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-
-      v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s0)/__N__, gvelo) * wY.s2 * wZ.s0;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s1)/__N__, gvelo) * wY.s2 * wZ.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s2)/__N__, gvelo) * wY.s2 * wZ.s2;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s3)/__N__, gvelo) * wY.s2 * wZ.s3;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s4)/__N__, gvelo) * wY.s2 * wZ.s4;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s5)/__N__, gvelo) * wY.s2 * wZ.s5;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-
-      v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s0)/__N__, gvelo) * wY.s3 * wZ.s0;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s1)/__N__, gvelo) * wY.s3 * wZ.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s2)/__N__, gvelo) * wY.s3 * wZ.s2;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s3)/__N__, gvelo) * wY.s3 * wZ.s3;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s4)/__N__, gvelo) * wY.s3 * wZ.s4;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s5)/__N__, gvelo) * wY.s3 * wZ.s5;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-
-      v = vload__N__((i + v_line_indexY.s4 + v_line_indexZ.s0)/__N__, gvelo) * wY.s4 * wZ.s0;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s4 + v_line_indexZ.s1)/__N__, gvelo) * wY.s4 * wZ.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s4 + v_line_indexZ.s2)/__N__, gvelo) * wY.s4 * wZ.s2;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s4 + v_line_indexZ.s3)/__N__, gvelo) * wY.s4 * wZ.s3;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s4 + v_line_indexZ.s4)/__N__, gvelo) * wY.s4 * wZ.s4;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s4 + v_line_indexZ.s5)/__N__, gvelo) * wY.s4 * wZ.s5;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-
-      v = vload__N__((i + v_line_indexY.s5 + v_line_indexZ.s0)/__N__, gvelo) * wY.s5 * wZ.s0;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s5 + v_line_indexZ.s1)/__N__, gvelo) * wY.s5 * wZ.s1;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s5 + v_line_indexZ.s2)/__N__, gvelo) * wY.s5 * wZ.s2;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s5 + v_line_indexZ.s3)/__N__, gvelo) * wY.s5 * wZ.s3;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s5 + v_line_indexZ.s4)/__N__, gvelo) * wY.s5 * wZ.s4;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
-      v = vload__N__((i + v_line_indexY.s5 + v_line_indexZ.s5)/__N__, gvelo) * wY.s5 * wZ.s5;
-      velocity_cache[noBC_id(i+__NN__)] += v.s__NN__;
+    gvelo_loc[noBC_id(i)] = wY.s0 * wZ.s0 * gvelo[i + v_line_indexY.s0 + v_line_indexZ.s0];
+    gvelo_loc[noBC_id(i)] += wY.s0 * wZ.s1 * gvelo[i + v_line_indexY.s0 + v_line_indexZ.s1];
+    gvelo_loc[noBC_id(i)] += wY.s1 * wZ.s0 * gvelo[i + v_line_indexY.s1 + v_line_indexZ.s0];
+    gvelo_loc[noBC_id(i)] += wY.s1 * wZ.s1 * gvelo[i + v_line_indexY.s1 + v_line_indexZ.s1];
+#if MS_INTERPOL_SHIFT > 0
+    /* 4x4 weights: every (Y,Z) pair with an index in {2,3}; the 2x2 pairs are accumulated just above. */
+    gvelo_loc[noBC_id(i)] += wY.s0 * wZ.s2 * gvelo[i + v_line_indexY.s0 + v_line_indexZ.s2];
+    gvelo_loc[noBC_id(i)] += wY.s0 * wZ.s3 * gvelo[i + v_line_indexY.s0 + v_line_indexZ.s3];
+
+    gvelo_loc[noBC_id(i)] += wY.s1 * wZ.s2 * gvelo[i + v_line_indexY.s1 + v_line_indexZ.s2];
+    gvelo_loc[noBC_id(i)] += wY.s1 * wZ.s3 * gvelo[i + v_line_indexY.s1 + v_line_indexZ.s3];
+
+    gvelo_loc[noBC_id(i)] += wY.s2 * wZ.s0 * gvelo[i + v_line_indexY.s2 + v_line_indexZ.s0];
+    gvelo_loc[noBC_id(i)] += wY.s2 * wZ.s1 * gvelo[i + v_line_indexY.s2 + v_line_indexZ.s1];
+    gvelo_loc[noBC_id(i)] += wY.s2 * wZ.s2 * gvelo[i + v_line_indexY.s2 + v_line_indexZ.s2];
+    gvelo_loc[noBC_id(i)] += wY.s2 * wZ.s3 * gvelo[i + v_line_indexY.s2 + v_line_indexZ.s3];
+
+    gvelo_loc[noBC_id(i)] += wY.s3 * wZ.s0 * gvelo[i + v_line_indexY.s3 + v_line_indexZ.s0];
+    gvelo_loc[noBC_id(i)] += wY.s3 * wZ.s1 * gvelo[i + v_line_indexY.s3 + v_line_indexZ.s1];
+    gvelo_loc[noBC_id(i)] += wY.s3 * wZ.s2 * gvelo[i + v_line_indexY.s3 + v_line_indexZ.s2];
+    gvelo_loc[noBC_id(i)] += wY.s3 * wZ.s3 * gvelo[i + v_line_indexY.s3 + v_line_indexZ.s3];
+#endif
+#if MS_INTERPOL_SHIFT > 1
+    /* 6x6 weights: separate #if (not #elif) so these pairs are added on top of the 4x4 ones. */
+    gvelo_loc[noBC_id(i)] += wY.s0 * wZ.s4 * gvelo[i + v_line_indexY.s0 + v_line_indexZ.s4];
+    gvelo_loc[noBC_id(i)] += wY.s0 * wZ.s5 * gvelo[i + v_line_indexY.s0 + v_line_indexZ.s5];
+    gvelo_loc[noBC_id(i)] += wY.s1 * wZ.s4 * gvelo[i + v_line_indexY.s1 + v_line_indexZ.s4];
+    gvelo_loc[noBC_id(i)] += wY.s1 * wZ.s5 * gvelo[i + v_line_indexY.s1 + v_line_indexZ.s5];
+    gvelo_loc[noBC_id(i)] += wY.s2 * wZ.s4 * gvelo[i + v_line_indexY.s2 + v_line_indexZ.s4];
+    gvelo_loc[noBC_id(i)] += wY.s2 * wZ.s5 * gvelo[i + v_line_indexY.s2 + v_line_indexZ.s5];
+    gvelo_loc[noBC_id(i)] += wY.s3 * wZ.s4 * gvelo[i + v_line_indexY.s3 + v_line_indexZ.s4];
+    gvelo_loc[noBC_id(i)] += wY.s3 * wZ.s5 * gvelo[i + v_line_indexY.s3 + v_line_indexZ.s5];
+
+    gvelo_loc[noBC_id(i)] += wY.s4 * wZ.s0 * gvelo[i + v_line_indexY.s4 + v_line_indexZ.s0];
+    gvelo_loc[noBC_id(i)] += wY.s4 * wZ.s1 * gvelo[i + v_line_indexY.s4 + v_line_indexZ.s1];
+    gvelo_loc[noBC_id(i)] += wY.s4 * wZ.s2 * gvelo[i + v_line_indexY.s4 + v_line_indexZ.s2];
+    gvelo_loc[noBC_id(i)] += wY.s4 * wZ.s3 * gvelo[i + v_line_indexY.s4 + v_line_indexZ.s3];
+    gvelo_loc[noBC_id(i)] += wY.s4 * wZ.s4 * gvelo[i + v_line_indexY.s4 + v_line_indexZ.s4];
+    gvelo_loc[noBC_id(i)] += wY.s4 * wZ.s5 * gvelo[i + v_line_indexY.s4 + v_line_indexZ.s5];
+
+    gvelo_loc[noBC_id(i)] += wY.s5 * wZ.s0 * gvelo[i + v_line_indexY.s5 + v_line_indexZ.s0];
+    gvelo_loc[noBC_id(i)] += wY.s5 * wZ.s1 * gvelo[i + v_line_indexY.s5 + v_line_indexZ.s1];
+    gvelo_loc[noBC_id(i)] += wY.s5 * wZ.s2 * gvelo[i + v_line_indexY.s5 + v_line_indexZ.s2];
+    gvelo_loc[noBC_id(i)] += wY.s5 * wZ.s3 * gvelo[i + v_line_indexY.s5 + v_line_indexZ.s3];
+    gvelo_loc[noBC_id(i)] += wY.s5 * wZ.s4 * gvelo[i + v_line_indexY.s5 + v_line_indexZ.s4];
+    gvelo_loc[noBC_id(i)] += wY.s5 * wZ.s5 * gvelo[i + v_line_indexY.s5 + v_line_indexZ.s5];
 #endif
     }
 #endif
diff --git a/HySoP/hysop/gpu/cl_src/advection/velocity_cache_noVec.cl b/HySoP/hysop/gpu/cl_src/advection/velocity_cache_noVec.cl
index 60a19c0ca..c90cff973 100644
--- a/HySoP/hysop/gpu/cl_src/advection/velocity_cache_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/advection/velocity_cache_noVec.cl
@@ -13,17 +13,16 @@ void fill_velocity_cache(__global const float* gvelo,
   // ********************************
   // **    Single Scale
   // ********************************
-#if V_NB_I == NB_I
+#if (V_NB_I-2*V_GHOSTS_NB) == NB_I
   // Single scale : Velocity and scalar grids are identical : cache is just read from global
-  uint line_index = gidY*NB_I + gidZ*NB_I*NB_II; /* Current 1D problem index */
-  for(i=gidX; i<NB_I; i+=(WI_NB))
+  uint line_index = gidY*V_NB_I + gidZ*V_NB_I*V_NB_II; /* Current 1D problem index */
+  for(i=gidX; i<V_NB_I; i+=(WI_NB))
     {
       /* Read velocity */
       /* Fill velocity cache */
       gvelo_loc[noBC_id(i)] = gvelo[i+line_index];
     }
 
-
   // ********************************
   // **    Multi-Scale
   // ********************************
diff --git a/HySoP/hysop/gpu/cl_src/common.cl b/HySoP/hysop/gpu/cl_src/common.cl
index d482af6bb..34a7ec985 100644
--- a/HySoP/hysop/gpu/cl_src/common.cl
+++ b/HySoP/hysop/gpu/cl_src/common.cl
@@ -124,4 +124,23 @@ inline uint noBC_id(int id){
 #elif MS_FORMULA == L4_4
 #define MS_INTERPOL_SHIFT 2
 #define MS_INTERPOL(greek) greek##_l4_4
+#else
+//Default case for single-scale (only used in comm advection)
+#define MS_INTERPOL_SHIFT 0
 #endif
+
+/*
+A minmax element is an array of 12 ints, indexed as follows:
+*/
+#define L_MIN_X 0
+#define L_MAX_X 1
+#define L_MIN_Y 2
+#define L_MAX_Y 3
+#define L_MIN_Z 4
+#define L_MAX_Z 5
+#define R_MIN_X 6
+#define R_MAX_X 7
+#define R_MIN_Y 8
+#define R_MAX_Y 9
+#define R_MIN_Z 10
+#define R_MAX_Z 11
diff --git a/HySoP/hysop/gpu/cl_src/kernels/advection.cl b/HySoP/hysop/gpu/cl_src/kernels/advection.cl
index 20f0dc22c..538126459 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/advection.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/advection.cl
@@ -45,7 +45,7 @@ __kernel void advection_kernel(__global const float* gvelo,
   for(i=gidX*__N__; i<NB_I; i+=WI_NB*__N__)
     {
       /* Compute position */
-      p = advection(i, dt, dx.x, invdx, v_invdx, velocity_cache) + (float__N__)(min_position);
+      p = advection(i, dt, dx.x, invdx, v_invdx, velocity_cache, min_position);
       /* Store result */
       vstore__N__(p, (i+line_index)/__N__, ppos);
     }
diff --git a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing.cl b/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing.cl
index a204fcb31..21fe73008 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing.cl
@@ -31,7 +31,7 @@ __kernel void advection_and_remeshing(__global const float* gvelo,
 				      __RCOMP_P__global float* gscal__ID__,
 				      __local float* velocity_cache,
 				      __RCOMP_P__local float* gscal_loc__ID__,
-				      float dt,float min_position, float4 dx, float4 v_dx)
+				      float dt, float min_position, float4 dx, float4 v_dx)
 {
   uint gidX = get_global_id(0);	/* OpenCL work-itme global index (X) */
   uint gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */
@@ -61,9 +61,9 @@ __kernel void advection_and_remeshing(__global const float* gvelo,
       /* Read Particle scalar */
       __RCOMP_Is__ID__ = vload__N__((i + line_index)/__N__, pscal__ID__);
       /* Compute particle position */
-      p = advection(i, dt, dx.x, invdx, v_invdx, velocity_cache);
+      p = advection(i, dt, dx.x, invdx, v_invdx, velocity_cache, min_position);
       /* Remesh particle */
-      remesh(i, dx.x, invdx, __RCOMP_Ps__ID__, p, __RCOMP_Pgscal_loc__ID__);
+      remesh(i, dx.x, invdx, __RCOMP_Ps__ID__, p, min_position, __RCOMP_Pgscal_loc__ID__);
     }
 
   /* Synchronize work-group */
diff --git a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_noVec.cl
index 454e8bd23..1b68b2925 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_noVec.cl
@@ -31,7 +31,7 @@ __kernel void advection_and_remeshing(__global const float* gvelo,
 				      __RCOMP_P__global float* gscal__ID__,
 				      __local float* gvelo_loc,
 				      __RCOMP_P__local float* gscal_loc__ID__,
-				      float dt,float min_position, float4 dx, float4 v_dx)
+				      float dt, float min_position, float4 dx, float4 v_dx)
 {
   uint gidX = get_global_id(0);	/* OpenCL work-itme global index (X) */
   uint gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */
@@ -61,9 +61,9 @@ __kernel void advection_and_remeshing(__global const float* gvelo,
       /* Read Particle scalar */
       __RCOMP_Is__ID__ = pscal__ID__[i + line_index];
       /* Compute particle position */
-      p = advection(i, dt, dx.x, invdx, v_invdx, gvelo_loc);
+      p = advection(i, dt, dx.x, invdx, v_invdx, gvelo_loc, min_position);
       /* Remesh particle */
-      remesh(i, dx.x, invdx, __RCOMP_Ps__ID__, p, __RCOMP_Pgscal_loc__ID__);
+      remesh(i, dx.x, invdx, __RCOMP_Ps__ID__, p, min_position, __RCOMP_Pgscal_loc__ID__);
     }
 
   /* Synchronize work-group */
diff --git a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_vector_2d.cl b/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_vector_2d.cl
deleted file mode 100644
index 10a605498..000000000
--- a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_vector_2d.cl
+++ /dev/null
@@ -1,83 +0,0 @@
-/**
- * @file advection_and_remeshing_vector_2d.cl
- * Advection and remeshing kernel for 2D vector advection.
- */
-
-/**
- * Performs advection and then remeshing of the particles' vector.
- * A work-group is handling a 1D problem. Thus, gidY and gidZ are constants among work-items of a work-group.
- * Each work-item computes NB_I/WI_NB particles positions. To avoid concurrent witings, in case of strong velocity gradients, work-items computes contiguous particles.
- * Particle are computed through OpenCL vector types of lenght 2, 4 or 8.
- * Scalar results are stored in a local buffer as a cache and then copied to global memory buffer.
- *
- * @param gvelo Velocity field
- * @param pscal Particle scalar
- * @param gscal Grid scalar
- * @param dt Time step
- * @param min_position Domain lower coordinate
- * @param dx Space step
- *
- * @remark <code>NB_I</code>, <code>NB_II</code>, <code>NB_III</code> : points number in directions from 1st varying index to last.
- * @remark <code>WI_NB</code> corresponds to the work-item number.
- * @remark <code>__N__</code> is expanded at compilation time by vector width.
- * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component.
- * @see parmepy.gpu.tools.parse_file
- */
-__kernel void advection_and_remeshing(__global const float* gvelo,
-				      __global const float* pvec_X,
-				      __global const float* pvec_Y,
-				      __global float* gvec_X,
-				      __global float* gvec_Y,
-				      float dt,float min_position, float dx)
-{
-  uint gidX = get_global_id(0);	/* OpenCL work-itme global index (X) */
-  uint gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */
-  uint gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */
-  float invdx = 1.0/dx;		/* Space step inverse */
-  uint i;			/* Particle index in 1D problem */
-  float__N__ p,			/* Particle position */
-    pv_X, pv_Y,			/* Particle vector */
-    v;				/* Particle velocity */
-  uint line_index = gidY*NB_I+ gidZ*NB_I*NB_II; /* Current 1D problem index */
-
-  __local float gvec_X_loc[NB_I]; /* Local buffer for result */
-  __local float gvec_Y_loc[NB_I]; /* Local buffer for result */
-  __local float gvelo_loc[NB_I]; /* Velocity cache */
-
-  for(i=gidX*__N__; i<NB_I; i+=(WI_NB*__N__))
-    {
-      /* Read velocity */
-      v = vload__N__((i+line_index)/__N__, gvelo);
-      /* Fill velocity cache */
-      gvelo_loc[noBC_id(i+__NN__)] = v.s__NN__;
-      /* Initialize result buffer */
-      gvec_X_loc[noBC_id(i+__NN__)] = 0.0;
-      gvec_Y_loc[noBC_id(i+__NN__)] = 0.0;
-    }
-
-  /* Synchronize work-group */
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  for(i=gidX*PART_NB_PER_WI; i<(gidX + 1)*PART_NB_PER_WI; i+=__N__)
-    {
-      /* Read Particle scalar */
-      pv_X = vload__N__((i + line_index)/__N__, pvec_X);
-      pv_Y = vload__N__((i + line_index)/__N__, pvec_Y);
-      /* Compute particle position */
-      p = advection(i, dt, dx, invdx, gvelo_loc);
-      /* Remesh particle */
-      remesh(i, dx, invdx, pv_X, pv_Y, p, gvec_X_loc, gvec_Y_loc);
-    }
-
-  /* Synchronize work-group */
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  for(i=gidX*__N__; i<NB_I; i+=(WI_NB*__N__))
-    {
-      /* Store result */
-      vstore__N__((float__N__)(gvec_X_loc[noBC_id(i+__NN__)],
-			       ), (i + line_index)/__N__, gvec_X);
-      vstore__N__((float__N__)(gvec_Y_loc[noBC_id(i+__NN__)],
-			       ), (i + line_index)/__N__, gvec_Y);
-    }
-}
diff --git a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_vector_3d.cl b/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_vector_3d.cl
deleted file mode 100644
index abbdbe006..000000000
--- a/HySoP/hysop/gpu/cl_src/kernels/advection_and_remeshing_vector_3d.cl
+++ /dev/null
@@ -1,90 +0,0 @@
-/**
- * @file advection_and_remeshing_vector_3d.cl
- * Advection and remeshing kernel for 3D vector advection.
- */
-
-/**
- * Performs advection and then remeshing of the particles' vector.
- * A work-group is handling a 1D problem. Thus, gidY and gidZ are constants among work-items of a work-group.
- * Each work-item computes NB_I/WI_NB particles positions. To avoid concurrent witings, in case of strong velocity gradients, work-items computes contiguous particles.
- * Particle are computed through OpenCL vector types of lenght 2, 4 or 8.
- * Scalar results are stored in a local buffer as a cache and then copied to global memory buffer.
- *
- * @param gvelo Velocity field
- * @param pscal Particle scalar
- * @param gscal Grid scalar
- * @param dt Time step
- * @param min_position Domain lower coordinate
- * @param dx Space step
- *
- * @remark <code>NB_I</code>, <code>NB_II</code>, <code>NB_III</code> : points number in directions from 1st varying index to last.
- * @remark <code>WI_NB</code> corresponds to the work-item number.
- * @remark <code>__N__</code> is expanded at compilation time by vector width.
- * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component.
- * @see parmepy.gpu.tools.parse_file
- */
-__kernel void advection_and_remeshing(__global const float* gvelo,
-				      __global const float* pvec_X,
-				      __global const float* pvec_Y,
-				      __global const float* pvec_Z,
-				      __global float* gvec_X,
-				      __global float* gvec_Y,
-				      __global float* gvec_Z,
-				      float dt,float min_position, float dx)
-{
-  uint gidX = get_global_id(0);	/* OpenCL work-itme global index (X) */
-  uint gidY = get_global_id(1); /* OpenCL work-itme global index (Y) */
-  uint gidZ = get_global_id(2); /* OpenCL work-itme global index (Z) */
-  float invdx = 1.0/dx;		/* Space step inverse */
-  uint i;			/* Particle index in 1D problem */
-  float__N__ p,			/* Particle position */
-    pv_X, pv_Y, pv_Z,			/* Particle vector */
-    v;				/* Particle velocity */
-  uint line_index = gidY*NB_I+ gidZ*NB_I*NB_II; /* Current 1D problem index */
-
-  __local float gvec_X_loc[NB_I]; /* Local buffer for result */
-  __local float gvec_Y_loc[NB_I]; /* Local buffer for result */
-  __local float gvec_Z_loc[NB_I]; /* Local buffer for result */
-  __local float gvelo_loc[NB_I]; /* Velocity cache */
-
-  for(i=gidX*__N__; i<NB_I; i+=(WI_NB*__N__))
-    {
-      /* Read velocity */
-      v = vload__N__((i+line_index)/__N__, gvelo);
-      /* Fill velocity cache */
-      gvelo_loc[noBC_id(i+__NN__)] = v.s__NN__;
-      /* Initialize result buffer */
-      gvec_X_loc[noBC_id(i+__NN__)] = 0.0;
-      gvec_Y_loc[noBC_id(i+__NN__)] = 0.0;
-      gvec_Z_loc[noBC_id(i+__NN__)] = 0.0;
-    }
-
-  /* Synchronize work-group */
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  for(i=gidX*PART_NB_PER_WI; i<(gidX + 1)*PART_NB_PER_WI; i+=__N__)
-    {
-      /* Read Particle scalar */
-      pv_X = vload__N__((i + line_index)/__N__, pvec_X);
-      pv_Y = vload__N__((i + line_index)/__N__, pvec_Y);
-      pv_Z = vload__N__((i + line_index)/__N__, pvec_Z);
-      /* Compute particle position */
-      p = advection(i, dt, dx, invdx, gvelo_loc);
-      /* Remesh particle */
-      remesh(i, dx, invdx, pv_X, pv_Y, pv_Z, p, gvec_X_loc, gvec_Y_loc, gvec_Z_loc);
-    }
-
-  /* Synchronize work-group */
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  for(i=gidX*__N__; i<NB_I; i+=(WI_NB*__N__))
-    {
-      /* Store result */
-      vstore__N__((float__N__)(gvec_X_loc[noBC_id(i+__NN__)],
-			       ), (i + line_index)/__N__, gvec_X);
-      vstore__N__((float__N__)(gvec_Y_loc[noBC_id(i+__NN__)],
-			       ), (i + line_index)/__N__, gvec_Y);
-      vstore__N__((float__N__)(gvec_Z_loc[noBC_id(i+__NN__)],
-			       ), (i + line_index)/__N__, gvec_Z);
-    }
-}
diff --git a/HySoP/hysop/gpu/cl_src/kernels/advection_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/advection_noVec.cl
index a7f4c285d..0055af7d2 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/advection_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/advection_noVec.cl
@@ -39,6 +39,6 @@ __kernel void advection_kernel(__global const float* gvelo,
 
   for(i=gidX; i<NB_I; i+=WI_NB)
     {
-      ppos[i+line_index] = advection(i, dt, dx.x, invdx, v_invdx, velocity_cache) + min_position;
+      ppos[i+line_index] = advection(i, dt, dx.x, invdx, v_invdx, velocity_cache, min_position);
     }
 }
diff --git a/HySoP/hysop/gpu/cl_src/kernels/comm_MS_advection_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/comm_MS_advection_noVec.cl
new file mode 100644
index 000000000..3398f8bc5
--- /dev/null
+++ b/HySoP/hysop/gpu/cl_src/kernels/comm_MS_advection_noVec.cl
@@ -0,0 +1,261 @@
+
+
+
+void fill_velocity_cache_reduction(__global const float* gvelo,
+				   uint gidX, uint gidY, uint gidZ,
+				   float4 dx, float4 v_dx,
+				   __local float* gvelo_loc);
+
+void reduce_local(__local int* minmax, int lid);
+
+/* Multi-scale advection of one X line of particles; velocity samples outside the locally cached index range are read from buffer_l / buffer_r. */
+__kernel void buff_advec(__global const float* gvelo,
+			 __global float* ppos,
+			 __global float* buffer_l,
+			 __global float* buffer_r,
+			 __global int* minmax_global,
+			 __local float* velocity_cache,
+			 __local float* buff_l_loc,
+			 __local float* buff_r_loc,
+			 float dt, float min_position, float4 dx, float4 v_dx,
+			 int4 l_nb, int4 r_nb)
+{
+  int gidX = get_global_id(0);	/* OpenCL work-item global index (X) */
+  int gidY = get_global_id(1); /* OpenCL work-item global index (Y) */
+  int gidZ = get_global_id(2); /* OpenCL work-item global index (Z) */
+  float invdx = 1.0/dx.x;		/* Inverse of dx.x (not used below) */
+  float v_invdx = 1.0/v_dx.x;		/* Inverse of v_dx.x */
+  int i;			/* Particle index in 1D problem */
+  int line_index = gidY*NB_I+gidZ*NB_I*NB_II; /* Current 1D problem index */
+  float p,v,c, hY, hZ; /* p: position in units of v_dx.x, v: velocity, c: grid coordinate, hY/hZ: interpolation weights */
+  int i_ind, i_indY, i_indZ;
+  /* Start indices and used extents of the left (L_*) and right (R_*) buffers, read from minmax_global */
+  int l_start_x = minmax_global[L_MIN_X];
+  int l_start_y = minmax_global[L_MIN_Y];
+  int l_start_z = minmax_global[L_MIN_Z];
+  int r_start_x = minmax_global[R_MIN_X];
+  int r_start_y = minmax_global[R_MIN_Y];
+  int r_start_z = minmax_global[R_MIN_Z];
+  int4 l_nb_used = (int4)(minmax_global[L_MAX_X] - l_start_x + 1,
+			  minmax_global[L_MAX_Y] - l_start_y + 1,
+			  minmax_global[L_MAX_Z] - l_start_z + 1,
+			  0);
+  int4 r_nb_used = (int4)(minmax_global[R_MAX_X] - r_start_x + 1,
+			  minmax_global[R_MAX_Y] - r_start_y + 1,
+			  minmax_global[R_MAX_Z] - r_start_z + 1,
+			  0);
+
+  __local float* loc_ptr;
+
+  /* Position of this line along Y and Z in units of v_dx: floor index (i_indY, i_indZ) and fractional part (hY, hZ) */
+  hY = (gidY * dx.y) / v_dx.y;
+  hZ = (gidZ * dx.z) / v_dx.z;
+  i_indY = convert_int_rtn(hY);
+  i_indZ = convert_int_rtn(hZ);
+  hY = hY - convert_float(i_indY);
+  hZ = hZ - convert_float(i_indZ);
+  /* Fill velocity_cache with the bilinear (Y,Z) interpolation of the four neighbouring gvelo lines */
+  for(i=gidX; i<V_NB_I; i+=(WI_NB)){
+    velocity_cache[noBC_id(i)] = (1.0-hY)*(1.0-hZ) * gvelo[i + i_indY * V_NB_I + i_indZ * V_NB_I * V_NB_II];
+    velocity_cache[noBC_id(i)] += (1.0-hY)*hZ * gvelo[i + i_indY * V_NB_I + (i_indZ + 1) * V_NB_I * V_NB_II];
+    velocity_cache[noBC_id(i)] += hY*(1.0-hZ) * gvelo[i + (i_indY + 1) * V_NB_I + i_indZ * V_NB_I * V_NB_II];
+    velocity_cache[noBC_id(i)] += hY*hZ * gvelo[i + (i_indY + 1) * V_NB_I + (i_indZ + 1) * V_NB_I * V_NB_II];
+  }
+  /* Same interpolation from buffer_l, done only when the Y and Z cells lie inside [L_MIN, L_MAX-1] */
+  if ((i_indY>=l_start_y && i_indY<=minmax_global[L_MAX_Y]-1) && (i_indZ>=l_start_z && i_indZ<=minmax_global[L_MAX_Z]-1)){
+    for(i=gidX; i<l_nb_used.x; i+=(WI_NB)){
+      buff_l_loc[i] = (1.0-hY)*(1.0-hZ)*buffer_l[i+(i_indY-l_start_y)*l_nb.x + (i_indZ-l_start_z)*l_nb.x*l_nb.y];
+      buff_l_loc[i] += (1.0-hY)*hZ*buffer_l[i+(i_indY-l_start_y)*l_nb.x + (i_indZ+1-l_start_z)*l_nb.x*l_nb.y];
+      buff_l_loc[i] += hY*(1.0-hZ)*buffer_l[i+(i_indY+1-l_start_y)*l_nb.x + (i_indZ-l_start_z)*l_nb.x*l_nb.y];
+      buff_l_loc[i] += hY*hZ*buffer_l[i+(i_indY+1-l_start_y)*l_nb.x + (i_indZ+1-l_start_z)*l_nb.x*l_nb.y];
+    }}
+  /* Same interpolation from buffer_r, done only when the Y and Z cells lie inside [R_MIN, R_MAX-1] */
+  if ((i_indY>=r_start_y && i_indY<=minmax_global[R_MAX_Y]-1) && (i_indZ>=r_start_z && i_indZ<=minmax_global[R_MAX_Z]-1)){
+    for(i=gidX; i<r_nb_used.x; i+=(WI_NB)){
+      buff_r_loc[i] = (1.0-hY)*(1.0-hZ)*buffer_r[i+(i_indY-r_start_y)*r_nb.x + (i_indZ-r_start_z)*r_nb.x*r_nb.y];
+      buff_r_loc[i] += (1.0-hY)*hZ*buffer_r[i+(i_indY-r_start_y)*r_nb.x + (i_indZ+1-r_start_z)*r_nb.x*r_nb.y];
+      buff_r_loc[i] += hY*(1.0-hZ)*buffer_r[i+(i_indY+1-r_start_y)*r_nb.x + (i_indZ-r_start_z)*r_nb.x*r_nb.y];
+      buff_r_loc[i] += hY*hZ*buffer_r[i+(i_indY+1-r_start_y)*r_nb.x + (i_indZ+1-r_start_z)*r_nb.x*r_nb.y];
+    }}
+
+  /* Synchronize work-group */
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  for(i=gidX; i<NB_I; i+=WI_NB)
+    {
+      c = i * dx.x + min_position; /* Coordinate of grid point i */
+      // multi-scale : interpolate v from velocity buffer (of length V_NB_I)
+      p = c * v_invdx;
+      i_ind = convert_int_rtn(p);
+      p = p - convert_float(i_ind);
+      i_ind = i_ind - (V_START_INDEX-V_GHOSTS_NB) - MS_INTERPOL_SHIFT;
+      v = mix(velocity_cache[noBC_id(i_ind)],
+	      velocity_cache[noBC_id(i_ind+1)],p);
+      p = (c + 0.5*dt*v) * v_invdx; /* Half-step position in units of v_dx.x */
+
+      i_ind = convert_int_rtn(p) - MS_INTERPOL_SHIFT;
+
+      p = p - convert_float(i_ind);
+      /* Each interpolation point is read from velocity_cache when its index is in [V_START_INDEX-V_GHOSTS_NB, V_STOP_INDEX+V_GHOSTS_NB], otherwise from the left or right buffer */
+      loc_ptr = (i_ind>=(V_START_INDEX-V_GHOSTS_NB) && i_ind <= (V_STOP_INDEX+V_GHOSTS_NB)) ? velocity_cache+noBC_id(i_ind - (V_START_INDEX-V_GHOSTS_NB)) : (i_ind<(V_START_INDEX-V_GHOSTS_NB)) ? buff_l_loc+i_ind-l_start_x  : buff_r_loc+i_ind-r_start_x ;
+      v = (1.0-p)*(*loc_ptr);
+      i_ind = i_ind + 1;
+      loc_ptr = (i_ind>=(V_START_INDEX-V_GHOSTS_NB) && i_ind <= (V_STOP_INDEX+V_GHOSTS_NB)) ? velocity_cache+noBC_id(i_ind - (V_START_INDEX-V_GHOSTS_NB)) : (i_ind<(V_START_INDEX-V_GHOSTS_NB)) ? buff_l_loc+i_ind-l_start_x  : buff_r_loc+i_ind-r_start_x ;
+      v += p*(*loc_ptr);
+      ppos[i+line_index] = c + dt * v; /* Full step from c with the half-step velocity */
+    }
+
+}
+
+/* Stage 1 of the min/max reduction: for one Z index, scans every line and records the extents of the velocity indices needed outside the locally cached range. */
+__kernel void reduce_stage1_advec(__global const float* gvelo,
+				  __global int* minmax_buffer,
+				  __local float* velocity_cache,
+				  __local int* minmax,
+				  float dt, float min_position, float4 dx, float4 v_dx)
+{
+  int lid = get_global_id(0);	/* OpenCL work-item global index (X) */
+  int gidY, gidZ = get_global_id(2); /* OpenCL work-item global index (Z) */
+  float invdx = 1.0/dx.x;		/* Inverse of dx.x (not used below) */
+  float v_invdx = 1.0/v_dx.x;		/* Inverse of v_dx.x */
+  int i;			/* Particle index in 1D problem */
+  int line_index; /* Current 1D problem index (assigned, never read) */
+  int my_minmax[12] = {1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30};
+  float p,v,c;
+  int i_ind, i_indY, i_indZ, ix, iy, iz;
+  bool is_l, is_r;
+
+  for(gidY=0;gidY<NB_II;gidY++)
+    {
+      line_index = gidY*V_NB_I + gidZ*V_NB_I*V_NB_II;
+      fill_velocity_cache_reduction(gvelo, lid, gidY, gidZ, dx, v_dx, velocity_cache);
+      barrier(CLK_LOCAL_MEM_FENCE);
+      i_indY = convert_int_rtn((gidY * dx.y) / v_dx.y); /* Y and Z cells do not depend on the particle index */
+      i_indZ = convert_int_rtn((gidZ * dx.z) / v_dx.z);
+
+      for(i=lid; i<NB_I; i+=WI_NB_REDUCE)
+	{
+	  c = i * dx.x + min_position;
+	  // multi-scale : interpolate v from velocity buffer (of length V_NB_I)
+	  p = c * v_invdx;
+	  i_ind = convert_int_rtn(p);
+	  p = p - convert_float(i_ind);
+	  i_ind = i_ind - (V_START_INDEX-V_GHOSTS_NB) - MS_INTERPOL_SHIFT;
+	  v = mix(velocity_cache[noBC_id(i_ind)],
+		  velocity_cache[noBC_id(i_ind+1)],p);
+	  p = (c + 0.5*dt*v) * v_invdx;
+
+	  i_ind = convert_int_rtn(p);
+
+	  for(ix=i_ind-MS_INTERPOL_SHIFT; ix<=i_ind+1+MS_INTERPOL_SHIFT; ix++){
+	    is_l = ix<(V_START_INDEX-V_GHOSTS_NB);
+	    is_r = ix>(V_STOP_INDEX+V_GHOSTS_NB);
+	    for (iy=i_indY-MS_INTERPOL_SHIFT; iy<=i_indY+1+MS_INTERPOL_SHIFT; iy++){
+	      for (iz=i_indZ-MS_INTERPOL_SHIFT; iz<=i_indZ+1+MS_INTERPOL_SHIFT; iz++){
+		my_minmax[L_MIN_X] = (is_l && my_minmax[L_MIN_X]>ix) ? ix : my_minmax[L_MIN_X];
+		my_minmax[L_MAX_X] = (is_l && my_minmax[L_MAX_X]<ix) ? ix : my_minmax[L_MAX_X];
+		my_minmax[L_MIN_Y] = (is_l && my_minmax[L_MIN_Y]>iy) ? iy : my_minmax[L_MIN_Y];
+		my_minmax[L_MAX_Y] = (is_l && my_minmax[L_MAX_Y]<iy) ? iy : my_minmax[L_MAX_Y];
+		my_minmax[L_MIN_Z] = (is_l && my_minmax[L_MIN_Z]>iz) ? iz : my_minmax[L_MIN_Z];
+		my_minmax[L_MAX_Z] = (is_l && my_minmax[L_MAX_Z]<iz) ? iz : my_minmax[L_MAX_Z];
+
+		my_minmax[R_MIN_X] = (is_r && my_minmax[R_MIN_X]>ix) ? ix : my_minmax[R_MIN_X];
+		my_minmax[R_MAX_X] = (is_r && my_minmax[R_MAX_X]<ix) ? ix : my_minmax[R_MAX_X];
+		my_minmax[R_MIN_Y] = (is_r && my_minmax[R_MIN_Y]>iy) ? iy : my_minmax[R_MIN_Y];
+		my_minmax[R_MAX_Y] = (is_r && my_minmax[R_MAX_Y]<iy) ? iy : my_minmax[R_MAX_Y];
+		my_minmax[R_MIN_Z] = (is_r && my_minmax[R_MIN_Z]>iz) ? iz : my_minmax[R_MIN_Z];
+		my_minmax[R_MAX_Z] = (is_r && my_minmax[R_MAX_Z]<iz) ? iz : my_minmax[R_MAX_Z];
+	      }
+	    }
+	  }
+	}
+      /* Every work-item must be done reading velocity_cache before the next line overwrites it */
+      barrier(CLK_LOCAL_MEM_FENCE);
+    }
+
+  for(i=0;i<12;i++)
+    minmax[lid*12+i] = my_minmax[i];
+
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  reduce_local(minmax, lid);
+
+  if (lid == 0)
+    for(i=0;i<12;i++)
+      minmax_buffer[gidZ*12+i] = minmax[i];
+}
+
+
+/* Fills gvelo_loc with one X line of velocity, interpolated from gvelo linearly in Y (NB_III == 1) or bilinearly in Y and Z (otherwise). */
+void fill_velocity_cache_reduction(__global const float* gvelo,
+				   uint gidX, uint gidY, uint gidZ,
+				   float4 dx, float4 v_dx,
+				   __local float* gvelo_loc)
+{
+  uint i;
+
+#if NB_III == 1
+  //  Multi-Scale (2D)
+
+  float line_posY, hY;
+  int indY;
+  int2 v_line_index;
+  float2 wY;
+
+  /* Line position along Y in units of v_dx.y: floor index (indY) and fractional part (hY) */
+  line_posY = (gidY * dx.y) / v_dx.y;
+  indY = convert_int_rtn(line_posY);
+  hY = line_posY - convert_float(indY);
+  /* Linear interpolation weights: s0 for line indY, s1 for line indY+1 */
+  wY.s1 = hY;
+  wY.s0 = 1.0 - wY.s1;
+  /* Offset by V_GHOSTS_NB - MS_INTERPOL_SHIFT (presumably to skip the ghost layers of gvelo; confirm against host layout) */
+  indY = indY + V_GHOSTS_NB - MS_INTERPOL_SHIFT;
+
+  v_line_index.s0 = indY * V_NB_I;
+  v_line_index.s1 = (indY + 1) * V_NB_I;
+  /* The loop is strided by WI_NB_REDUCE, starting at gidX */
+  for(i=gidX; i<V_NB_I; i+=(WI_NB_REDUCE)){
+    gvelo_loc[noBC_id(i)] = wY.s0 * gvelo[i + v_line_index.s0];
+    gvelo_loc[noBC_id(i)] += wY.s1 * gvelo[i + v_line_index.s1];
+
+  }
+
+#else
+  //  Multi-Scale (3D)
+
+  float line_posY, hY;
+  float line_posZ, hZ;
+  int indY, indZ;
+  int2 v_line_indexY, v_line_indexZ;
+  float2 wY, wZ;
+  /* Line position along Y and Z in units of v_dx: floor indices and fractional parts */
+  line_posY = (gidY * dx.y) / v_dx.y;
+  line_posZ = (gidZ * dx.z) / v_dx.z;
+  indY = convert_int_rtn(line_posY);
+  indZ = convert_int_rtn(line_posZ);
+  hY = line_posY - convert_float(indY);
+  hZ = line_posZ - convert_float(indZ);
+  /* Linear interpolation weights in each direction: s0 for the lower line, s1 for the upper one */
+  wY.s1 = hY;
+  wY.s0 = 1.0 - wY.s1;
+  wZ.s1 = hZ;
+  wZ.s0 = 1.0 - wZ.s1;
+
+  /* Offset by V_GHOSTS_NB - MS_INTERPOL_SHIFT (presumably to skip the ghost layers of gvelo; confirm against host layout) */
+  indY = indY + V_GHOSTS_NB - MS_INTERPOL_SHIFT;
+  indZ = indZ + V_GHOSTS_NB - MS_INTERPOL_SHIFT;
+
+  v_line_indexY.s0 = indY * V_NB_I;
+  v_line_indexY.s1 = (indY + 1) * V_NB_I;
+  v_line_indexZ.s0 = indZ * V_NB_I * V_NB_II;
+  v_line_indexZ.s1 = (indZ + 1) * V_NB_I * V_NB_II;
+  /* Weighted sum of the four neighbouring (Y,Z) lines */
+  for(i=gidX; i<V_NB_I; i+=(WI_NB_REDUCE)){
+    gvelo_loc[noBC_id(i)] = wY.s0 * wZ.s0 * gvelo[i + v_line_indexY.s0 + v_line_indexZ.s0];
+    gvelo_loc[noBC_id(i)] += wY.s0 * wZ.s1 * gvelo[i + v_line_indexY.s0 + v_line_indexZ.s1];
+    gvelo_loc[noBC_id(i)] += wY.s1 * wZ.s0 * gvelo[i + v_line_indexY.s1 + v_line_indexZ.s0];
+    gvelo_loc[noBC_id(i)] += wY.s1 * wZ.s1 * gvelo[i + v_line_indexY.s1 + v_line_indexZ.s1];
+  }
+#endif
+}
diff --git a/HySoP/hysop/gpu/cl_src/kernels/comm_advection_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/comm_advection_noVec.cl
new file mode 100644
index 000000000..ef42469dc
--- /dev/null
+++ b/HySoP/hysop/gpu/cl_src/kernels/comm_advection_noVec.cl
@@ -0,0 +1,161 @@
+
+
+
+void reduce_local(__local int* minmax, int lid);
+
+/* Advection of one X line of particles: the velocity is re-evaluated at the half-step position, reading from buffer_l / buffer_r when the index falls outside the local range. */
+__kernel void buff_advec(__global const float* gvelo,
+			 __global float* ppos,
+			 __global float* buffer_l,
+			 __global float* buffer_r,
+			 __global int* minmax_global,
+			 __local float* velocity_cache,
+			 __local float* buff_l_loc,
+			 __local float* buff_r_loc,
+			 float dt, float min_position, float4 dx, float4 v_dx,
+			 int4 l_nb, int4 r_nb)
+{
+  int gidX = get_global_id(0);	/* OpenCL work-item global index (X) */
+  int gidY = get_global_id(1); /* OpenCL work-item global index (Y) */
+  int gidZ = get_global_id(2); /* OpenCL work-item global index (Z) */
+  float invdx = 1.0/dx.x;		/* Inverse of dx.x (not used below) */
+  float v_invdx = 1.0/v_dx.x;		/* Inverse of v_dx.x */
+  int i;			/* Particle index in 1D problem */
+  int line_index ; /* Current 1D problem index */
+
+  float v,vp,p,c, hdt = 0.5 * dt; /* v/vp: velocity at the two interpolation points, c: grid coordinate, hdt: half time step */
+  int i_ind, i_ind_p;
+  /* Start indices and used extents of the left (L_*) and right (R_*) buffers, read from minmax_global */
+  int l_start_x = minmax_global[L_MIN_X];
+  int l_start_y = minmax_global[L_MIN_Y];
+  int l_start_z = minmax_global[L_MIN_Z];
+  int r_start_x = minmax_global[R_MIN_X];
+  int r_start_y = minmax_global[R_MIN_Y];
+  int r_start_z = minmax_global[R_MIN_Z];
+  int4 l_nb_used = (int4)(minmax_global[L_MAX_X] - l_start_x + 1,
+			  minmax_global[L_MAX_Y] - l_start_y + 1,
+			  minmax_global[L_MAX_Z] - l_start_z + 1,
+			  0);
+  int4 r_nb_used = (int4)(minmax_global[R_MAX_X] - r_start_x + 1,
+			  minmax_global[R_MAX_Y] - r_start_y + 1,
+			  minmax_global[R_MAX_Z] - r_start_z + 1,
+			  0);
+
+  __local float* loc_ptr;
+  /* Copy this line of buffer_l to local memory when (gidY, gidZ) lies inside the recorded left Y/Z range */
+  if ((gidY>=l_start_y && gidY<=minmax_global[L_MAX_Y]) && (gidZ>=l_start_z && gidZ<=minmax_global[L_MAX_Z])){
+    for(i=gidX; i<l_nb_used.x; i+=(WI_NB)){
+      buff_l_loc[i] = buffer_l[i+(gidY-l_start_y)*l_nb.x + (gidZ-l_start_z)*l_nb.x*l_nb.y];}}
+  /* Copy this line of buffer_r to local memory when (gidY, gidZ) lies inside the recorded right Y/Z range */
+  if ((gidY>=r_start_y && gidY<=minmax_global[R_MAX_Y]) && (gidZ>=r_start_z && gidZ<=minmax_global[R_MAX_Z])){
+    for(i=gidX; i<r_nb_used.x; i+=(WI_NB)){
+      buff_r_loc[i] = buffer_r[i+(gidY-r_start_y)*r_nb.x + (gidZ-r_start_z)*r_nb.x*r_nb.y];}}
+
+  line_index = gidY*V_NB_I + gidZ*V_NB_I*V_NB_II; /* Line offset in gvelo (lines of V_NB_I points) */
+  for(i=gidX; i<V_NB_I; i+=(WI_NB))
+    {
+      /* Read velocity */
+      /* Fill velocity cache */
+      velocity_cache[noBC_id(i)] = gvelo[i+line_index];
+    }
+
+  /* Synchronize work-group */
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  line_index = gidY*NB_I+gidZ*NB_I*NB_II; /* Line offset in ppos (lines of NB_I points) */
+  for(i=gidX; i<NB_I; i+=WI_NB)
+    {
+      c = i * dx.x + min_position; /* Coordinate of grid point i */
+      v = velocity_cache[noBC_id(i + V_GHOSTS_NB)]; /* Velocity at grid point i: cache entry i + V_GHOSTS_NB */
+      p = (c + hdt*v) * v_invdx; /* Half-step position in units of v_dx.x */
+
+      i_ind = convert_int_rtn(p);
+      p = p - convert_float(i_ind);
+      i_ind_p = i_ind + 1;
+      loc_ptr = (i_ind>=(V_START_INDEX-V_GHOSTS_NB) && i_ind <= (V_STOP_INDEX+V_GHOSTS_NB)) ? velocity_cache + noBC_id(i_ind - (V_START_INDEX-V_GHOSTS_NB)) : (i_ind<(V_START_INDEX-V_GHOSTS_NB)) ? buff_l_loc+i_ind-l_start_x : buff_r_loc+i_ind-r_start_x ;
+      v = *loc_ptr;
+      /* Same source selection (local cache, left buffer or right buffer) for the next index */
+      loc_ptr = (i_ind_p>=(V_START_INDEX-V_GHOSTS_NB) && i_ind_p <= (V_STOP_INDEX+V_GHOSTS_NB)) ? velocity_cache+noBC_id(i_ind_p - (V_START_INDEX-V_GHOSTS_NB)) : (i_ind_p<(V_START_INDEX-V_GHOSTS_NB)) ? buff_l_loc+i_ind_p-l_start_x : buff_r_loc+i_ind_p-r_start_x ;
+      vp = *loc_ptr;
+      /* Linear interpolation between v and vp, then full step from c */
+      v = (p*(vp-v) + v);
+      p = c + dt * v;
+      ppos[i+line_index] = p;
+    }
+
+}
+
+/* Stage 1 of the min/max reduction for buff_advec: for one Z index, records the extents of the half-step velocity indices falling left or right of the local range. */
+__kernel void reduce_stage1_advec(__global const float* gvelo,
+				  __global int* minmax_buffer,
+				  __local float* velocity_cache,
+				  __local int* minmax,
+				  float dt, float min_position, float4 dx, float4 v_dx)
+{
+  int gidY, gidZ = get_global_id(2);
+  int lid = get_global_id(0);
+  int my_minmax[12] = {1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30};
+  int i, line_index;
+  int ix;
+  float p, c, v;
+  bool is_l, is_r;
+
+  for(gidY=0;gidY<NB_II;gidY++)
+    {
+      line_index = gidY*V_NB_I + gidZ*V_NB_I*V_NB_II;
+      for (i=lid; i<NB_I; i+=WI_NB_REDUCE)
+	{
+	  c = i * dx.x + min_position;
+	  v = gvelo[i + V_GHOSTS_NB + line_index]; /* Same sample as buff_advec, which reads cache entry i + V_GHOSTS_NB for particle i */
+	  p = (c + 0.5 * dt * v) / v_dx.x;
+
+	  ix = convert_int_rtn(p);
+	  /* First interpolation point: left (is_l) or right (is_r) of the locally available index range */
+	  is_l = ix<(V_START_INDEX-V_GHOSTS_NB);
+	  is_r = ix>(V_STOP_INDEX+V_GHOSTS_NB);
+
+	  my_minmax[L_MIN_X] = (is_l && my_minmax[L_MIN_X]>ix) ? ix : my_minmax[L_MIN_X];
+	  my_minmax[L_MAX_X] = (is_l && my_minmax[L_MAX_X]<ix) ? ix : my_minmax[L_MAX_X];
+	  my_minmax[L_MIN_Y] = (is_l && my_minmax[L_MIN_Y]>gidY) ? gidY : my_minmax[L_MIN_Y];
+	  my_minmax[L_MAX_Y] = (is_l && my_minmax[L_MAX_Y]<gidY) ? gidY : my_minmax[L_MAX_Y];
+	  my_minmax[L_MIN_Z] = (is_l && my_minmax[L_MIN_Z]>gidZ) ? gidZ : my_minmax[L_MIN_Z];
+	  my_minmax[L_MAX_Z] = (is_l && my_minmax[L_MAX_Z]<gidZ) ? gidZ : my_minmax[L_MAX_Z];
+
+	  my_minmax[R_MIN_X] = (is_r  && my_minmax[R_MIN_X]>ix) ? ix : my_minmax[R_MIN_X];
+	  my_minmax[R_MAX_X] = (is_r  && my_minmax[R_MAX_X]<ix) ? ix : my_minmax[R_MAX_X];
+	  my_minmax[R_MIN_Y] = (is_r && my_minmax[R_MIN_Y]>gidY) ? gidY : my_minmax[R_MIN_Y];
+	  my_minmax[R_MAX_Y] = (is_r && my_minmax[R_MAX_Y]<gidY) ? gidY : my_minmax[R_MAX_Y];
+	  my_minmax[R_MIN_Z] = (is_r && my_minmax[R_MIN_Z]>gidZ) ? gidZ : my_minmax[R_MIN_Z];
+	  my_minmax[R_MAX_Z] = (is_r && my_minmax[R_MAX_Z]<gidZ) ? gidZ : my_minmax[R_MAX_Z];
+	  /* Second interpolation point (ix + 1), same bookkeeping */
+	  ix = ix + 1;
+	  is_l = ix<(V_START_INDEX-V_GHOSTS_NB);
+	  is_r = ix>(V_STOP_INDEX+V_GHOSTS_NB);
+
+	  my_minmax[L_MIN_X] = (is_l && my_minmax[L_MIN_X]>ix) ? ix : my_minmax[L_MIN_X];
+	  my_minmax[L_MAX_X] = (is_l && my_minmax[L_MAX_X]<ix) ? ix : my_minmax[L_MAX_X];
+	  my_minmax[L_MIN_Y] = (is_l && my_minmax[L_MIN_Y]>gidY) ? gidY : my_minmax[L_MIN_Y];
+	  my_minmax[L_MAX_Y] = (is_l && my_minmax[L_MAX_Y]<gidY) ? gidY : my_minmax[L_MAX_Y];
+	  my_minmax[L_MIN_Z] = (is_l && my_minmax[L_MIN_Z]>gidZ) ? gidZ : my_minmax[L_MIN_Z];
+	  my_minmax[L_MAX_Z] = (is_l && my_minmax[L_MAX_Z]<gidZ) ? gidZ : my_minmax[L_MAX_Z];
+
+	  my_minmax[R_MIN_X] = (is_r  && my_minmax[R_MIN_X]>ix) ? ix : my_minmax[R_MIN_X];
+	  my_minmax[R_MAX_X] = (is_r  && my_minmax[R_MAX_X]<ix) ? ix : my_minmax[R_MAX_X];
+	  my_minmax[R_MIN_Y] = (is_r && my_minmax[R_MIN_Y]>gidY) ? gidY : my_minmax[R_MIN_Y];
+	  my_minmax[R_MAX_Y] = (is_r && my_minmax[R_MAX_Y]<gidY) ? gidY : my_minmax[R_MAX_Y];
+	  my_minmax[R_MIN_Z] = (is_r && my_minmax[R_MIN_Z]>gidZ) ? gidZ : my_minmax[R_MIN_Z];
+	  my_minmax[R_MAX_Z] = (is_r && my_minmax[R_MAX_Z]<gidZ) ? gidZ : my_minmax[R_MAX_Z];
+	}
+    }
+  /* Publish the 12 per-work-item values to local memory for the work-group reduction */
+  for(i=0;i<12;i++)
+    minmax[lid*12+i] = my_minmax[i];
+
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  reduce_local(minmax, lid);
+  /* Work-item 0 writes the reduced values of this Z index */
+  if (lid == 0)
+    for(i=0;i<12;i++)
+      minmax_buffer[gidZ*12+i] = minmax[i];
+}
diff --git a/HySoP/hysop/gpu/cl_src/kernels/comm_diffusion.cl b/HySoP/hysop/gpu/cl_src/kernels/comm_diffusion.cl
new file mode 100644
index 000000000..6b503bc21
--- /dev/null
+++ b/HySoP/hysop/gpu/cl_src/kernels/comm_diffusion.cl
@@ -0,0 +1,150 @@
+
+/* One explicit 7-point diffusion step on a 3D scalar field, sweeping along Z with an XY tile in local memory; ghosts / ghostsZ supply neighbour values across the cut directions. */
+__kernel void diffusion(__global const float* scal_in,
+			__global const float* ghosts,
+#if (CUT_DIR_Y + CUT_DIR_Z) == 2
+			__global const float* ghostsZ,
+#endif
+			__global float* scal_out,
+			float nudt, float4 dx)
+{
+  int t_gidX = get_group_id(0);
+  int t_gidY = get_group_id(1);
+  int lidX = get_local_id(0);
+  int lidY = get_local_id(1);
+  int gidX = t_gidX*TILE_SIZE + lidX;	/* OpenCL work-item global index (X) */
+  int gidY = t_gidY*TILE_SIZE + lidY; /* OpenCL work-item global index (Y) */
+  int gidZ;
+  float cx = nudt/(dx.x*dx.x);
+  float cy = nudt/(dx.y*dx.y);
+  float cz = nudt/(dx.z*dx.z);
+  float scal_z_m[NB_PART];
+  float scal_z[NB_PART];
+  float scal_z_p[NB_PART];
+  float s;
+  uint i;
+
+  for(i=0;i<NB_PART;i++){
+#if CUT_DIR_Z == 1
+#if CUT_DIR_Y == 1
+    scal_z_m[i] = ghostsZ[gidX + (gidY+i*L_WIDTH)*NB_X + NB_X*NB_Y];
+#else
+    scal_z_m[i] = ghosts[gidX + (gidY+i*L_WIDTH)*NB_X + NB_X*NB_Y];
+#endif
+#else
+    scal_z_m[i] = scal_in[gidX + (gidY+i*L_WIDTH)*NB_X + (NB_Z-1)*NB_X*NB_Y];
+#endif
+    scal_z[i] = scal_in[gidX + (gidY+i*L_WIDTH)*NB_X];
+  }
+
+  /* tile_XY holds one XY slab of scal_in plus a one-point halo on each side. */
+  /* Each work-item handles NB_PART points spaced L_WIDTH apart along Y: */
+  /* local row lidY+i*L_WIDTH, global row gidY+i*L_WIDTH, for i in [0, NB_PART). */
+  /* scal_z_m, scal_z, scal_z_p keep the Z-1, Z, Z+1 values of those points per work-item. */
+
+  __local float tile_XY[TILE_SIZE+2][TILE_SIZE+2];
+
+  lidX += 1; /* shift local indices by one: index 0 is the halo */
+  lidY += 1;
+
+  // loop over Z indices but last.
+  for (gidZ=0; gidZ<(NB_Z-1); gidZ++)
+    {
+      for(i=0;i<NB_PART;i++){
+	// fill the tile
+	tile_XY[lidX][lidY+i*L_WIDTH] = scal_in[gidX + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y];
+
+	/* fill tile edges: X neighbours just outside the tile, with periodic wrap */
+	tile_XY[0][lidY+i*L_WIDTH] = scal_in[((t_gidX*TILE_SIZE-1+NB_X)%NB_X) + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y];
+	tile_XY[TILE_SIZE+1][lidY+i*L_WIDTH] = scal_in[(((t_gidX+1)*TILE_SIZE)%NB_X) + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y];
+      }
+#if CUT_DIR_Y == 1
+      tile_XY[lidX][0] = (t_gidY*TILE_SIZE>=1)? scal_in[gidX + ((t_gidY*TILE_SIZE-1+NB_Y)%NB_Y)*NB_X + gidZ*NB_X*NB_Y] : ghosts[gidX + NB_X + gidZ*NB_X*2];
+      tile_XY[lidX][TILE_SIZE+1] = ((t_gidY+1)*TILE_SIZE<NB_Y) ? scal_in[gidX + (((t_gidY+1)*TILE_SIZE)%NB_Y)*NB_X + gidZ*NB_X*NB_Y] : ghosts[gidX + gidZ*NB_X*2];
+#else
+      tile_XY[lidX][0] = scal_in[gidX + ((t_gidY*TILE_SIZE-1+NB_Y)%NB_Y)*NB_X + gidZ*NB_X*NB_Y];
+      tile_XY[lidX][TILE_SIZE+1] = scal_in[gidX + (((t_gidY+1)*TILE_SIZE)%NB_Y)*NB_X + gidZ*NB_X*NB_Y];
+#endif
+
+      /* Synchronize work-group */
+      barrier(CLK_LOCAL_MEM_FENCE);
+
+      for(i=0;i<NB_PART;i++){
+	/* get scalar value in Z direction */
+	scal_z_p[i] = scal_in[gidX + (gidY+i*L_WIDTH)*NB_X + (gidZ+1)*NB_X*NB_Y];
+
+	// Compute stencil
+	// central point
+	s = scal_z[i] * (1.0 - 2.0 * (cx + cy + cz));
+
+	s += cz*(scal_z_m[i] + scal_z_p[i]);
+
+	s += cy * tile_XY[lidX][lidY+i*L_WIDTH-1];
+	s += cy * tile_XY[lidX][lidY+i*L_WIDTH+1];
+	s += cx * tile_XY[lidX-1][lidY+i*L_WIDTH];
+	s += cx * tile_XY[lidX+1][lidY+i*L_WIDTH];
+
+	// write result
+	scal_out[gidX + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y] = s;
+      }
+
+      /* Synchronize work-group: the tile is overwritten at the next Z index */
+      barrier(CLK_LOCAL_MEM_FENCE);
+
+      for(i=0;i<NB_PART;i++){
+	// Shift Z values
+	scal_z_m[i] = scal_z[i];
+	scal_z[i] = scal_z_p[i];
+      }
+    }
+
+  // Compute last point (from ghosts)
+  gidZ = NB_Z - 1;
+
+  for(i=0;i<NB_PART;i++){
+    // fill the tile
+    tile_XY[lidX][lidY+i*L_WIDTH] = scal_in[gidX + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y];
+
+    /* fill tile edges: X neighbours just outside the tile, with periodic wrap */
+    tile_XY[0][lidY+i*L_WIDTH] = scal_in[((t_gidX*TILE_SIZE-1+NB_X)%NB_X) + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y];
+    tile_XY[TILE_SIZE+1][lidY+i*L_WIDTH] = scal_in[(((t_gidX+1)*TILE_SIZE)%NB_X) + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y];
+  }
+#if CUT_DIR_Y == 1
+  tile_XY[lidX][0] = (t_gidY*TILE_SIZE>=1)? scal_in[gidX + ((t_gidY*TILE_SIZE-1+NB_Y)%NB_Y)*NB_X + gidZ*NB_X*NB_Y] : ghosts[gidX + NB_X + gidZ*NB_X*2];
+  tile_XY[lidX][TILE_SIZE+1] = ((t_gidY+1)*TILE_SIZE<NB_Y) ? scal_in[gidX + (((t_gidY+1)*TILE_SIZE)%NB_Y)*NB_X + gidZ*NB_X*NB_Y] : ghosts[gidX + gidZ*NB_X*2];
+#else
+  tile_XY[lidX][0] = scal_in[gidX + ((t_gidY*TILE_SIZE-1+NB_Y)%NB_Y)*NB_X + gidZ*NB_X*NB_Y];
+  tile_XY[lidX][TILE_SIZE+1] = scal_in[gidX + (((t_gidY+1)*TILE_SIZE)%NB_Y)*NB_X + gidZ*NB_X*NB_Y];
+#endif
+
+  /* Synchronize work-group */
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  for(i=0;i<NB_PART;i++){
+    /* get scalar value in Z direction: ghost plane when Z is cut, first plane (periodic wrap) otherwise */
+#if CUT_DIR_Z == 1
+#if CUT_DIR_Y == 1
+    scal_z_p[i] = ghostsZ[gidX + (gidY+i*L_WIDTH)*NB_X];
+#else
+    scal_z_p[i] = ghosts[gidX + (gidY+i*L_WIDTH)*NB_X];
+#endif
+#else
+    scal_z_p[i] = scal_in[gidX + (gidY+i*L_WIDTH)*NB_X];
+#endif
+
+    // Compute stencil
+    /* // central point */
+    s = scal_z[i] * (1.0 - 2.0 * (cx + cy + cz));
+
+    s += cz*(scal_z_m[i] + scal_z_p[i]);
+
+    s += cy * tile_XY[lidX][lidY+i*L_WIDTH-1];
+    s += cy * tile_XY[lidX][lidY+i*L_WIDTH+1];
+    s += cx * tile_XY[lidX-1][lidY+i*L_WIDTH];
+    s += cx * tile_XY[lidX+1][lidY+i*L_WIDTH];
+
+    // write result
+    scal_out[gidX + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y] = s;
+  }
+}
+
diff --git a/HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl
new file mode 100644
index 000000000..2081d8235
--- /dev/null
+++ b/HySoP/hysop/gpu/cl_src/kernels/comm_remeshing_noVec.cl
@@ -0,0 +1,253 @@
+/**
+ * @file comm_remeshing_noVec.cl
+ * Remeshing kernel.
+ */
+
+void reduce_local(__local int* minmax, int lid);
+
+/**
+ * Kernel reducing, for each XY plane, the minimum and maximum indices (minmax values) of the grid points outside the local domain that receive particle remeshing contributions.
+ * @param ppos : particles positions.
+ * @param minmax_buffer : global array to store minmax values for each XY plane.
+ * @param minmax : local scratch array holding 12 values per work-item for the work-group reduction.
+ * @param dx : mesh step size.
+ */
+__kernel void reduce_stage1_rmsh(__global const float* ppos,
+				 __global int* minmax_buffer,
+				 __local int* minmax,
+				 float dx)
+{
+  int gidY, gidZ = get_global_id(2);
+  int lid = get_global_id(0);
+  int my_minmax[12] = {1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30, 1<<30, -1<<30}; /* minimums start at 1<<30, maximums at -1<<30 */
+  int i, s;
+  int ix,ix_s;
+  float p;
+  bool is_l, is_r;
+
+  for(gidY=0;gidY<NB_II;gidY++)
+    {
+      for (i=lid; i<NB_I; i+=WI_NB_REDUCE)
+	{
+	  p = (ppos[gidZ*NB_I*NB_II + gidY*NB_I + i])/dx; /* particle position in units of dx */
+	  ix = convert_int_rtn(p) - REMESH_SHIFT; /* first grid index touched by this particle */
+
+	  for (s=0;s<2*(REMESH_SHIFT+1); s++) /* visit the 2*(REMESH_SHIFT+1) indices starting at ix */
+	    {
+	      ix_s = ix + s;
+	      is_l = ix_s<START_INDEX; /* left of the local index range */
+	      is_r = ix_s>STOP_INDEX; /* right of the local index range */
+
+	      my_minmax[L_MIN_X] = (is_l && my_minmax[L_MIN_X]>ix_s) ? ix_s : my_minmax[L_MIN_X];
+	      my_minmax[L_MAX_X] = (is_l && my_minmax[L_MAX_X]<ix_s) ? ix_s : my_minmax[L_MAX_X];
+	      my_minmax[L_MIN_Y] = (is_l && my_minmax[L_MIN_Y]>gidY) ? gidY : my_minmax[L_MIN_Y];
+	      my_minmax[L_MAX_Y] = (is_l && my_minmax[L_MAX_Y]<gidY) ? gidY : my_minmax[L_MAX_Y];
+	      my_minmax[L_MIN_Z] = (is_l && my_minmax[L_MIN_Z]>gidZ) ? gidZ : my_minmax[L_MIN_Z];
+	      my_minmax[L_MAX_Z] = (is_l && my_minmax[L_MAX_Z]<gidZ) ? gidZ : my_minmax[L_MAX_Z];
+
+	      my_minmax[R_MIN_X] = (is_r  && my_minmax[R_MIN_X]>ix_s) ? ix_s : my_minmax[R_MIN_X];
+	      my_minmax[R_MAX_X] = (is_r  && my_minmax[R_MAX_X]<ix_s) ? ix_s : my_minmax[R_MAX_X];
+	      my_minmax[R_MIN_Y] = (is_r && my_minmax[R_MIN_Y]>gidY) ? gidY : my_minmax[R_MIN_Y];
+	      my_minmax[R_MAX_Y] = (is_r && my_minmax[R_MAX_Y]<gidY) ? gidY : my_minmax[R_MAX_Y];
+	      my_minmax[R_MIN_Z] = (is_r && my_minmax[R_MIN_Z]>gidZ) ? gidZ : my_minmax[R_MIN_Z];
+	      my_minmax[R_MAX_Z] = (is_r && my_minmax[R_MAX_Z]<gidZ) ? gidZ : my_minmax[R_MAX_Z];
+	    }
+	}
+    }
+
+  for(i=0;i<12;i++) /* publish the 12 per-work-item values to local memory */
+    minmax[lid*12+i] = my_minmax[i];
+
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  reduce_local(minmax, lid);
+
+  if (lid == 0) /* work-item 0 writes the reduced values of this Z index */
+    for(i=0;i<12;i++)
+      minmax_buffer[gidZ*12+i] = minmax[i];
+}
+
+/**
+ * Performs remeshing of the particles' scalar.
+ * A work-group is handling a 1D problem. Thus, gidY and gidZ are constants among work-items of a work-group.
+ * Each work-item remeshes <code>PART_NB_PER_WI</code> contiguous particles; computing contiguous particles is meant to avoid concurrent writings in case of strong velocity gradients.
+ * Particles are processed one at a time (scalar variant, no OpenCL vector types).
+ * Scalar results are stored in a local buffer as a cache and then copied to global memory buffer.
+ *
+ * @param ppos Particle position
+ * @param pscal Particle scalar
+ * @param gscal Grid scalar
+ * @param buffer_l Buffer for storing out of domain contributions (to left)
+ * @param buffer_r Buffer for storing out of domain contributions (to right)
+ * @param minmax_global Minmax values giving the start index and used extent of both buffers
+ * @param gscal_loc Local cache for the grid scalar of the current line
+ * @param l_buff_loc Local cache for the left out of domain contributions of the current line
+ * @param r_buff_loc Local cache for the right out of domain contributions of the current line
+ * @param min_position Domain lower coordinate (not used by this kernel)
+ * @param dx Space step
+ * @param l_nb buffer_l sizes
+ * @param r_nb buffer_r sizes
+ *
+ * @remark <code>NB_I</code>, <code>NB_II</code>, <code>NB_III</code> : points number in directions from 1st varying index to last.
+ * @remark <code>WI_NB</code> corresponds to the work-item number.
+ * @remark The <code>__N__</code>, <code>__NN__</code>, <code>__RCOMP_I</code>, <code>__RCOMP_P</code> and <code>__ID__</code> expansion tokens handled at compilation time do not appear in this kernel.
+ * @see parmepy.gpu.tools.parse_file
+ */
+__kernel void buff_remesh(__global const float* ppos,
+			  __global const float* pscal,
+			  __global float* gscal,
+			  __global float* buffer_l,
+			  __global float* buffer_r,
+			  __global int* minmax_global,
+			  __local float* gscal_loc,
+			  __local float* l_buff_loc,
+			  __local float* r_buff_loc,
+			  float min_position, float dx,
+			  int4 l_nb, int4 r_nb
+			  )
+{
+  int lid = get_local_id(0);	/* OpenCL work-item local index (X) */
+  int gidY = get_global_id(1); /* OpenCL work-item global index (Y) */
+  int gidZ = get_global_id(2); /* OpenCL work-item global index (Z) */
+  float invdx = 1.0/dx;         /* Space step inverse */
+  int i;			/* Particle index in 1D problem */
+  float p;			/* Particle position */
+  float s;      /* Particle scalar */
+  float y;			/* Normalized distance to nearest left grid point */
+  int ind;			/* Integer coordinate */
+  int index;		/* Remeshing index */
+  float w;			/* Remeshing weight, then weighted contribution */
+
+  /* Start indices and used extents of the left (L_*) and right (R_*) buffers, read from minmax_global */
+  int l_start_x = minmax_global[L_MIN_X];
+  int l_start_y = minmax_global[L_MIN_Y];
+  int l_start_z = minmax_global[L_MIN_Z];
+  int r_start_x = minmax_global[R_MIN_X];
+  int r_start_y = minmax_global[R_MIN_Y];
+  int r_start_z = minmax_global[R_MIN_Z];
+  int4 l_nb_used = (int4)(minmax_global[L_MAX_X] - l_start_x + 1,
+			  minmax_global[L_MAX_Y] - l_start_y + 1,
+			  minmax_global[L_MAX_Z] - l_start_z + 1,
+			  0);
+  int4 r_nb_used = (int4)(minmax_global[R_MAX_X] - r_start_x + 1,
+			  minmax_global[R_MAX_Y] - r_start_y + 1,
+			  minmax_global[R_MAX_Z] - r_start_z + 1,
+			  0);
+
+  uint line_index = gidY*NB_I+ gidZ*NB_I*NB_II; /* Current 1D problem index */
+
+  __local float* loc_ptr;
+
+  // Initialize buffers
+  if((lid < l_nb_used.x))
+    l_buff_loc[lid] = 0.0;
+  if((lid < r_nb_used.x))
+    r_buff_loc[lid] = 0.0;
+
+  for(i=lid; i<NB_I; i+=WI_NB)
+    {
+      /* Initialize result buffer */
+      gscal_loc[i] = 0.0;
+    }
+
+  /* Synchronize work-group */
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  for(i=lid*PART_NB_PER_WI; i<(lid + 1)*PART_NB_PER_WI; i+=1)
+    {
+      /* Read particle position */
+      p = ppos[i + line_index];
+      /* Read particle scalar */
+      s = pscal[i + line_index];
+      /* Remesh particle */
+
+      ind = convert_int_rtn(p * invdx);
+      y = (p - convert_float(ind) * dx) * invdx;
+
+      index = ind - REMESH_SHIFT;
+      /* Each stencil point goes to gscal_loc when its index is in [START_INDEX, STOP_INDEX], otherwise to the left or right buffer; a barrier follows every accumulation */
+      w = REMESH(alpha)(y);
+      loc_ptr = (index>=START_INDEX && index <= STOP_INDEX) ? gscal_loc +noBC_id(index-START_INDEX) : ( (index<START_INDEX)? l_buff_loc+index-l_start_x : r_buff_loc + index-r_start_x );
+      w = w * s;
+      (*loc_ptr) += w;
+      barrier(CLK_LOCAL_MEM_FENCE);
+
+      index = index + 1;
+      w = REMESH(beta)(y);
+      loc_ptr = (index>=START_INDEX && index <= STOP_INDEX) ? gscal_loc +noBC_id(index-START_INDEX) : ( (index<START_INDEX)? l_buff_loc+index-l_start_x : r_buff_loc + index-r_start_x );
+      w = w * s;
+      (*loc_ptr) += w;
+      barrier(CLK_LOCAL_MEM_FENCE);
+
+      index = index + 1;
+      w = REMESH(gamma)(y);
+      loc_ptr = (index>=START_INDEX && index <= STOP_INDEX) ? gscal_loc +noBC_id(index-START_INDEX) : ( (index<START_INDEX)? l_buff_loc+index-l_start_x : r_buff_loc + index-r_start_x );
+      w = w * s;
+      (*loc_ptr) += w;
+      barrier(CLK_LOCAL_MEM_FENCE);
+
+      index = index + 1;
+      w = REMESH(delta)(y);
+      loc_ptr = (index>=START_INDEX && index <= STOP_INDEX) ? gscal_loc +noBC_id(index-START_INDEX) : ( (index<START_INDEX)? l_buff_loc+index-l_start_x : r_buff_loc + index-r_start_x );
+      w = w * s;
+      (*loc_ptr) += w;
+      barrier(CLK_LOCAL_MEM_FENCE);
+
+#if REMESH_SHIFT > 1
+      index = index + 1;
+      w = REMESH(eta)(y);
+      loc_ptr = (index>=START_INDEX && index <= STOP_INDEX) ? gscal_loc +noBC_id(index-START_INDEX) : ( (index<START_INDEX)? l_buff_loc+index-l_start_x : r_buff_loc + index-r_start_x );
+      w = w * s;
+      (*loc_ptr) += w;
+      barrier(CLK_LOCAL_MEM_FENCE);
+
+      index = index + 1;
+      w = REMESH(zeta)(y);
+      loc_ptr = (index>=START_INDEX && index <= STOP_INDEX) ? gscal_loc +noBC_id(index-START_INDEX) : ( (index<START_INDEX)? l_buff_loc+index-l_start_x : r_buff_loc + index-r_start_x );
+      w = w * s;
+      (*loc_ptr) += w;
+      barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+
+#if REMESH_SHIFT > 2
+      index = index + 1;
+      w = REMESH(theta)(y);
+      loc_ptr = (index>=START_INDEX && index <= STOP_INDEX) ? gscal_loc +noBC_id(index-START_INDEX) : ( (index<START_INDEX)? l_buff_loc+index-l_start_x : r_buff_loc + index-r_start_x );
+      w = w * s;
+      (*loc_ptr) += w;
+      barrier(CLK_LOCAL_MEM_FENCE);
+
+      index = index + 1;
+      w = REMESH(iota)(y);
+      loc_ptr = (index>=START_INDEX && index <= STOP_INDEX) ? gscal_loc +noBC_id(index-START_INDEX) : ( (index<START_INDEX)? l_buff_loc+index-l_start_x : r_buff_loc + index-r_start_x );
+      w = w * s;
+      (*loc_ptr) += w;
+      barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+
+#if REMESH_SHIFT > 3
+      index = index + 1;
+      w = REMESH(kappa)(y);
+      loc_ptr = (index>=START_INDEX && index <= STOP_INDEX) ? gscal_loc +noBC_id(index-START_INDEX) : ( (index<START_INDEX)? l_buff_loc+index-l_start_x : r_buff_loc + index-r_start_x );
+      w = w * s;
+      (*loc_ptr) += w;
+      barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+    }
+
+  /* Synchronize work-group */
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  for(i=lid; i<NB_I; i+=WI_NB)
+    {
+      /* Store result */
+      gscal[i + line_index] = gscal_loc[noBC_id(i)];
+    }
+
+  // Store buffers (each buffer is addressed with its own sizes: l_nb for buffer_l, r_nb for buffer_r)
+  if((lid < l_nb_used.x) && (gidY<l_nb_used.y) && (gidZ<l_nb_used.z))
+    buffer_l[lid + gidY*l_nb.x + gidZ*l_nb.x*l_nb.y] = l_buff_loc[lid];
+  if((lid < r_nb_used.x) && (gidY<r_nb_used.y) && (gidZ<r_nb_used.z))
+    buffer_r[lid + gidY*r_nb.x + gidZ*r_nb.x*r_nb.y] = r_buff_loc[lid];
+
+}
diff --git a/HySoP/hysop/gpu/cl_src/kernels/diffusion.cl b/HySoP/hysop/gpu/cl_src/kernels/diffusion.cl
new file mode 100644
index 000000000..d764a3a9b
--- /dev/null
+++ b/HySoP/hysop/gpu/cl_src/kernels/diffusion.cl
@@ -0,0 +1,119 @@
+
+
+/** Applies one 7-point diffusion stencil update on a periodic NB_X*NB_Y*NB_Z grid.
+ * Each work-group sweeps along Z: the current XY plane and a one-point halo are staged
+ * in local memory; the Z-1, Z and Z+1 values of each point are kept in private arrays.
+ * Stencil weights are nudt/dx^2 per direction; scal_in is read, scal_out is written.
+ */
+__kernel void diffusion(__global const float* scal_in,
+			__global float* scal_out,
+			float nudt, float4 dx)
+{
+  int t_gidX = get_group_id(0);	/* Tile index (X) */
+  int t_gidY = get_group_id(1);	/* Tile index (Y) */
+  int lidX = get_local_id(0);
+  int lidY = get_local_id(1);
+  int gidX = t_gidX*TILE_SIZE + lidX;	/* OpenCL work-item global index (X) */
+  int gidY = t_gidY*TILE_SIZE + lidY; /* OpenCL work-item global index (Y) */
+  int gidZ;
+  float cx = nudt/(dx.x*dx.x);	/* Stencil weight (X) */
+  float cy = nudt/(dx.y*dx.y);	/* Stencil weight (Y) */
+  float cz = nudt/(dx.z*dx.z);	/* Stencil weight (Z) */
+  float scal_z_m[NB_PART];	/* Values in plane Z-1 */
+  float scal_z[NB_PART];	/* Values in plane Z */
+  float scal_z_p[NB_PART];	/* Values in plane Z+1 */
+  float s;
+  uint i;
+
+  for(i=0;i<NB_PART;i++){
+    scal_z_m[i] = scal_in[gidX + (gidY+i*L_WIDTH)*NB_X + (NB_Z-1)*NB_X*NB_Y]; /* plane NB_Z-1: periodic Z-1 neighbour of plane 0 */
+    scal_z[i] = scal_in[gidX + (gidY+i*L_WIDTH)*NB_X]; /* plane 0 */
+  }
+
+  __local float tile_XY[TILE_SIZE+2][TILE_SIZE+2];
+
+  lidX += 1;	/* Shift local indices by one: row/column 0 of the tile holds the halo */
+  lidY += 1;
+
+  // loop over all Z planes but the last (its Z+1 neighbour wraps to plane 0).
+  for (gidZ=0; gidZ<(NB_Z-1); gidZ++)
+    {
+      for(i=0;i<NB_PART;i++){
+	// fill the tile interior
+	tile_XY[lidX][lidY+i*L_WIDTH] = scal_in[gidX + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y];
+
+	// fill tile edges in X with the periodic neighbours of the tile
+	tile_XY[0][lidY+i*L_WIDTH] = scal_in[((t_gidX*TILE_SIZE-1+NB_X)%NB_X) + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y];
+	tile_XY[TILE_SIZE+1][lidY+i*L_WIDTH] = scal_in[(((t_gidX+1)*TILE_SIZE)%NB_X) + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y];
+      }
+      tile_XY[lidX][0] = scal_in[gidX + ((t_gidY*TILE_SIZE-1+NB_Y)%NB_Y)*NB_X + gidZ*NB_X*NB_Y];
+      tile_XY[lidX][TILE_SIZE+1] = scal_in[gidX + (((t_gidY+1)*TILE_SIZE)%NB_Y)*NB_X + gidZ*NB_X*NB_Y];
+
+      /* Synchronize work-group: the whole tile must be loaded before it is read */
+      barrier(CLK_LOCAL_MEM_FENCE);
+
+      for(i=0;i<NB_PART;i++){
+	/* get the Z+1 neighbour */
+	scal_z_p[i] = scal_in[gidX + (gidY+i*L_WIDTH)*NB_X + (gidZ+1)*NB_X*NB_Y];
+
+	// Compute stencil
+	// central point (float literals keep the arithmetic in single precision)
+	s = scal_z[i] * (1.0f - 2.0f * (cx + cy + cz));
+
+	s += cz*(scal_z_m[i] + scal_z_p[i]);
+
+	s += cy * tile_XY[lidX][lidY+i*L_WIDTH-1];
+	s += cy * tile_XY[lidX][lidY+i*L_WIDTH+1];
+	s += cx * tile_XY[lidX-1][lidY+i*L_WIDTH];
+	s += cx * tile_XY[lidX+1][lidY+i*L_WIDTH];
+
+	// write result
+	scal_out[gidX + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y] = s;
+      }
+
+      /* Synchronize work-group before the tile is overwritten for the next plane */
+      barrier(CLK_LOCAL_MEM_FENCE);
+
+      for(i=0;i<NB_PART;i++){
+	// Shift Z values
+	scal_z_m[i] = scal_z[i];
+	scal_z[i] = scal_z_p[i];
+      }
+    }
+
+  // Compute last plane: its Z+1 neighbour is plane 0 (periodic wrap)
+  gidZ = NB_Z - 1;
+
+  for(i=0;i<NB_PART;i++){
+    // fill the tile interior
+    tile_XY[lidX][lidY+i*L_WIDTH] = scal_in[gidX + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y];
+
+    // fill tile edges in X with the periodic neighbours of the tile
+    tile_XY[0][lidY+i*L_WIDTH] = scal_in[((t_gidX*TILE_SIZE-1+NB_X)%NB_X) + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y];
+    tile_XY[TILE_SIZE+1][lidY+i*L_WIDTH] = scal_in[(((t_gidX+1)*TILE_SIZE)%NB_X) + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y];
+  }
+  tile_XY[lidX][0] = scal_in[gidX + ((t_gidY*TILE_SIZE-1+NB_Y)%NB_Y)*NB_X + gidZ*NB_X*NB_Y];
+  tile_XY[lidX][TILE_SIZE+1] = scal_in[gidX + (((t_gidY+1)*TILE_SIZE)%NB_Y)*NB_X + gidZ*NB_X*NB_Y];
+
+  /* Synchronize work-group: the whole tile must be loaded before it is read */
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  for(i=0;i<NB_PART;i++){
+    /* Z+1 neighbour of the last plane is plane 0 */
+    scal_z_p[i] = scal_in[gidX + (gidY+i*L_WIDTH)*NB_X];
+
+    // Compute stencil
+    // central point (float literals keep the arithmetic in single precision)
+    s = scal_z[i] * (1.0f - 2.0f * (cx + cy + cz));
+
+    s += cz*(scal_z_m[i] + scal_z_p[i]);
+
+    s += cy * tile_XY[lidX][lidY+i*L_WIDTH-1];
+    s += cy * tile_XY[lidX][lidY+i*L_WIDTH+1];
+    s += cx * tile_XY[lidX-1][lidY+i*L_WIDTH];
+    s += cx * tile_XY[lidX+1][lidY+i*L_WIDTH];
+
+    // write result
+    scal_out[gidX + (gidY+i*L_WIDTH)*NB_X + gidZ*NB_X*NB_Y] = s;
+  }
+}
diff --git a/HySoP/hysop/gpu/cl_src/kernels/minmax_buffers.cl b/HySoP/hysop/gpu/cl_src/kernels/minmax_buffers.cl
new file mode 100644
index 000000000..558fef5c9
--- /dev/null
+++ b/HySoP/hysop/gpu/cl_src/kernels/minmax_buffers.cl
@@ -0,0 +1,84 @@
+/**
+ * @file   minmax_buffers.cl
+ *
+ * @brief  reduction kernels.
+ */
+
+void reduce_local(__local int* minmax, int lid); /* defined below; declared before its first use */
+
+/**
+ * Performs a global reduction of an array of minmax values.
+ * NOTE(review): appears to expect a single work-group of WI_NB_REDUCE work-items - confirm against the host code.
+ * @param minmax_buffer : minmax values to reduce (NB_III records of 12 integers)
+ * @param minmax_global : Result of the reduction (12 integers)
+ * @param minmax : local scratch buffer, 12 integers per work-item
+ * @remark : A single stage kernel can be written but it seems to be 5 times slower
+ */
+__kernel void reduce_stage2(__global const int* minmax_buffer,
+			    __global int* minmax_global,
+			    __local int* minmax)
+{
+  int lid = get_local_id(0);	/* index into the work-group's local scratch buffer */
+  int my_minmax[12] = {1<<30, -(1<<30), 1<<30, -(1<<30), 1<<30, -(1<<30), 1<<30, -(1<<30), 1<<30, -(1<<30), 1<<30, -(1<<30)};
+  int i;
+
+  for (i=lid; i<NB_III; i+=WI_NB_REDUCE)
+    {
+      my_minmax[L_MIN_X] = min(my_minmax[L_MIN_X], minmax_buffer[12*i + L_MIN_X]);
+      my_minmax[L_MAX_X] = max(my_minmax[L_MAX_X], minmax_buffer[12*i + L_MAX_X]);
+      my_minmax[L_MIN_Y] = min(my_minmax[L_MIN_Y], minmax_buffer[12*i + L_MIN_Y]);
+      my_minmax[L_MAX_Y] = max(my_minmax[L_MAX_Y], minmax_buffer[12*i + L_MAX_Y]);
+      my_minmax[L_MIN_Z] = min(my_minmax[L_MIN_Z], minmax_buffer[12*i + L_MIN_Z]);
+      my_minmax[L_MAX_Z] = max(my_minmax[L_MAX_Z], minmax_buffer[12*i + L_MAX_Z]);
+
+      my_minmax[R_MIN_X] = min(my_minmax[R_MIN_X], minmax_buffer[12*i + R_MIN_X]);
+      my_minmax[R_MAX_X] = max(my_minmax[R_MAX_X], minmax_buffer[12*i + R_MAX_X]);
+      my_minmax[R_MIN_Y] = min(my_minmax[R_MIN_Y], minmax_buffer[12*i + R_MIN_Y]);
+      my_minmax[R_MAX_Y] = max(my_minmax[R_MAX_Y], minmax_buffer[12*i + R_MAX_Y]);
+      my_minmax[R_MIN_Z] = min(my_minmax[R_MIN_Z], minmax_buffer[12*i + R_MIN_Z]);
+      my_minmax[R_MAX_Z] = max(my_minmax[R_MAX_Z], minmax_buffer[12*i + R_MAX_Z]);
+    }
+
+  for(i=0;i<12;i++)
+    minmax[lid*12+i] = my_minmax[i];	/* publish this work-item's partial result */
+
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  reduce_local(minmax, lid);	/* leaves the reduced record in minmax[0..11] */
+
+  for(i=0;i<12;i++)
+    minmax_global[i] = minmax[i];	/* every work-item stores the same reduced values */
+}
+
+
+/**
+ * Reduces minmax values held in local memory. The array must contain one record (12 integers) per work-item of a single work-group.
+ * At each step, the upper half of the not-yet-reduced records is compared with and merged into the lower half, which then becomes the set of records still to reduce.
+ * NOTE(review): the halving from WI_NB_REDUCE/2 only visits every record when WI_NB_REDUCE is a power of two - confirm on the host side.
+ * @param minmax : array of minmax values (12 integers each).
+ * @param lid : local index of the work-item.
+ */
+void reduce_local(__local int* minmax, int lid)
+{
+  __local int* mine = minmax + 12*lid;	/* record owned by this work-item */
+  for(int stride=WI_NB_REDUCE/2; stride>0; stride=stride/2)
+    {
+      if(lid < stride)
+        {
+          __local int* other = minmax + 12*(lid+stride);	/* partner record in the upper half */
+          mine[L_MIN_X] = min(mine[L_MIN_X], other[L_MIN_X]);
+          mine[R_MIN_X] = min(mine[R_MIN_X], other[R_MIN_X]);
+          mine[L_MIN_Y] = min(mine[L_MIN_Y], other[L_MIN_Y]);
+          mine[R_MIN_Y] = min(mine[R_MIN_Y], other[R_MIN_Y]);
+          mine[L_MIN_Z] = min(mine[L_MIN_Z], other[L_MIN_Z]);
+          mine[R_MIN_Z] = min(mine[R_MIN_Z], other[R_MIN_Z]);
+          mine[L_MAX_X] = max(mine[L_MAX_X], other[L_MAX_X]);
+          mine[R_MAX_X] = max(mine[R_MAX_X], other[R_MAX_X]);
+          mine[L_MAX_Y] = max(mine[L_MAX_Y], other[L_MAX_Y]);
+          mine[R_MAX_Y] = max(mine[R_MAX_Y], other[R_MAX_Y]);
+          mine[L_MAX_Z] = max(mine[L_MAX_Z], other[L_MAX_Z]);
+          mine[R_MAX_Z] = max(mine[R_MAX_Z], other[R_MAX_Z]);
+        }
+      barrier(CLK_LOCAL_MEM_FENCE);	/* reached by every work-item, including idle ones */
+    }
+}
diff --git a/HySoP/hysop/gpu/cl_src/kernels/remeshing.cl b/HySoP/hysop/gpu/cl_src/kernels/remeshing.cl
index bf2cf7d2f..1bd8fb5b8 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/remeshing.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/remeshing.cl
@@ -53,11 +53,11 @@ __kernel void remeshing_kernel(__global const float* ppos,
   for(i=gidX*PART_NB_PER_WI; i<(gidX + 1)*PART_NB_PER_WI; i+=__N__)
     {
       /* Read particle position */
-      p = vload__N__((i + line_index)/__N__, ppos) - (float__N__)(min_position);
+      p = vload__N__((i + line_index)/__N__, ppos);
       /* Read particle scalar */
       __RCOMP_Is__ID__ = vload__N__((i + line_index)/__N__, pscal__ID__);
       /* Remesh particle */
-      remesh(i, dx, invdx, __RCOMP_Ps__ID__, p, __RCOMP_Pgscal_loc__ID__);
+      remesh(i, dx, invdx, __RCOMP_Ps__ID__, p, min_position, __RCOMP_Pgscal_loc__ID__);
     }
 
   /* Synchronize work-group */
diff --git a/HySoP/hysop/gpu/cl_src/kernels/remeshing_noVec.cl b/HySoP/hysop/gpu/cl_src/kernels/remeshing_noVec.cl
index faa18986c..0ec701775 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/remeshing_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/remeshing_noVec.cl
@@ -53,11 +53,11 @@ __kernel void remeshing_kernel(__global const float* ppos,
   for(i=gidX*PART_NB_PER_WI; i<(gidX + 1)*PART_NB_PER_WI; i+=1)
     {
       /* Read particle position */
-      p = ppos[i + line_index] - min_position;
+      p = ppos[i + line_index];
       /* Read particle scalar */
       __RCOMP_Is__ID__ = pscal__ID__[i + line_index];
       /* Remesh particle */
-      remesh(i, dx, invdx, __RCOMP_Ps__ID__, p, __RCOMP_Pgscal_loc__ID__);
+      remesh(i, dx, invdx, __RCOMP_Ps__ID__, p, min_position, __RCOMP_Pgscal_loc__ID__);
     }
 
   /* Synchronize work-group */
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic.cl
index 71cf2163d..394cb1d23 100644
--- a/HySoP/hysop/gpu/cl_src/remeshing/basic.cl
+++ b/HySoP/hysop/gpu/cl_src/remeshing/basic.cl
@@ -3,7 +3,7 @@
  * Remeshing function, vectorized version.
  */
 
-void remesh(uint i, float dx, float invdx, __RCOMP_P float__N__ s__ID__, float__N__ p, __RCOMP_P__local float* gscal_loc__ID__);
+void remesh(uint i, float dx, float invdx, __RCOMP_P float__N__ s__ID__, float__N__ p, float min_position, __RCOMP_P__local float* gscal_loc__ID__);
 
 
 /**
@@ -33,12 +33,15 @@ void remesh(uint i, float dx, float invdx, __RCOMP_P float__N__ s__ID__, float__
 void remesh(uint i, float dx, float invdx,
 	    __RCOMP_P float__N__ s__ID__,
 	    float__N__ p,
+	    float min_position,
 	    __RCOMP_P__local float* gscal_loc__ID__){
   float__N__ y;			/* Normalized distance to nearest left grid point */
   int__N__ ind;			/* Integer coordinate */
   uint__N__ index;		/* Remeshing index */
   float w__NN__;
 
+  p = p - min_position;
+
   ind = convert_int__N___rtn(p * invdx);
   y = (p - convert_float__N__(ind) * dx) * invdx;
 
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec.cl
index 61b3cf0b9..02f6562e5 100644
--- a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec.cl
@@ -3,7 +3,7 @@
  * Remeshing function, vectorized version.
  */
 
-void remesh(uint i, float dx, float invdx, __RCOMP_P float s__ID__, float p, __RCOMP_P__local float* gscal_loc__ID__);
+void remesh(uint i, float dx, float invdx, __RCOMP_P float s__ID__, float p, float min_position, __RCOMP_P__local float* gscal_loc__ID__);
 
 
 /**
@@ -33,13 +33,14 @@ void remesh(uint i, float dx, float invdx, __RCOMP_P float s__ID__, float p, __R
 void remesh(uint i, float dx, float invdx,
 	    __RCOMP_P float s__ID__,
 	    float p,
+	    float min_position,
 	    __RCOMP_P__local float* gscal_loc__ID__){
   float y;			/* Normalized distance to nearest left grid point */
   int ind;			/* Integer coordinate */
   uint index;		/* Remeshing index */
   float w;
 
-
+  p = p  - min_position;
 
   ind = convert_int_rtn(p * invdx);
   y = (p - convert_float(ind) * dx) * invdx;
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_2d.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_2d.cl
deleted file mode 100644
index abb672668..000000000
--- a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_2d.cl
+++ /dev/null
@@ -1,111 +0,0 @@
-/**
- * @file basic_noVec_vector_2d.cl
- * Remeshing function, vectorized version for 2D vector remeshing.
- */
-
-void remesh(uint i, float dx, float invdx,
-	    float v_X, float v_Y,
-	    float p,
-	    __local float* gvec_X_loc, __local float* gvec_Y_loc);
-
-
-/**
- * Remesh particles in local buffer.
- *
- * Remeshing formula is given a compiling time.
- * Use of builtin OpenCL functions fma and mix. Computations through OpenCL vector types.
- *
- * @param i Particle index
- * @param dx Space step
- * @param invdx 1/dx
- * @param s Particle scalar
- * @param p Particle position
- * @param gscal_loc Local buffer for result
- *
- * @remark <code>NB_I</code>, <code>NB_II</code>, <code>NB_III</code> : points number in directions from 1st varying index to last.
- * @remark <code>__N__</code> is expanded at compilation time by vector width.
- * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component.
- * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>}
- * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>)
- * @see parmepy.gpu.tools.parse_file
- * @see parmepy.gpu.cl_src.common
- */
-void remesh(uint i, float dx, float invdx,
-	    float v_X, float v_Y,
-	    float p,
-	    __local float* gvec_X_loc, __local float* gvec_Y_loc){
-  float y;			/* Normalized distance to nearest left grid point */
-  int ind;			/* Integer coordinate */
-  uint index;		/* Remeshing index */
-  float w;
-
-  ind = convert_int_rtn(p * invdx);
-  y = (p - convert_float(ind) * dx) * invdx;
-
-  index = convert_uint((ind - REMESH_SHIFT + NB_I) % NB_I);
-
-  w = REMESH(alpha)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(beta)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(gamma)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(delta)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-#if REMESH_SHIFT > 1
-  index = (index + 1) % NB_I;
-  w = REMESH(eta)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(zeta)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if REMESH_SHIFT > 2
-  index = (index + 1) % NB_I;
-  w = REMESH(theta)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(iota)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if REMESH_SHIFT > 3
-  index = (index + 1) % NB_I;
-  w = REMESH(kappa)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(mu)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-}
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_3d.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_3d.cl
deleted file mode 100644
index c912769d9..000000000
--- a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_3d.cl
+++ /dev/null
@@ -1,121 +0,0 @@
-/**
- * @file basic_noVec_vector_3d.cl
- * Remeshing function, vectorized version for 3D vector remeshing.
- */
-
-void remesh(uint i, float dx, float invdx,
-	    float v_X, float v_Y, float v_Z,
-	    float p,
-	    __local float* gvec_X_loc, __local float* gvec_Y_loc, __local float* gvec_Z_loc);
-
-
-/**
- * Remesh particles in local buffer.
- *
- * Remeshing formula is given a compiling time.
- * Use of builtin OpenCL functions fma and mix. Computations through OpenCL vector types.
- *
- * @param i Particle index
- * @param dx Space step
- * @param invdx 1/dx
- * @param s Particle scalar
- * @param p Particle position
- * @param gscal_loc Local buffer for result
- *
- * @remark <code>NB_I</code>, <code>NB_II</code>, <code>NB_III</code> : points number in directions from 1st varying index to last.
- * @remark <code>__N__</code> is expanded at compilation time by vector width.
- * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component.
- * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>}
- * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>)
- * @see parmepy.gpu.tools.parse_file
- * @see parmepy.gpu.cl_src.common
- */
-void remesh(uint i, float dx, float invdx,
-	    float v_X, float v_Y, float v_Z,
-	    float p,
-	    __local float* gvec_X_loc, __local float* gvec_Y_loc, __local float* gvec_Z_loc){
-  float y;			/* Normalized distance to nearest left grid point */
-  int ind;			/* Integer coordinate */
-  uint index;		/* Remeshing index */
-  float w;
-
-  ind = convert_int_rtn(p * invdx);
-  y = (p - convert_float(ind) * dx) * invdx;
-
-  index = convert_uint((ind - REMESH_SHIFT + NB_I) % NB_I);
-
-  w = REMESH(alpha)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  gvec_Z_loc[noBC_id(index)] += (w * v_Z);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(beta)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  gvec_Z_loc[noBC_id(index)] += (w * v_Z);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(gamma)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  gvec_Z_loc[noBC_id(index)] += (w * v_Z);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(delta)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  gvec_Z_loc[noBC_id(index)] += (w * v_Z);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-#if REMESH_SHIFT > 1
-  index = (index + 1) % NB_I;
-  w = REMESH(eta)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  gvec_Z_loc[noBC_id(index)] += (w * v_Z);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(zeta)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  gvec_Z_loc[noBC_id(index)] += (w * v_Z);
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if REMESH_SHIFT > 2
-  index = (index + 1) % NB_I;
-  w = REMESH(theta)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  gvec_Z_loc[noBC_id(index)] += (w * v_Z);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(iota)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  gvec_Z_loc[noBC_id(index)] += (w * v_Z);
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if REMESH_SHIFT > 3
-  index = (index + 1) % NB_I;
-  w = REMESH(kappa)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  gvec_Z_loc[noBC_id(index)] += (w * v_Z);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(mu)(y);
-  gvec_X_loc[noBC_id(index)] += (w * v_X);
-  gvec_Y_loc[noBC_id(index)] += (w * v_Y);
-  gvec_Z_loc[noBC_id(index)] += (w * v_Z);
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-}
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_2d.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_2d.cl
deleted file mode 100644
index da8d9234b..000000000
--- a/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_2d.cl
+++ /dev/null
@@ -1,111 +0,0 @@
-/**
- * @file basic_vector_2d.cl
- * Remeshing function, vectorized version for vector remeshing in 2D.
- */
-
-void remesh(uint i, float dx, float invdx,
-	    float__N__ v_X, float__N__ v_Y,
-	    float__N__ p,
-	    __local float* gvec_X_loc, __local float* gvec_Y_loc);
-
-
-/**
- * Remesh particles in local buffer.
- *
- * Remeshing formula is given a compiling time.
- * Use of builtin OpenCL functions fma and mix. Computations through OpenCL vector types.
- *
- * @param i Particle index
- * @param dx Space step
- * @param invdx 1/dx
- * @param s Particle scalar
- * @param p Particle position
- * @param gscal_loc Local buffer for result
- *
- * @remark <code>NB_I</code>, <code>NB_II</code>, <code>NB_III</code> : points number in directions from 1st varying index to last.
- * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>}
- * @remark <code>__N__</code> is expanded at compilation time by vector width.
- * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component.
- * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>)
- * @see parmepy.gpu.tools.parse_file
- * @see parmepy.gpu.cl_src.common
- */
-void remesh(uint i, float dx, float invdx,
-	    float__N__ v_X, float__N__ v_Y,
-	    float__N__ p,
-	    __local float* gvec_X_loc,  __local float* gvec_Y_loc){
-  float__N__ y;			/* Normalized distance to nearest left grid point */
-  int__N__ ind;			/* Integer coordinate */
-  uint__N__ index;		/* Remeshing index */
-  float w__NN__;
-
-  ind = convert_int__N___rtn(p * invdx);
-  y = (p - convert_float__N__(ind) * dx) * invdx;
-
-  index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I);
-
-  w__NN__ = REMESH(alpha)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(beta)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(gamma)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(delta)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-#if REMESH_SHIFT > 1
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(eta)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(zeta)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if REMESH_SHIFT > 2
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(theta)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(iota)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if REMESH_SHIFT > 3
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(kappa)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(mu)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-}
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_3d.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_3d.cl
deleted file mode 100644
index ed3f4a397..000000000
--- a/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_3d.cl
+++ /dev/null
@@ -1,121 +0,0 @@
-/**
- * @file basic_vector_3d.cl
- * Remeshing function, vectorized version for vector remeshing in 3D.
- */
-
-void remesh(uint i, float dx, float invdx,
-	    float__N__ v_X, float__N__ v_Y, float__N__ v_Z,
-	    float__N__ p,
-	    __local float* gvec_X_loc, __local float* gvec_Y_loc,  __local float* gvec_Z_loc);
-
-
-/**
- * Remesh particles in local buffer.
- *
- * Remeshing formula is given a compiling time.
- * Use of builtin OpenCL functions fma and mix. Computations through OpenCL vector types.
- *
- * @param i Particle index
- * @param dx Space step
- * @param invdx 1/dx
- * @param s Particle scalar
- * @param p Particle position
- * @param gscal_loc Local buffer for result
- *
- * @remark <code>NB_I</code>, <code>NB_II</code>, <code>NB_III</code> : points number in directions from 1st varying index to last.
- * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>}
- * @remark <code>__N__</code> is expanded at compilation time by vector width.
- * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component.
- * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>)
- * @see parmepy.gpu.tools.parse_file
- * @see parmepy.gpu.cl_src.common
- */
-void remesh(uint i, float dx, float invdx,
-	    float__N__ v_X, float__N__ v_Y,float__N__ v_Z,
-	    float__N__ p,
-	    __local float* gvec_X_loc,  __local float* gvec_Y_loc, __local float* gvec_Z_loc){
-  float__N__ y;			/* Normalized distance to nearest left grid point */
-  int__N__ ind;			/* Integer coordinate */
-  uint__N__ index;		/* Remeshing index */
-  float w__NN__;
-
-  ind = convert_int__N___rtn(p * invdx);
-  y = (p - convert_float__N__(ind) * dx) * invdx;
-
-  index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I);
-
-  w__NN__ = REMESH(alpha)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(beta)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(gamma)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(delta)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-#if REMESH_SHIFT > 1
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(eta)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(zeta)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if REMESH_SHIFT > 2
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(theta)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(iota)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if REMESH_SHIFT > 3
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(kappa)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w__NN__ = REMESH(mu)(y.s__NN__);
-  gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__);
-  gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__);
-  gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__);
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-}
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/comm_basic_noVec.cl b/HySoP/hysop/gpu/cl_src/remeshing/comm_basic_noVec.cl
new file mode 100644
index 000000000..3f1519f41
--- /dev/null
+++ b/HySoP/hysop/gpu/cl_src/remeshing/comm_basic_noVec.cl
@@ -0,0 +1,124 @@
+/**
+ * @file remeshing/comm_basic_noVec.cl
+ * Remeshing function, non-vectorized (scalar) version.
+ */
+
+void remesh(uint i, float dx, float invdx, __RCOMP_P float s__ID__, float p, float min_position, __RCOMP_P__local float* gscal_loc__ID__);
+
+
+/**
+ * Remesh particles in local buffer.
+ *
+ * Remeshing formula is given at compile time.
+ * Scalar version: this function uses no OpenCL vector types. Each stencil point is a guarded accumulation followed by a barrier.
+ *
+ * @param i Particle index (not referenced in this function body)
+ * @param dx Space step
+ * @param invdx 1/dx
+ * @param s Particle scalar
+ * @param p Particle position
+ * @param min_position Not referenced in this function body. NOTE(review): private_noVec.cl subtracts it from p; confirm that skipping it here is intended.
+ * @param gscal_loc Local buffer for result
+ * @remark <code>NB_I</code>, <code>NB_II</code>, <code>NB_III</code> : points number in directions from 1st varying index to last.
+ * @remark <code>T_NB_I</code>: global number of points in the 1st direction (the direction split across MPI processes)
+ * @remark <code>START_INDEX</code>: global starting index of the computational points
+ * @remark <code>__N__</code> is expanded at compilation time by vector width.
+ * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component.
+ * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>}
+ * @remark <code>__RCOMP_I</code> flag is for instruction expansion for the different remeshed components.
+ * @remark <code>__RCOMP_P</code> flag is for function parameter expansion for the different remeshed components.
+ * @remark <code>__ID__</code> is replaced by the remeshed component id in an expansion.
+ * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>)
+ * @see parmepy.gpu.tools.parse_file
+ * @see parmepy.gpu.cl_src.common
+ */
+void remesh(uint i, float dx, float invdx,
+	    __RCOMP_P float s__ID__,
+	    float p,
+	    float min_position,
+	    __RCOMP_P__local float* gscal_loc__ID__){
+  float y;			/* Normalized distance to nearest left grid point */
+  int ind;			/* Integer coordinate */
+  int index;		/* Remeshing index */
+  float w;			/* Temporary remeshing weight */
+
+  ind = convert_int_rtn(p * invdx);	/* _rtn: round toward negative infinity */
+  y = (p - convert_float(ind) * dx) * invdx;
+
+  index = ((ind - REMESH_SHIFT + T_NB_I) % T_NB_I) - START_INDEX;	/* Wrap modulo T_NB_I, then offset by START_INDEX */
+
+  if (index>=0 && index < NB_I){	/* Accumulate only when the target index lies in [0, NB_I) */
+    w = REMESH(alpha)(y);
+    __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__);
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);	/* Outside the guard: reached whether or not the write was done */
+
+  index = index + 1;
+  if (index>=0 && index < NB_I){
+    w = REMESH(beta)(y);
+    __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__);
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  index = index + 1;
+  if (index>=0 && index < NB_I){
+    w = REMESH(gamma)(y);
+    __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__);
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  index = index + 1;
+  if (index>=0 && index < NB_I){
+    w = REMESH(delta)(y);
+    __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__);
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+#if REMESH_SHIFT > 1
+  index = index + 1;
+  if (index>=0 && index < NB_I){
+    w = REMESH(eta)(y);
+    __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__);
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  index = index + 1;
+  if (index>=0 && index < NB_I){
+    w = REMESH(zeta)(y);
+    __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__);
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+
+#if REMESH_SHIFT > 2
+  index = index + 1;
+  if (index>=0 && index < NB_I){
+    w = REMESH(theta)(y);
+    __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__);
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  index = index + 1;
+  if (index>=0 && index < NB_I){
+    w = REMESH(iota)(y);
+    __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__);
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+
+#if REMESH_SHIFT > 3
+  index = index + 1;
+  if (index>=0 && index < NB_I){
+    w = REMESH(kappa)(y);
+    __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__);
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);
+
+  index = index + 1;
+  if (index>=0 && index < NB_I){
+    w = REMESH(mu)(y);
+    __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__);
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+}
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/private.cl b/HySoP/hysop/gpu/cl_src/remeshing/private.cl
index bce6c1790..89b4befa4 100644
--- a/HySoP/hysop/gpu/cl_src/remeshing/private.cl
+++ b/HySoP/hysop/gpu/cl_src/remeshing/private.cl
@@ -3,7 +3,7 @@
  * Remeshing function, vectorized, private variable.
  */
 
-void remesh(uint i, float dx, float invdx, __RCOMP_P float__N__ s__ID__, float__N__ p, __RCOMP_P__local float* gscal_loc__ID__);
+void remesh(uint i, float dx, float invdx, __RCOMP_P float__N__ s__ID__, float__N__ p, float min_position, __RCOMP_P__local float* gscal_loc__ID__);
 
 
 /**
@@ -34,6 +34,7 @@ void remesh(uint i, float dx, float invdx, __RCOMP_P float__N__ s__ID__, float__
 void remesh(uint i, float dx, float invdx,
 	    __RCOMP_P float__N__ s__ID__,
 	    float__N__ p,
+	    float min_position,
 	    __RCOMP_P__local float* gscal_loc__ID__){
   float__N__ y,			   /* Normalized distance to nearest left grid point */
      w;
@@ -41,6 +42,8 @@ void remesh(uint i, float dx, float invdx,
   int__N__ ind;		   	   /* Integer coordinate */
   uint__N__ index;		   /* Remeshing index */
 
+  p = p - min_position;
+
   ind = convert_int__N___rtn(p * invdx);
   y = (p - convert_float__N__(ind) * dx) * invdx;
 
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/private_noVec.cl b/HySoP/hysop/gpu/cl_src/remeshing/private_noVec.cl
index e1afa6a3a..3496e8823 100644
--- a/HySoP/hysop/gpu/cl_src/remeshing/private_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/remeshing/private_noVec.cl
@@ -3,7 +3,7 @@
  * Remeshing function, vectorized, private variable.
  */
 
-void remesh(uint i, float dx, float invdx, __RCOMP_P float s__ID__, float p, __RCOMP_P__local float* gscal_loc__ID__);
+void remesh(uint i, float dx, float invdx, __RCOMP_P float s__ID__, float p, float min_position, __RCOMP_P__local float* gscal_loc__ID__);
 
 
 /**
@@ -34,6 +34,7 @@ void remesh(uint i, float dx, float invdx, __RCOMP_P float s__ID__, float p, __R
 void remesh(uint i, float dx, float invdx,
 	    __RCOMP_P float s__ID__,
 	    float p,
+	    float min_position,
 	    __RCOMP_P__local float* gscal_loc__ID__){
   float y,			/* Normalized distance to nearest left grid point */
     w;			/* Temporary remeshing weights */
@@ -41,6 +42,8 @@ void remesh(uint i, float dx, float invdx,
   int ind;			/* Integer coordinate */
   uint index;		/* Remeshing index */
 
+  p = p - min_position;
+
   ind = convert_int_rtn(p * invdx);
   y = (p - convert_float(ind) * dx) * invdx;
 
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/private_vector_2d.cl b/HySoP/hysop/gpu/cl_src/remeshing/private_vector_2d.cl
deleted file mode 100644
index fcb1e443c..000000000
--- a/HySoP/hysop/gpu/cl_src/remeshing/private_vector_2d.cl
+++ /dev/null
@@ -1,112 +0,0 @@
-/**
- * @file private_vector_2d.cl
- * Remeshing function, vectorized, private variable for 2D vector remeshing.
- */
-
-void remesh(uint i, float dx, float invdx,
-	    float__N__ v_X, float__N__ v_Y,
-	    float__N__ p,
-	    __local float* gvec_X_loc, __local float* gvec_Y_loc);
-
-
-/**
- * Remesh particles in local buffer.
- *
- * Remeshing formula is given a compiling time.
- * Use of builtin OpenCL functions fma and mix. Computations through OpenCL vector types.
- * Use of a private temporary variable for remeshing weights.
- *
- * @param i Particle index
- * @param dx Space step
- * @param invdx 1/dx
- * @param s Particle scalar
- * @param p Particle position
- * @param gscal_loc Local buffer for result
- *
- * @remark <code>NB_I</code>, <code>NB_II</code>, <code>NB_III</code> : points number in directions from 1st varying index to last.
- * @remark <code>__N__</code> is expanded at compilation time by vector width.
- * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component.
- * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>}
- * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>)
- * @see parmepy.gpu.tools.parse_file
- * @see parmepy.gpu.cl_src.common
- */
-void remesh(uint i, float dx, float invdx,
-	    float__N__ v_X, float__N__ v_Y,
-	    float__N__ p,
-	    __local float* gvec_X_loc, __local float* gvec_Y_loc){
-  float__N__ y,			/* Normalized distance to nearest left grid point */
-    w;			        /* Temporary remeshing weights */
-  int__N__ ind;			/* Integer coordinate */
-  uint__N__ index;		/* Remeshing index */
-
-  ind = convert_int__N___rtn(p * invdx);
-  y = (p - convert_float__N__(ind) * dx) * invdx;
-
-  index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I);
-
-  w = REMESH(alpha)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(beta)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(gamma)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(delta)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-#if REMESH_SHIFT > 1
-  index = (index + 1) % NB_I;
-  w = REMESH(eta)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(zeta)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if REMESH_SHIFT > 2
-  index = (index + 1) % NB_I;
-  w = REMESH(theta)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(iota)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if REMESH_SHIFT > 3
-  index = (index + 1) % NB_I;
-  w = REMESH(kappa)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(mu)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-}
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/private_vector_3d.cl b/HySoP/hysop/gpu/cl_src/remeshing/private_vector_3d.cl
deleted file mode 100644
index dabd8e5d8..000000000
--- a/HySoP/hysop/gpu/cl_src/remeshing/private_vector_3d.cl
+++ /dev/null
@@ -1,122 +0,0 @@
-/**
- * @file private_vector_3d.cl
- * Remeshing function, vectorized, private variable for 3D vector remeshing.
- */
-
-void remesh(uint i, float dx, float invdx,
-	    float__N__ v_X, float__N__ v_Y,float__N__ v_Z,
-	    float__N__ p,
-	    __local float* gvec_X_loc, __local float* gvec_Y_loc, __local float* gvec_Z_loc);
-
-
-/**
- * Remesh particles in local buffer.
- *
- * Remeshing formula is given a compiling time.
- * Use of builtin OpenCL functions fma and mix. Computations through OpenCL vector types.
- * Use of a private temporary variable for remeshing weights.
- *
- * @param i Particle index
- * @param dx Space step
- * @param invdx 1/dx
- * @param s Particle scalar
- * @param p Particle position
- * @param gscal_loc Local buffer for result
- *
- * @remark <code>NB_I</code>, <code>NB_II</code>, <code>NB_III</code> : points number in directions from 1st varying index to last.
- * @remark <code>__N__</code> is expanded at compilation time by vector width.
- * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component.
- * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>}
- * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>)
- * @see parmepy.gpu.tools.parse_file
- * @see parmepy.gpu.cl_src.common
- */
-void remesh(uint i, float dx, float invdx,
-	    float__N__ v_X, float__N__ v_Y,float__N__ v_Z,
-	    float__N__ p,
-	    __local float* gvec_X_loc, __local float* gvec_Y_loc, __local float* gvec_Z_loc){
-  float__N__ y,			/* Normalized distance to nearest left grid point */
-    w;			        /* Temporary remeshing weights */
-  int__N__ ind;			/* Integer coordinate */
-  uint__N__ index;		/* Remeshing index */
-
-  ind = convert_int__N___rtn(p * invdx);
-  y = (p - convert_float__N__(ind) * dx) * invdx;
-
-  index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I);
-
-  w = REMESH(alpha)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(beta)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(gamma)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(delta)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-#if REMESH_SHIFT > 1
-  index = (index + 1) % NB_I;
-  w = REMESH(eta)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(zeta)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if REMESH_SHIFT > 2
-  index = (index + 1) % NB_I;
-  w = REMESH(theta)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(iota)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-
-#if REMESH_SHIFT > 3
-  index = (index + 1) % NB_I;
-  w = REMESH(kappa)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-
-  index = (index + 1) % NB_I;
-  w = REMESH(mu)(y);
-  gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__;
-  gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__;
-  gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__;
-  barrier(CLK_LOCAL_MEM_FENCE);
-#endif
-}
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/weights.cl b/HySoP/hysop/gpu/cl_src/remeshing/weights.cl
index 0b102c369..d101fffed 100644
--- a/HySoP/hysop/gpu/cl_src/remeshing/weights.cl
+++ b/HySoP/hysop/gpu/cl_src/remeshing/weights.cl
@@ -5,197 +5,197 @@
  */
 
 inline float__N__ alpha_l2_1(float__N__ y){
-  return ((y * (y * (-y + 2.0) - 1.0)) / 2.0);}
+  return ((y * (y * (-y + 2.0) - 1.0)) * 0.5);}
 inline float__N__ beta_l2_1(float__N__ y){
-  return ((y * y * (3.0 * y - 5.0) + 2.0) / 2.0);}
+  return ((y * y * (3.0 * y - 5.0) + 2.0) * 0.5);}
 inline float__N__ gamma_l2_1(float__N__ y){
-  return ((y * (y * (-3.0 * y + 4.0) + 1.0)) / 2.0);}
+  return ((y * (y * (-3.0 * y + 4.0) + 1.0)) * 0.5);}
 inline float__N__ delta_l2_1(float__N__ y){
-  return ((y * y * (y - 1.0)) / 2.0);}
+  return ((y * y * (y - 1.0)) * 0.5);}
 
 
 inline float__N__ alpha_l2_2(float__N__ y){
-  return ((y * (y * (y * (y * (2.0 * y - 5.0) + 3.0) + 1.0) - 1.0)) / 2.0);}
+  return ((y * (y * (y * (y * (2.0 * y - 5.0) + 3.0) + 1.0) - 1.0)) * 0.5);}
 inline float__N__ beta_l2_2(float__N__ y){
-  return ((y * y * (y * (y * (-6.0 * y + 15.0) - 9.0) - 2.0) + 2.0) / 2.0);}
+  return ((y * y * (y * (y * (-6.0 * y + 15.0) - 9.0) - 2.0) + 2.0) * 0.5);}
 inline float__N__ gamma_l2_2(float__N__ y){
-  return ((y * (y * (y * (y * (6.0 * y - 15.0) + 9.0) + 1.0) + 1.0)) / 2.0);}
+  return ((y * (y * (y * (y * (6.0 * y - 15.0) + 9.0) + 1.0) + 1.0)) * 0.5);}
 inline float__N__ delta_l2_2(float__N__ y){
-  return ((y * y * y * (y * (-2.0 * y + 5.0) - 3.0)) / 2.0);}
+  return ((y * y * y * (y * (-2.0 * y + 5.0) - 3.0)) * 0.5);}
 
 
 inline float__N__ alpha_l2_3(float__N__ y){
-  return ((y * (y * (y * y * (y * (y * (-6.0 * y + 21.0) - 25.0) + 10.0) + 1.0) - 1.0)) / 2.0);}
+  return ((y * (y * (y * y * (y * (y * (-6.0 * y + 21.0) - 25.0) + 10.0) + 1.0) - 1.0)) * 0.5);}
 inline float__N__ beta_l2_3(float__N__ y){
-  return ((y * y * (y * y * (y * (y * (18.0 * y - 63.0) + 75.0) - 30.0) - 2.0) + 2.0) / 2.0);}
+  return ((y * y * (y * y * (y * (y * (18.0 * y - 63.0) + 75.0) - 30.0) - 2.0) + 2.0) * 0.5);}
 inline float__N__ gamma_l2_3(float__N__ y){
-  return ((y * (y * (y * y * (y * (y * (-18.0 * y + 63.0) - 75.0) + 30.0) + 1.0) + 1.0)) / 2.0);}
+  return ((y * (y * (y * y * (y * (y * (-18.0 * y + 63.0) - 75.0) + 30.0) + 1.0) + 1.0)) * 0.5);}
 inline float__N__ delta_l2_3(float__N__ y){
-  return ((y * y * y * y * (y * (y * (6.0 * y - 21.0) + 25.0) - 10.0)) / 2.0);}
+  return ((y * y * y * y * (y * (y * (6.0 * y - 21.0) + 25.0) - 10.0)) * 0.5);}
 
 
 inline float__N__ alpha_l2_4(float__N__ y){
-  return ((y * (y * (y * y * y * (y * (y * (y * (20.0 * y - 90.0) + 154.0) - 119.0) + 35.0) + 1.0) - 1.0)) / 2.0);}
+  return ((y * (y * (y * y * y * (y * (y * (y * (20.0 * y - 90.0) + 154.0) - 119.0) + 35.0) + 1.0) - 1.0)) * 0.5);}
 inline float__N__ beta_l2_4(float__N__ y){
-  return ((y * y * (y * y * y * (y * (y * (y * (-60.0 * y + 270.0) - 462.0) + 357.0) - 105.0) - 2.0) + 2.0) / 2.0);}
+  return ((y * y * (y * y * y * (y * (y * (y * (-60.0 * y + 270.0) - 462.0) + 357.0) - 105.0) - 2.0) + 2.0) * 0.5);}
 inline float__N__ gamma_l2_4(float__N__ y){
-  return ((y * (y * (y * y * y * (y * (y * (y * (60.0 * y - 270.0) + 462.0) - 357.0) + 105.0) + 1.0) + 1.0)) / 2.0);}
+  return ((y * (y * (y * y * y * (y * (y * (y * (60.0 * y - 270.0) + 462.0) - 357.0) + 105.0) + 1.0) + 1.0)) * 0.5);}
 inline float__N__ delta_l2_4(float__N__ y){
-  return ((y * y * y * y * y * (y * (y * (y * (-20.0 * y + 90.0) - 154.0) + 119.0) - 35.0)) / 2.0);}
+  return ((y * y * y * y * y * (y * (y * (y * (-20.0 * y + 90.0) - 154.0) + 119.0) - 35.0)) * 0.5);}
 
 
 inline float__N__ alpha_l4_2(float__N__ y){
-  return ((y * (y * (y * (y * (-5.0 * y + 13.0) - 9.0) - 1.0) + 2.0)) / 24.0);}
+  return ((y * (y * (y * (y * (-5.0 * y + 13.0) - 9.0) - 1.0) + 2.0)) * 0.041666666666666664);}
 inline float__N__ beta_l4_2(float__N__ y){
-  return ((y * (y * (y * (y * (25.0 * y - 64.0) + 39.0) + 16.0) - 16.0)) / 24.0);}
+  return ((y * (y * (y * (y * (25.0 * y - 64.0) + 39.0) + 16.0) - 16.0)) * 0.041666666666666664);}
 inline float__N__ gamma_l4_2(float__N__ y){
-  return ((y * y * (y * (y * (-50.0 * y + 126.0) - 70.0) - 30.0) + 24.0) / 24.0);}
+  return ((y * y * (y * (y * (-50.0 * y + 126.0) - 70.0) - 30.0) + 24.0) * 0.041666666666666664);}
 inline float__N__ delta_l4_2(float__N__ y){
-  return ((y * (y * (y * (y * (50.0 * y - 124.0) + 66.0) + 16.0) + 16.0)) / 24.0);}
+  return ((y * (y * (y * (y * (50.0 * y - 124.0) + 66.0) + 16.0) + 16.0)) * 0.041666666666666664);}
 inline float__N__ eta_l4_2(float__N__ y){
-  return ((y * (y * (y * (y * (-25.0 * y + 61.0) - 33.0) - 1.0) - 2.0)) / 24.0);}
+  return ((y * (y * (y * (y * (-25.0 * y + 61.0) - 33.0) - 1.0) - 2.0)) * 0.041666666666666664);}
 inline float__N__ zeta_l4_2(float__N__ y){
-  return ((y * y * y * (y * (5.0 * y - 12.0) + 7.0)) / 24.0);}
+  return ((y * y * y * (y * (5.0 * y - 12.0) + 7.0)) * 0.041666666666666664);}
 
 
 inline float__N__ alpha_l4_3(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (14.0 * y - 49.0) + 58.0) - 22.0) - 2.0) - 1.0) + 2.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (14.0 * y - 49.0) + 58.0) - 22.0) - 2.0) - 1.0) + 2.0)) * 0.041666666666666664);}
 inline float__N__ beta_l4_3(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (-70.0 * y + 245.0) - 290.0) + 111.0) + 4.0) + 16.0) - 16.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (-70.0 * y + 245.0) - 290.0) + 111.0) + 4.0) + 16.0) - 16.0)) * 0.041666666666666664);}
 inline float__N__ gamma_l4_3(float__N__ y){
-  return ((y * y * (y * y * (y * (y * (140.0 * y - 490.0) + 580.0) - 224.0) - 30.0) + 24.0) / 24.0);}
+  return ((y * y * (y * y * (y * (y * (140.0 * y - 490.0) + 580.0) - 224.0) - 30.0) + 24.0) * 0.041666666666666664);}
 inline float__N__ delta_l4_3(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (-140.0 * y + 490.0) - 580.0) + 226.0) - 4.0) + 16.0) + 16.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (-140.0 * y + 490.0) - 580.0) + 226.0) - 4.0) + 16.0) + 16.0)) * 0.041666666666666664);}
 inline float__N__ eta_l4_3(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (70.0 * y - 245.0) + 290.0) - 114.0) + 2.0) - 1.0) - 2.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (70.0 * y - 245.0) + 290.0) - 114.0) + 2.0) - 1.0) - 2.0)) * 0.041666666666666664);}
 inline float__N__ zeta_l4_3(float__N__ y){
-  return ((y * y * y * y * (y * (y * (-14.0 * y + 49.0) - 58.0) + 23.0)) / 24.0);}
+  return ((y * y * y * y * (y * (y * (-14.0 * y + 49.0) - 58.0) + 23.0)) * 0.041666666666666664);}
 
 
 inline float__N__ alpha_l4_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-46.0 * y + 207.0) - 354.0) + 273.0) - 80.0) + 1.0) - 2.0) - 1.0) + 2.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-46.0 * y + 207.0) - 354.0) + 273.0) - 80.0) + 1.0) - 2.0) - 1.0) + 2.0)) * 0.041666666666666664);}
 inline float__N__ beta_l4_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (230.0 * y - 1035.0) + 1770.0) - 1365.0) + 400.0) - 4.0) + 4.0) + 16.0) - 16.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (230.0 * y - 1035.0) + 1770.0) - 1365.0) + 400.0) - 4.0) + 4.0) + 16.0) - 16.0)) * 0.041666666666666664);}
 inline float__N__ gamma_l4_4(float__N__ y){
-  return ((y * y * (y * y * (y * (y * (y * (y * (-460.0 * y + 2070.0) - 3540.0) + 2730.0) - 800.0) + 6.0) - 30.0) + 24.0) / 24.0);}
+  return ((y * y * (y * y * (y * (y * (y * (y * (-460.0 * y + 2070.0) - 3540.0) + 2730.0) - 800.0) + 6.0) - 30.0) + 24.0) * 0.041666666666666664);}
 inline float__N__ delta_l4_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (460.0 * y - 2070.0) + 3540.0) - 2730.0) + 800.0) - 4.0) - 4.0) + 16.0) + 16.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (460.0 * y - 2070.0) + 3540.0) - 2730.0) + 800.0) - 4.0) - 4.0) + 16.0) + 16.0)) * 0.041666666666666664);}
 inline float__N__ eta_l4_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-230.0 * y + 1035.0) - 1770.0) + 1365.0) - 400.0) + 1.0) + 2.0) - 1.0) - 2.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-230.0 * y + 1035.0) - 1770.0) + 1365.0) - 400.0) + 1.0) + 2.0) - 1.0) - 2.0)) * 0.041666666666666664);}
 inline float__N__ zeta_l4_4(float__N__ y){
-  return ((y * y * y * y * y * (y * (y * (y * (46.0 * y - 207.0) + 354.0) - 273.0) + 80.0)) / 24.0);}
+  return ((y * y * y * y * y * (y * (y * (y * (46.0 * y - 207.0) + 354.0) - 273.0) + 80.0)) * 0.041666666666666664);}
 
 
 inline float__N__ alpha_M8p(float__N__ y){
-  return ((y*(y*(y*(y*(y*(y*(-10.0*y + 21.0) + 28.0) - 105.0) + 70.0) + 35.0) - 56.0) + 17.0) / 3360.0);}
+  return ((y*(y*(y*(y*(y*(y*(-10.0*y + 21.0) + 28.0) - 105.0) + 70.0) + 35.0) - 56.0) + 17.0) * 0.00029761904761904765);}
 inline float__N__ beta_M8p(float__N__ y){
-  return ((y*(y*(y*(y*(y*(y*(70.0*y - 175.0) - 140.0) + 770.0) - 560.0) - 350.0) + 504.0) - 102.0) / 3360.0);}
+  return ((y*(y*(y*(y*(y*(y*(70.0*y - 175.0) - 140.0) + 770.0) - 560.0) - 350.0) + 504.0) - 102.0) * 0.00029761904761904765);}
 inline float__N__ gamma_M8p(float__N__ y){
-  return ((y*(y*(y*(y*(y*(y*(-210.0*y + 609.0) + 224.0) - 2135.0) + 910.0) + 2765.0) - 2520.0) + 255.0) / 3360.0);}
+  return ((y*(y*(y*(y*(y*(y*(-210.0*y + 609.0) + 224.0) - 2135.0) + 910.0) + 2765.0) - 2520.0) + 255.0) * 0.00029761904761904765);}
 inline float__N__ delta_M8p(float__N__ y){
-  return ((y*y* (y*y* (y*y* (70.0*y - 231.0) + 588.0) - 980.0) + 604.0) / 672.0);}
+  return ((y*y* (y*y* (y*y* (70.0*y - 231.0) + 588.0) - 980.0) + 604.0) * 0.001488095238095238);}
 inline float__N__ eta_M8p(float__N__ y){
-  return ((y*(y*(y*(y*(y*(y*(-70.0*y+ 259.0) - 84.0) - 427.0) - 182.0)+ 553.0) + 504.0)+ 51.0) / 672.0);}
+  return ((y*(y*(y*(y*(y*(y*(-70.0*y+ 259.0) - 84.0) - 427.0) - 182.0)+ 553.0) + 504.0)+ 51.0) * 0.001488095238095238);}
 inline float__N__ zeta_M8p(float__N__ y){
-  return ((y*(y*(y*(y*(y*(y*(210.0*y- 861.0) + 532.0) + 770.0) + 560.0) - 350.0) - 504.0) - 102.0) / 3360.0);}
+  return ((y*(y*(y*(y*(y*(y*(210.0*y- 861.0) + 532.0) + 770.0) + 560.0) - 350.0) - 504.0) - 102.0) * 0.00029761904761904765);}
 inline float__N__ theta_M8p(float__N__ y){
-  return ((y* (y* (y* (y* (y* (y* (-70.0* y+ 315.0) -280.0) -105.0) -70.0) +35.0)+ 56.0) +17.0) / 3360.0);}
+  return ((y* (y* (y* (y* (y* (y* (-70.0* y+ 315.0) -280.0) -105.0) -70.0) +35.0)+ 56.0) +17.0) * 0.00029761904761904765);}
 inline float__N__ iota_M8p(float__N__ y){
-  return ((y * y * y * y * y * (y * (10.0 * y - 49.0) + 56.0)) / 3360.0);}
+  return ((y * y * y * y * y * (y * (10.0 * y - 49.0) + 56.0)) * 0.00029761904761904765);}
 
 
 inline float__N__ alpha_l6_3(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (-89.0 * y + 312.0) - 370.0) + 140.0) + 15.0) + 4.0) - 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (-89.0 * y + 312.0) - 370.0) + 140.0) + 15.0) + 4.0) - 12.0)) * 0.001388888888888889);}
 inline float__N__ beta_l6_3(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (623.0 * y - 2183.0) + 2581.0) - 955.0) - 120.0) - 54.0) + 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (623.0 * y - 2183.0) + 2581.0) - 955.0) - 120.0) - 54.0) + 108.0)) * 0.001388888888888889);}
 inline float__N__ gamma_l6_3(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (-1869.0 * y + 6546.0) - 7722.0) + 2850.0) + 195.0) + 540.0) - 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (-1869.0 * y + 6546.0) - 7722.0) + 2850.0) + 195.0) + 540.0) - 540.0)) * 0.001388888888888889);}
 inline float__N__ delta_l6_3(float__N__ y){
-  return ((y * y * (y * y * (y * (y * (3115.0 * y - 10905.0) + 12845.0) - 4795.0) - 980.0) + 720.0) / 720.0);}
+  return ((y * y * (y * y * (y * (y * (3115.0 * y - 10905.0) + 12845.0) - 4795.0) - 980.0) + 720.0) * 0.001388888888888889);}
 inline float__N__ eta_l6_3(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (-3115.0 * y + 10900.0) - 12830.0) + 4880.0) - 195.0) + 540.0) + 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (-3115.0 * y + 10900.0) - 12830.0) + 4880.0) - 195.0) + 540.0) + 540.0)) * 0.001388888888888889);}
 inline float__N__ zeta_l6_3(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (1869.0 * y - 6537.0) + 7695.0) - 2985.0) + 120.0) - 54.0) - 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (1869.0 * y - 6537.0) + 7695.0) - 2985.0) + 120.0) - 54.0) - 108.0)) * 0.001388888888888889);}
 inline float__N__ theta_l6_3(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (-623.0 * y + 2178.0) - 2566.0) + 1010.0) - 15.0) + 4.0) + 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (-623.0 * y + 2178.0) - 2566.0) + 1010.0) - 15.0) + 4.0) + 12.0)) * 0.001388888888888889);}
 inline float__N__ iota_l6_3(float__N__ y){
-  return ((y * y * y * y * (y * (y * (89.0 * y - 311.0) + 367.0) - 145.0)) / 720.0);}
+  return ((y * y * y * y * (y * (y * (89.0 * y - 311.0) + 367.0) - 145.0)) * 0.001388888888888889);}
 
 
 inline float__N__ alpha_l6_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (290.0 * y - 1305.0) + 2231.0) - 1718.0) + 500.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (290.0 * y - 1305.0) + 2231.0) - 1718.0) + 500.0) - 5.0) + 15.0) + 4.0) - 12.0)) * 0.001388888888888889);}
 inline float__N__ beta_l6_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-2030.0 * y + 9135.0) - 15617.0) + 12027.0) - 3509.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-2030.0 * y + 9135.0) - 15617.0) + 12027.0) - 3509.0) + 60.0) - 120.0) - 54.0) + 108.0)) * 0.001388888888888889);}
 inline float__N__ gamma_l6_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (6090.0 * y - 27405.0) + 46851.0) - 36084.0) + 10548.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (6090.0 * y - 27405.0) + 46851.0) - 36084.0) + 10548.0) - 195.0) + 195.0) + 540.0) - 540.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ delta_l6_4(float__N__ y){
-  return ((y * y * (y * y * (y * (y * (y * (y * (-10150.0 * y + 45675.0) - 78085.0) + 60145.0) - 17605.0) + 280.0) - 980.0) + 720.0) / 720.0);}
+  return ((y * y * (y * y * (y * (y * (y * (y * (-10150.0 * y + 45675.0) - 78085.0) + 60145.0) - 17605.0) + 280.0) - 980.0) + 720.0) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ eta_l6_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (10150.0 * y - 45675.0) + 78085.0) - 60150.0) + 17620.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (10150.0 * y - 45675.0) + 78085.0) - 60150.0) + 17620.0) - 195.0) - 195.0) + 540.0) + 540.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ zeta_l6_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-6090.0 * y + 27405.0) - 46851.0) + 36093.0) - 10575.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-6090.0 * y + 27405.0) - 46851.0) + 36093.0) - 10575.0) + 60.0) + 120.0) - 54.0) - 108.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ theta_l6_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (2030.0 * y - 9135.0) + 15617.0) - 12032.0) + 3524.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (2030.0 * y - 9135.0) + 15617.0) - 12032.0) + 3524.0) - 5.0) - 15.0) + 4.0) + 12.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ iota_l6_4(float__N__ y){
-  return ((y * y * y * y * y * (y * (y * (y * (-290.0 * y + 1305.0) - 2231.0) + 1719.0) - 503.0)) / 720.0);}
+  return ((y * y * y * y * y * (y * (y * (y * (-290.0 * y + 1305.0) - 2231.0) + 1719.0) - 503.0)) * 0.001388888888888889);} /* scale = 1/720 */
 
 
 inline float__N__ alpha_l6_5(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-1006.0 * y + 5533.0) - 12285.0) + 13785.0) - 7829.0) + 1803.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-1006.0 * y + 5533.0) - 12285.0) + 13785.0) - 7829.0) + 1803.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ beta_l6_5(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (7042.0 * y - 38731.0) + 85995.0) - 96495.0) + 54803.0) - 12620.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (7042.0 * y - 38731.0) + 85995.0) - 96495.0) + 54803.0) - 12620.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ gamma_l6_5(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-21126.0 * y + 116193.0) - 257985.0) + 289485.0) - 164409.0) + 37857.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-21126.0 * y + 116193.0) - 257985.0) + 289485.0) - 164409.0) + 37857.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ delta_l6_5(float__N__ y){
-  return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (35210.0 * y - 193655.0) + 429975.0) - 482475.0) + 274015.0) - 63090.0) + 280.0) - 980.0) + 720.0) / 720.0);}
+  return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (35210.0 * y - 193655.0) + 429975.0) - 482475.0) + 274015.0) - 63090.0) + 280.0) - 980.0) + 720.0) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ eta_l6_5(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-35210.0 * y + 193655.0) - 429975.0) + 482475.0) - 274015.0) + 63085.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-35210.0 * y + 193655.0) - 429975.0) + 482475.0) - 274015.0) + 63085.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ zeta_l6_5(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (21126.0 * y - 116193.0) + 257985.0) - 289485.0) + 164409.0) - 37848.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (21126.0 * y - 116193.0) + 257985.0) - 289485.0) + 164409.0) - 37848.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ theta_l6_5(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-7042.0 * y + 38731.0) - 85995.0) + 96495.0) - 54803.0) + 12615.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-7042.0 * y + 38731.0) - 85995.0) + 96495.0) - 54803.0) + 12615.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ iota_l6_5(float__N__ y){
-  return ((y * y * y * y * y * y * (y * (y * (y * (y * (1006.0 * y - 5533.0) + 12285.0) - 13785.0) + 7829.0) - 1802.0)) / 720.0);}
+  return ((y * y * y * y * y * y * (y * (y * (y * (y * (1006.0 * y - 5533.0) + 12285.0) - 13785.0) + 7829.0) - 1802.0)) * 0.001388888888888889);} /* scale = 1/720 */
 
 
 inline float__N__ alpha_l6_6(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (3604.0 * y - 23426.0) + 63866.0) - 93577.0) + 77815.0) - 34869.0) + 6587.0) + 1.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (3604.0 * y - 23426.0) + 63866.0) - 93577.0) + 77815.0) - 34869.0) + 6587.0) + 1.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ beta_l6_6(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-25228.0 * y + 163982.0) - 447062.0) + 655039.0) - 544705.0) + 244083.0) - 46109.0) - 6.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-25228.0 * y + 163982.0) - 447062.0) + 655039.0) - 544705.0) + 244083.0) - 46109.0) - 6.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ gamma_l6_6(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (75684.0 * y - 491946.0) + 1341186.0) - 1965117.0) + 1634115.0) - 732249.0) + 138327.0) + 15.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (75684.0 * y - 491946.0) + 1341186.0) - 1965117.0) + 1634115.0) - 732249.0) + 138327.0) + 15.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ delta_l6_6(float__N__ y){
-  return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (y * (y * (-126140.0 * y + 819910.0) - 2235310.0) + 3275195.0) - 2723525.0) + 1220415.0) - 230545.0) - 20.0) + 280.0) - 980.0) + 720.0) / 720.0);}
+  return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (y * (y * (-126140.0 * y + 819910.0) - 2235310.0) + 3275195.0) - 2723525.0) + 1220415.0) - 230545.0) - 20.0) + 280.0) - 980.0) + 720.0) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ eta_l6_6(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (126140.0 * y - 819910.0) + 2235310.0) - 3275195.0) + 2723525.0) - 1220415.0) + 230545.0) + 15.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (126140.0 * y - 819910.0) + 2235310.0) - 3275195.0) + 2723525.0) - 1220415.0) + 230545.0) + 15.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ zeta_l6_6(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-75684.0 * y + 491946.0) - 1341186.0) + 1965117.0) - 1634115.0) + 732249.0) - 138327.0) - 6.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-75684.0 * y + 491946.0) - 1341186.0) + 1965117.0) - 1634115.0) + 732249.0) - 138327.0) - 6.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ theta_l6_6(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (25228.0 * y - 163982.0) + 447062.0) - 655039.0) + 544705.0) - 244083.0) + 46109.0) + 1.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (25228.0 * y - 163982.0) + 447062.0) - 655039.0) + 544705.0) - 244083.0) + 46109.0) + 1.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ iota_l6_6(float__N__ y){
-  return ((y * y * y * y * y * y * y * (y * (y * (y * (y * (y * (-3604.0 * y + 23426.0) - 63866.0) + 93577.0) - 77815.0) + 34869.0) - 6587.0)) / 720.0);}
+  return ((y * y * y * y * y * y * y * (y * (y * (y * (y * (y * (-3604.0 * y + 23426.0) - 63866.0) + 93577.0) - 77815.0) + 34869.0) - 6587.0)) * 0.001388888888888889);} /* scale = 1/720 */
 
 
 inline float__N__ alpha_l8_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-3569.0 * y + 16061.0) - 27454.0) + 21126.0) - 6125.0) + 49.0) - 196.0) - 36.0) + 144.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-3569.0 * y + 16061.0) - 27454.0) + 21126.0) - 6125.0) + 49.0) - 196.0) - 36.0) + 144.0)) * 2.48015873015873e-05);} /* scale = 1/40320 */
 inline float__N__ beta_l8_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (32121.0 * y - 144548.0) + 247074.0) - 190092.0) + 55125.0) - 672.0) + 2016.0) + 512.0) - 1536.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (32121.0 * y - 144548.0) + 247074.0) - 190092.0) + 55125.0) - 672.0) + 2016.0) + 512.0) - 1536.0)) * 2.48015873015873e-05);} /* scale = 1/40320 */
 inline float__N__ gamma_l8_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-128484.0 * y + 578188.0) - 988256.0) + 760312.0) - 221060.0) + 4732.0) - 9464.0) - 4032.0) + 8064.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-128484.0 * y + 578188.0) - 988256.0) + 760312.0) - 221060.0) + 4732.0) - 9464.0) - 4032.0) + 8064.0)) * 2.48015873015873e-05);} /* scale = 1/40320 */
 inline float__N__ delta_l8_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (299796.0 * y - 1349096.0) + 2305856.0) - 1774136.0) + 517580.0) - 13664.0) + 13664.0) + 32256.0) - 32256.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (299796.0 * y - 1349096.0) + 2305856.0) - 1774136.0) + 517580.0) - 13664.0) + 13664.0) + 32256.0) - 32256.0)) * 2.48015873015873e-05);} /* scale = 1/40320 */
 inline float__N__ eta_l8_4(float__N__ y){
-  return ((y * y * (y * y * (y * (y * (y * (y * (-449694.0 * y + 2023630.0) - 3458700.0) + 2661540.0) - 778806.0) + 19110.0) - 57400.0) + 40320.0) / 40320.0);}
+  return ((y * y * (y * y * (y * (y * (y * (y * (-449694.0 * y + 2023630.0) - 3458700.0) + 2661540.0) - 778806.0) + 19110.0) - 57400.0) + 40320.0) * 2.48015873015873e-05);} /* scale = 1/40320 */
 inline float__N__ zeta_l8_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (449694.0 * y - 2023616.0) + 3458644.0) - 2662016.0) + 780430.0) - 13664.0) - 13664.0) + 32256.0) + 32256.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (449694.0 * y - 2023616.0) + 3458644.0) - 2662016.0) + 780430.0) - 13664.0) - 13664.0) + 32256.0) + 32256.0)) * 2.48015873015873e-05);} /* scale = 1/40320 */
 inline float__N__ theta_l8_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-299796.0 * y + 1349068.0) - 2305744.0) + 1775032.0) - 520660.0) + 4732.0) + 9464.0) - 4032.0) - 8064.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-299796.0 * y + 1349068.0) - 2305744.0) + 1775032.0) - 520660.0) + 4732.0) + 9464.0) - 4032.0) - 8064.0)) * 2.48015873015873e-05);} /* scale = 1/40320 */
 inline float__N__ iota_l8_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (128484.0 * y - 578168.0) + 988176.0) - 760872.0) + 223020.0) - 672.0) - 2016.0) + 512.0) + 1536.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (128484.0 * y - 578168.0) + 988176.0) - 760872.0) + 223020.0) - 672.0) - 2016.0) + 512.0) + 1536.0)) * 2.48015873015873e-05);} /* scale = 1/40320 */
 inline float__N__ kappa_l8_4(float__N__ y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-32121.0 * y + 144541.0) - 247046.0) + 190246.0) - 55685.0) + 49.0) + 196.0) - 36.0) - 144.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-32121.0 * y + 144541.0) - 247046.0) + 190246.0) - 55685.0) + 49.0) + 196.0) - 36.0) - 144.0)) * 2.48015873015873e-05);} /* scale = 1/40320 */
 inline float__N__ mu_l8_4(float__N__ y){
-  return ((y * y * y * y * y * (y * (y * (y * (3569.0 * y - 16060.0) + 27450.0) - 21140.0) + 6181.0)) / 40320.0);}
+  return ((y * y * y * y * y * (y * (y * (y * (3569.0 * y - 16060.0) + 27450.0) - 21140.0) + 6181.0)) * 2.48015873015873e-05);} /* scale = 1/40320 */
 
 
 
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/weights_builtin.cl b/HySoP/hysop/gpu/cl_src/remeshing/weights_builtin.cl
index 35f45164f..cd1827937 100644
--- a/HySoP/hysop/gpu/cl_src/remeshing/weights_builtin.cl
+++ b/HySoP/hysop/gpu/cl_src/remeshing/weights_builtin.cl
@@ -5,198 +5,195 @@
  */
 
 inline float__N__ alpha_l2_1(float__N__ y){
-  return (y*fma(y,fma(y,-1.0, 2.0), - 1.0)/2.0);}
+  return (y*fma(y,fma(y,-1.0, 2.0), - 1.0) * 0.5);} /* scale = 1/2 */
 inline float__N__ beta_l2_1(float__N__ y){
-  return (fma(y*y, fma(y, 3.0, -5.0), 2.0) / 2.0);}
+  return (fma(y*y, fma(y, 3.0, -5.0), 2.0) * 0.5);} /* scale = 1/2 */
 inline float__N__ gamma_l2_1(float__N__   y){
-  return ((y * fma(y , fma(-3.0, y, 4.0), 1.0)) / 2.0);}
+  return ((y * fma(y , fma(-3.0, y, 4.0), 1.0)) * 0.5);} /* scale = 1/2 */
 inline float__N__ delta_l2_1(float__N__ y){
-  return ((y * y * fma(1.0, y, - 1.0)) / 2.0);}
+  return ((y * y * fma(1.0, y, - 1.0)) * 0.5);} /* scale = 1/2 */
 
 
 inline float__N__ alpha_l2_2(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, 2.0, -5.0), 3.0), 1.0), -1.0)) / 2.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, 2.0, -5.0), 3.0), 1.0), -1.0)) * 0.5);} /* scale = 1/2 */
 inline float__N__ beta_l2_2(float__N__ y){
-  return (fma(y * y, fma(y, fma(y, fma(y, -6.0, 15.0), -9.0), -2.0), 2.0) / 2.0);}
+  return (fma(y * y, fma(y, fma(y, fma(y, -6.0, 15.0), -9.0), -2.0), 2.0) * 0.5);} /* scale = 1/2 */
 inline float__N__ gamma_l2_2(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, 6.0, -15.0), 9.0), 1.0), 1.0)) / 2.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, 6.0, -15.0), 9.0), 1.0), 1.0)) * 0.5);} /* scale = 1/2 */
 inline float__N__ delta_l2_2(float__N__ y){
-  return ((y * y * y * fma(y, fma(y, -2.0, 5.0), -3.0)) / 2.0);}
+  return ((y * y * y * fma(y, fma(y, -2.0, 5.0), -3.0)) * 0.5);} /* scale = 1/2 */
 
 
 inline float__N__ alpha_l2_3(float__N__ y){
-  return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -6.0, 21.0), -25.0), 10.0), 1.0), -1.0)) / 2.0);}
+  return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -6.0, 21.0), -25.0), 10.0), 1.0), -1.0)) * 0.5);} /* scale = 1/2 */
 inline float__N__ beta_l2_3(float__N__ y){
-  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 18.0, -63.0), 75.0), -30.0), -2.0), 2.0) / 2.0);}
+  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 18.0, -63.0), 75.0), -30.0), -2.0), 2.0) * 0.5);} /* scale = 1/2 */
 inline float__N__ gamma_l2_3(float__N__ y){
-  return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -18.0, 63.0), -75.0), 30.0), 1.0), 1.0)) / 2.0);}
+  return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -18.0, 63.0), -75.0), 30.0), 1.0), 1.0)) * 0.5);} /* scale = 1/2 */
 inline float__N__ delta_l2_3(float__N__ y){
-  return ((y * y * y * y * fma(y, fma(y, fma(y, 6.0, -21.0), 25.0), -10.0)) / 2.0);}
+  return ((y * y * y * y * fma(y, fma(y, fma(y, 6.0, -21.0), 25.0), -10.0)) * 0.5);} /* scale = 1/2 */
 
 
 inline float__N__ alpha_l2_4(float__N__ y){
-  return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 20.0, -90.0), 154.0), -119.0), 35.0), 1.0), -1.0)) / 2.0);}
+  return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 20.0, -90.0), 154.0), -119.0), 35.0), 1.0), -1.0)) * 0.5);} /* scale = 1/2 */
 inline float__N__ beta_l2_4(float__N__ y){
-  return (fma(y * y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, -60.0, 270.0), -462.0), 357.0), -105.0), -2.0), 2.0) / 2.0);}
+  return (fma(y * y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, -60.0, 270.0), -462.0), 357.0), -105.0), -2.0), 2.0) * 0.5);} /* scale = 1/2 */
 inline float__N__ gamma_l2_4(float__N__ y){
-  return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 60.0, -270.0), 462.0), -357.0), 105.0), 1.0), 1.0)) / 2.0);}
+  return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 60.0, -270.0), 462.0), -357.0), 105.0), 1.0), 1.0)) * 0.5);} /* scale = 1/2 */
 inline float__N__ delta_l2_4(float__N__ y){
-  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -20.0, 90.0), -154.0), 119.0), -35.0)) / 2.0);}
+  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -20.0, 90.0), -154.0), 119.0), -35.0)) * 0.5);} /* scale = 1/2 */
 
 
 inline float__N__ alpha_l4_2(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, -5.0, 13.0), -9.0), -1.0), 2.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, -5.0, 13.0), -9.0), -1.0), 2.0)) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ beta_l4_2(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, 25.0, -64.0), 39.0), 16.0), -16.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, 25.0, -64.0), 39.0), 16.0), -16.0)) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ gamma_l4_2(float__N__ y){
-  return (fma(y * y, fma(y, fma(y, fma(y, -50.0, 126.0), -70.0), -30.0), 24.0) / 24.0);}
+  return (fma(y * y, fma(y, fma(y, fma(y, -50.0, 126.0), -70.0), -30.0), 24.0) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ delta_l4_2(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, 50.0, -124.0), 66.0), 16.0), 16.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, 50.0, -124.0), 66.0), 16.0), 16.0)) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ eta_l4_2(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, -25.0, 61.0), -33.0), -1.0), -2.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, -25.0, 61.0), -33.0), -1.0), -2.0)) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ zeta_l4_2(float__N__ y){
-  return ((y * y * y * fma(y, fma(y, 5.0, -12.0), 7.0)) / 24.0);}
+  return ((y * y * y * fma(y, fma(y, 5.0, -12.0), 7.0)) * 0.041666666666666664);} /* scale = 1/24 */
 
 
 inline float__N__ alpha_l4_3(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 14.0, -49.0), 58.0), -22.0), -2.0), -1.0), 2.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 14.0, -49.0), 58.0), -22.0), -2.0), -1.0), 2.0)) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ beta_l4_3(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -70.0, 245.0), -290.0), 111.0), 4.0), 16.0), -16.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -70.0, 245.0), -290.0), 111.0), 4.0), 16.0), -16.0)) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ gamma_l4_3(float__N__ y){
-  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 140.0, -490.0), 580.0), -224.0), -30.0), 24.0) / 24.0);}
+  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 140.0, -490.0), 580.0), -224.0), -30.0), 24.0) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ delta_l4_3(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -140.0, 490.0), -580.0), 226.0), -4.0), 16.0), 16.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -140.0, 490.0), -580.0), 226.0), -4.0), 16.0), 16.0)) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ eta_l4_3(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 70.0, -245.0), 290.0), -114.0), 2.0), -1.0), -2.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 70.0, -245.0), 290.0), -114.0), 2.0), -1.0), -2.0)) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ zeta_l4_3(float__N__ y){
-  return ((y * y * y * y * fma(y, fma(y, fma(y, -14.0, 49.0), -58.0), 23.0)) / 24.0);}
+  return ((y * y * y * y * fma(y, fma(y, fma(y, -14.0, 49.0), -58.0), 23.0)) * 0.041666666666666664);} /* scale = 1/24 */
 
 
 inline float__N__ alpha_l4_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -46.0, 207.0), -354.0), 273.0), -80.0), 1.0), -2.0), -1.0), 2.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -46.0, 207.0), -354.0), 273.0), -80.0), 1.0), -2.0), -1.0), 2.0)) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ beta_l4_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 230.0, -1035.0), 1770.0), -1365.0), 400.0), -4.0), 4.0), 16.0), -16.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 230.0, -1035.0), 1770.0), -1365.0), 400.0), -4.0), 4.0), 16.0), -16.0)) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ gamma_l4_4(float__N__ y){
-  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -460.0, 2070.0), -3540.0), 2730.0), -800.0), 6.0), -30.0), 24.0) / 24.0);}
+  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -460.0, 2070.0), -3540.0), 2730.0), -800.0), 6.0), -30.0), 24.0) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ delta_l4_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 460.0, -2070.0), 3540.0), -2730.0), 800.0), -4.0), -4.0), 16.0), 16.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 460.0, -2070.0), 3540.0), -2730.0), 800.0), -4.0), -4.0), 16.0), 16.0)) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ eta_l4_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -230.0, 1035.0), -1770.0), 1365.0), -400.0), 1.0), 2.0), -1.0), -2.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -230.0, 1035.0), -1770.0), 1365.0), -400.0), 1.0), 2.0), -1.0), -2.0)) * 0.041666666666666664);} /* scale = 1/24 */
 inline float__N__ zeta_l4_4(float__N__ y){
-  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 46.0, -207.0), 354.0), -273.0), 80.0)) / 24.0);}
+  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 46.0, -207.0), 354.0), -273.0), 80.0)) * 0.041666666666666664);} /* scale = 1/24 */
 
 
 inline float__N__ alpha_M8p(float__N__ y){
-  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-10.0,y, + 21.0), + 28.0), - 105.0), + 70.0), + 35.0), - 56.0), + 17.0) / 3360.0);}
+  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-10.0,y, + 21.0), + 28.0), - 105.0), + 70.0), + 35.0), - 56.0), + 17.0) * 0.00029761904761904765);} /* scale = 1/3360 */
 inline float__N__ beta_M8p(float__N__ y){
-  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(70.0,y, - 175.0), - 140.0), + 770.0), - 560.0), - 350.0), + 504.0), - 102.0) / 3360.0);}
+  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(70.0,y, - 175.0), - 140.0), + 770.0), - 560.0), - 350.0), + 504.0), - 102.0) * 0.00029761904761904765);} /* scale = 1/3360 */
 inline float__N__ gamma_M8p(float__N__ y){
-  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-210.0,y, + 609.0), + 224.0), - 2135.0), + 910.0), + 2765.0), - 2520.0), + 255.0) / 3360.0);}
+  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-210.0,y, + 609.0), + 224.0), - 2135.0), + 910.0), + 2765.0), - 2520.0), + 255.0) * 0.00029761904761904765);} /* scale = 1/3360 */
 inline float__N__ delta_M8p(float__N__ y){
-  return (fma(y*y, fma(y*y, fma(y*y, fma(70.0,y, - 231.0), + 588.0), - 980.0), + 604.0) / 672.0);}
+  return (fma(y*y, fma(y*y, fma(y*y, fma(70.0,y, - 231.0), + 588.0), - 980.0), + 604.0) * 0.001488095238095238);} /* scale = 1/672 */
 inline float__N__ eta_M8p(float__N__ y){
-  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-70.0,y, 259.0), - 84.0), - 427.0), - 182.0), + 553.0), + 504.0), + 51.0) / 672.0);}
+  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-70.0,y, 259.0), - 84.0), - 427.0), - 182.0), + 553.0), + 504.0), + 51.0) * 0.001488095238095238);} /* scale = 1/672 */
 inline float__N__ zeta_M8p(float__N__ y){
-  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(210.0,y,- 861.0), + 532.0), + 770.0), + 560.0), - 350.0), - 504.0), - 102.0) / 3360.0);}
+  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(210.0,y,- 861.0), + 532.0), + 770.0), + 560.0), - 350.0), - 504.0), - 102.0) * 0.00029761904761904765);} /* scale = 1/3360 */
 inline float__N__ theta_M8p(float__N__ y){
-  return (fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(-70.0, y, 315.0), -280.0), -105.0), -70.0), 35.0), 56.0), 17.0) / 3360.0);}
+  return (fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(-70.0, y, 315.0), -280.0), -105.0), -70.0), 35.0), 56.0), 17.0) * 0.00029761904761904765);} /* scale = 1/3360 */
 inline float__N__ iota_M8p(float__N__ y){
-  return ((y * y * y * y * y * fma(y , fma(10.0 , y ,- 49.0) , 56.0)) / 3360.0);}
+  return ((y * y * y * y * y * fma(y , fma(10.0 , y ,- 49.0) , 56.0)) * 0.00029761904761904765);} /* scale = 1/3360 */
 
 
 inline float__N__ alpha_l6_3(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -89.0, 312.0), -370.0), 140.0), 15.0), 4.0), -12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -89.0, 312.0), -370.0), 140.0), 15.0), 4.0), -12.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ beta_l6_3(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 623.0, -2183.0), 2581.0), -955.0), -120.0), -54.0), 108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 623.0, -2183.0), 2581.0), -955.0), -120.0), -54.0), 108.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ gamma_l6_3(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1869.0, 6546.0), -7722.0), 2850.0), 195.0), 540.0), -540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1869.0, 6546.0), -7722.0), 2850.0), 195.0), 540.0), -540.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ delta_l6_3(float__N__ y){
-  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 3115.0, -10905.0), 12845.0), -4795.0), -980.0), 720.0) / 720.0);}
+  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 3115.0, -10905.0), 12845.0), -4795.0), -980.0), 720.0) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ eta_l6_3(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3115.0, 10900.0), -12830.0), 4880.0), -195.0), 540.0), 540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3115.0, 10900.0), -12830.0), 4880.0), -195.0), 540.0), 540.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ zeta_l6_3(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 1869.0, -6537.0), 7695.0), -2985.0), 120.0), -54.0), -108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 1869.0, -6537.0), 7695.0), -2985.0), 120.0), -54.0), -108.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ theta_l6_3(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -623.0, 2178.0), -2566.0), 1010.0), -15.0), 4.0), 12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -623.0, 2178.0), -2566.0), 1010.0), -15.0), 4.0), 12.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ iota_l6_3(float__N__ y){
-  return ((y * y * y * y * fma(y, fma(y, fma(y, 89.0, -311.0), 367.0), -145.0)) / 720.0);}
+  return ((y * y * y * y * fma(y, fma(y, fma(y, 89.0, -311.0), 367.0), -145.0)) * 0.001388888888888889);} /* scale = 1/720 */
 
 
 inline float__N__ alpha_l6_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 290.0, -1305.0), 2231.0), -1718.0), 500.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 290.0, -1305.0), 2231.0), -1718.0), 500.0), -5.0), 15.0), 4.0), -12.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ beta_l6_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -2030.0, 9135.0), -15617.0), 12027.0), -3509.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -2030.0, 9135.0), -15617.0), 12027.0), -3509.0), 60.0), -120.0), -54.0), 108.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ gamma_l6_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 6090.0, -27405.0), 46851.0), -36084.0), 10548.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 6090.0, -27405.0), 46851.0), -36084.0), 10548.0), -195.0), 195.0), 540.0), -540.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ delta_l6_4(float__N__ y){
-  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -10150.0, 45675.0), -78085.0), 60145.0), -17605.0), 280.0), -980.0), 720.0) / 720.0);}
+  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -10150.0, 45675.0), -78085.0), 60145.0), -17605.0), 280.0), -980.0), 720.0) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ eta_l6_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 10150.0, -45675.0), 78085.0), -60150.0), 17620.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 10150.0, -45675.0), 78085.0), -60150.0), 17620.0), -195.0), -195.0), 540.0), 540.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ zeta_l6_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -6090.0, 27405.0), -46851.0), 36093.0), -10575.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -6090.0, 27405.0), -46851.0), 36093.0), -10575.0), 60.0), 120.0), -54.0), -108.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ theta_l6_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 2030.0, -9135.0), 15617.0), -12032.0), 3524.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 2030.0, -9135.0), 15617.0), -12032.0), 3524.0), -5.0), -15.0), 4.0), 12.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ iota_l6_4(float__N__ y){
-  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -290.0, 1305.0), -2231.0), 1719.0), -503.0)) / 720.0);}
+  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -290.0, 1305.0), -2231.0), 1719.0), -503.0)) * 0.001388888888888889);} /* scale = 1/720 */
 
 
 inline float__N__ alpha_l6_5(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1006.0, 5533.0), -12285.0), 13785.0), -7829.0), 1803.0), -3.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1006.0, 5533.0), -12285.0), 13785.0), -7829.0), 1803.0), -3.0), -5.0), 15.0), 4.0), -12.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ beta_l6_5(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 7042.0, -38731.0), 85995.0), -96495.0), 54803.0), -12620.0), 12.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 7042.0, -38731.0), 85995.0), -96495.0), 54803.0), -12620.0), 12.0), 60.0), -120.0), -54.0), 108.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ gamma_l6_5(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -21126.0, 116193.0), -257985.0), 289485.0), -164409.0), 37857.0), -15.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -21126.0, 116193.0), -257985.0), 289485.0), -164409.0), 37857.0), -15.0), -195.0), 195.0), 540.0), -540.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ delta_l6_5(float__N__ y){
-  return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, 35210.0, -193655.0), 429975.0), -482475.0), 274015.0), -63090.0), 280.0), -980.0), 720.0) / 720.0);}
+  return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, 35210.0, -193655.0), 429975.0), -482475.0), 274015.0), -63090.0), 280.0), -980.0), 720.0) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ eta_l6_5(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -35210.0, 193655.0), -429975.0), 482475.0), -274015.0), 63085.0), 15.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -35210.0, 193655.0), -429975.0), 482475.0), -274015.0), 63085.0), 15.0), -195.0), -195.0), 540.0), 540.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ zeta_l6_5(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 21126.0, -116193.0), 257985.0), -289485.0), 164409.0), -37848.0), -12.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 21126.0, -116193.0), 257985.0), -289485.0), 164409.0), -37848.0), -12.0), 60.0), 120.0), -54.0), -108.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ theta_l6_5(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -7042.0, 38731.0), -85995.0), 96495.0), -54803.0), 12615.0), 3.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -7042.0, 38731.0), -85995.0), 96495.0), -54803.0), 12615.0), 3.0), -5.0), -15.0), 4.0), 12.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ iota_l6_5(float__N__ y){
-  return ((y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, 1006.0, -5533.0), 12285.0), -13785.0), 7829.0), -1802.0)) / 720.0);}
+  return ((y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, 1006.0, -5533.0), 12285.0), -13785.0), 7829.0), -1802.0)) * 0.001388888888888889);} /* scale = 1/720 */
 
 
 inline float__N__ alpha_l6_6(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 3604.0, -23426.0), 63866.0), -93577.0), 77815.0), -34869.0), 6587.0), 1.0), -3.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 3604.0, -23426.0), 63866.0), -93577.0), 77815.0), -34869.0), 6587.0), 1.0), -3.0), -5.0), 15.0), 4.0), -12.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ beta_l6_6(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -25228.0, 163982.0), -447062.0), 655039.0), -544705.0), 244083.0), -46109.0), -6.0), 12.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -25228.0, 163982.0), -447062.0), 655039.0), -544705.0), 244083.0), -46109.0), -6.0), 12.0), 60.0), -120.0), -54.0), 108.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ gamma_l6_6(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 75684.0, -491946.0), 1341186.0), -1965117.0), 1634115.0), -732249.0), 138327.0), 15.0), -15.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 75684.0, -491946.0), 1341186.0), -1965117.0), 1634115.0), -732249.0), 138327.0), 15.0), -15.0), -195.0), 195.0), 540.0), -540.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ delta_l6_6(float__N__ y){
-  return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -126140.0, 819910.0), -2235310.0), 3275195.0), -2723525.0), 1220415.0), -230545.0), -20.0), 280.0), -980.0), 720.0) / 720.0);}
+  return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -126140.0, 819910.0), -2235310.0), 3275195.0), -2723525.0), 1220415.0), -230545.0), -20.0), 280.0), -980.0), 720.0) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ eta_l6_6(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 126140.0, -819910.0), 2235310.0), -3275195.0), 2723525.0), -1220415.0), 230545.0), 15.0), 15.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 126140.0, -819910.0), 2235310.0), -3275195.0), 2723525.0), -1220415.0), 230545.0), 15.0), 15.0), -195.0), -195.0), 540.0), 540.0)) * 0.001388888888888889);} /* scale = 1/720 */
 inline float__N__ zeta_l6_6(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -75684.0, 491946.0), -1341186.0), 1965117.0), -1634115.0), 732249.0), -138327.0), -6.0), -12.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -75684.0, 491946.0), -1341186.0), 1965117.0), -1634115.0), 732249.0), -138327.0), -6.0), -12.0), 60.0), 120.0), -54.0), -108.0)) * 0.001388888888888889);}
 inline float__N__ theta_l6_6(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 25228.0, -163982.0), 447062.0), -655039.0), 544705.0), -244083.0), 46109.0), 1.0), 3.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 25228.0, -163982.0), 447062.0), -655039.0), 544705.0), -244083.0), 46109.0), 1.0), 3.0), -5.0), -15.0), 4.0), 12.0)) * 0.001388888888888889);}
 inline float__N__ iota_l6_6(float__N__ y){
-  return ((y * y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3604.0, 23426.0), -63866.0), 93577.0), -77815.0), 34869.0), -6587.0)) / 720.0);}
+  return ((y * y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3604.0, 23426.0), -63866.0), 93577.0), -77815.0), 34869.0), -6587.0)) * 0.001388888888888889);}
 
 
 
 inline float__N__ alpha_l8_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3569.0, 16061.0), -27454.0), 21126.0), -6125.0), 49.0), -196.0), -36.0), 144.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3569.0, 16061.0), -27454.0), 21126.0), -6125.0), 49.0), -196.0), -36.0), 144.0)) * 2.48015873015873e-05);}
 inline float__N__ beta_l8_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 32121.0, -144548.0), 247074.0), -190092.0), 55125.0), -672.0), 2016.0), 512.0), -1536.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 32121.0, -144548.0), 247074.0), -190092.0), 55125.0), -672.0), 2016.0), 512.0), -1536.0)) * 2.48015873015873e-05);}
 inline float__N__ gamma_l8_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -128484.0, 578188.0), -988256.0), 760312.0), -221060.0), 4732.0), -9464.0), -4032.0), 8064.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -128484.0, 578188.0), -988256.0), 760312.0), -221060.0), 4732.0), -9464.0), -4032.0), 8064.0)) * 2.48015873015873e-05);}
 inline float__N__ delta_l8_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 299796.0, -1349096.0), 2305856.0), -1774136.0), 517580.0), -13664.0), 13664.0), 32256.0), -32256.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 299796.0, -1349096.0), 2305856.0), -1774136.0), 517580.0), -13664.0), 13664.0), 32256.0), -32256.0)) * 2.48015873015873e-05);}
 inline float__N__ eta_l8_4(float__N__ y){
-  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -449694.0, 2023630.0), -3458700.0), 2661540.0), -778806.0), 19110.0), -57400.0), 40320.0) / 40320.0);}
+  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -449694.0, 2023630.0), -3458700.0), 2661540.0), -778806.0), 19110.0), -57400.0), 40320.0) * 2.48015873015873e-05);}
 inline float__N__ zeta_l8_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 449694.0, -2023616.0), 3458644.0), -2662016.0), 780430.0), -13664.0), -13664.0), 32256.0), 32256.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 449694.0, -2023616.0), 3458644.0), -2662016.0), 780430.0), -13664.0), -13664.0), 32256.0), 32256.0)) * 2.48015873015873e-05);}
 inline float__N__ theta_l8_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -299796.0, 1349068.0), -2305744.0), 1775032.0), -520660.0), 4732.0), 9464.0), -4032.0), -8064.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -299796.0, 1349068.0), -2305744.0), 1775032.0), -520660.0), 4732.0), 9464.0), -4032.0), -8064.0)) * 2.48015873015873e-05);}
 inline float__N__ iota_l8_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 128484.0, -578168.0), 988176.0), -760872.0), 223020.0), -672.0), -2016.0), 512.0), 1536.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 128484.0, -578168.0), 988176.0), -760872.0), 223020.0), -672.0), -2016.0), 512.0), 1536.0)) * 2.48015873015873e-05);}
 inline float__N__ kappa_l8_4(float__N__ y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -32121.0, 144541.0), -247046.0), 190246.0), -55685.0), 49.0), 196.0), -36.0), -144.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -32121.0, 144541.0), -247046.0), 190246.0), -55685.0), 49.0), 196.0), -36.0), -144.0)) * 2.48015873015873e-05);}
 inline float__N__ mu_l8_4(float__N__ y){
-  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 3569.0, -16060.0), 27450.0), -21140.0), 6181.0)) / 40320.0);}
-
-
-#endif
+  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 3569.0, -16060.0), 27450.0), -21140.0), 6181.0)) * 2.48015873015873e-05);}
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec.cl b/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec.cl
index e023dc5e8..a46f89e31 100644
--- a/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec.cl
+++ b/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec.cl
@@ -5,197 +5,194 @@
  */
 
 inline float alpha_l2_1(float y){
-  return ((y * (y * (-y + 2.0) - 1.0)) / 2.0);}
+  return ((y * (y * (-y + 2.0) - 1.0)) * 0.5);}
 inline float beta_l2_1(float y){
-  return ((y * y * (3.0 * y - 5.0) + 2.0) / 2.0);}
+  return ((y * y * (3.0 * y - 5.0) + 2.0) * 0.5);}
 inline float gamma_l2_1(float y){
-  return ((y * (y * (-3.0 * y + 4.0) + 1.0)) / 2.0);}
+  return ((y * (y * (-3.0 * y + 4.0) + 1.0)) * 0.5);}
 inline float delta_l2_1(float y){
-  return ((y * y * (y - 1.0)) / 2.0);}
+  return ((y * y * (y - 1.0)) * 0.5);}
 
 
 inline float alpha_l2_2(float y){
-  return ((y * (y * (y * (y * (2.0 * y - 5.0) + 3.0) + 1.0) - 1.0)) / 2.0);}
+  return ((y * (y * (y * (y * (2.0 * y - 5.0) + 3.0) + 1.0) - 1.0)) * 0.5);}
 inline float beta_l2_2(float y){
-  return ((y * y * (y * (y * (-6.0 * y + 15.0) - 9.0) - 2.0) + 2.0) / 2.0);}
+  return ((y * y * (y * (y * (-6.0 * y + 15.0) - 9.0) - 2.0) + 2.0) * 0.5);}
 inline float gamma_l2_2(float y){
-  return ((y * (y * (y * (y * (6.0 * y - 15.0) + 9.0) + 1.0) + 1.0)) / 2.0);}
+  return ((y * (y * (y * (y * (6.0 * y - 15.0) + 9.0) + 1.0) + 1.0)) * 0.5);}
 inline float delta_l2_2(float y){
-  return ((y * y * y * (y * (-2.0 * y + 5.0) - 3.0)) / 2.0);}
+  return ((y * y * y * (y * (-2.0 * y + 5.0) - 3.0)) * 0.5);}
 
 
 inline float alpha_l2_3(float y){
-  return ((y * (y * (y * y * (y * (y * (-6.0 * y + 21.0) - 25.0) + 10.0) + 1.0) - 1.0)) / 2.0);}
+  return ((y * (y * (y * y * (y * (y * (-6.0 * y + 21.0) - 25.0) + 10.0) + 1.0) - 1.0)) * 0.5);}
 inline float beta_l2_3(float y){
-  return ((y * y * (y * y * (y * (y * (18.0 * y - 63.0) + 75.0) - 30.0) - 2.0) + 2.0) / 2.0);}
+  return ((y * y * (y * y * (y * (y * (18.0 * y - 63.0) + 75.0) - 30.0) - 2.0) + 2.0) * 0.5);}
 inline float gamma_l2_3(float y){
-  return ((y * (y * (y * y * (y * (y * (-18.0 * y + 63.0) - 75.0) + 30.0) + 1.0) + 1.0)) / 2.0);}
+  return ((y * (y * (y * y * (y * (y * (-18.0 * y + 63.0) - 75.0) + 30.0) + 1.0) + 1.0)) * 0.5);}
 inline float delta_l2_3(float y){
-  return ((y * y * y * y * (y * (y * (6.0 * y - 21.0) + 25.0) - 10.0)) / 2.0);}
+  return ((y * y * y * y * (y * (y * (6.0 * y - 21.0) + 25.0) - 10.0)) * 0.5);}
 
 
 inline float alpha_l2_4(float y){
-  return ((y * (y * (y * y * y * (y * (y * (y * (20.0 * y - 90.0) + 154.0) - 119.0) + 35.0) + 1.0) - 1.0)) / 2.0);}
+  return ((y * (y * (y * y * y * (y * (y * (y * (20.0 * y - 90.0) + 154.0) - 119.0) + 35.0) + 1.0) - 1.0)) * 0.5);}
 inline float beta_l2_4(float y){
-  return ((y * y * (y * y * y * (y * (y * (y * (-60.0 * y + 270.0) - 462.0) + 357.0) - 105.0) - 2.0) + 2.0) / 2.0);}
+  return ((y * y * (y * y * y * (y * (y * (y * (-60.0 * y + 270.0) - 462.0) + 357.0) - 105.0) - 2.0) + 2.0) * 0.5);}
 inline float gamma_l2_4(float y){
-  return ((y * (y * (y * y * y * (y * (y * (y * (60.0 * y - 270.0) + 462.0) - 357.0) + 105.0) + 1.0) + 1.0)) / 2.0);}
+  return ((y * (y * (y * y * y * (y * (y * (y * (60.0 * y - 270.0) + 462.0) - 357.0) + 105.0) + 1.0) + 1.0)) * 0.5);}
 inline float delta_l2_4(float y){
-  return ((y * y * y * y * y * (y * (y * (y * (-20.0 * y + 90.0) - 154.0) + 119.0) - 35.0)) / 2.0);}
+  return ((y * y * y * y * y * (y * (y * (y * (-20.0 * y + 90.0) - 154.0) + 119.0) - 35.0)) * 0.5);}
 
 
 inline float alpha_l4_2(float y){
-  return ((y * (y * (y * (y * (-5.0 * y + 13.0) - 9.0) - 1.0) + 2.0)) / 24.0);}
+  return ((y * (y * (y * (y * (-5.0 * y + 13.0) - 9.0) - 1.0) + 2.0)) * 0.041666666666666664);}
 inline float beta_l4_2(float y){
-  return ((y * (y * (y * (y * (25.0 * y - 64.0) + 39.0) + 16.0) - 16.0)) / 24.0);}
+  return ((y * (y * (y * (y * (25.0 * y - 64.0) + 39.0) + 16.0) - 16.0)) * 0.041666666666666664);}
 inline float gamma_l4_2(float y){
-  return ((y * y * (y * (y * (-50.0 * y + 126.0) - 70.0) - 30.0) + 24.0) / 24.0);}
+  return ((y * y * (y * (y * (-50.0 * y + 126.0) - 70.0) - 30.0) + 24.0) * 0.041666666666666664);}
 inline float delta_l4_2(float y){
-  return ((y * (y * (y * (y * (50.0 * y - 124.0) + 66.0) + 16.0) + 16.0)) / 24.0);}
+  return ((y * (y * (y * (y * (50.0 * y - 124.0) + 66.0) + 16.0) + 16.0)) * 0.041666666666666664);}
 inline float eta_l4_2(float y){
-  return ((y * (y * (y * (y * (-25.0 * y + 61.0) - 33.0) - 1.0) - 2.0)) / 24.0);}
+  return ((y * (y * (y * (y * (-25.0 * y + 61.0) - 33.0) - 1.0) - 2.0)) * 0.041666666666666664);}
 inline float zeta_l4_2(float y){
-  return ((y * y * y * (y * (5.0 * y - 12.0) + 7.0)) / 24.0);}
+  return ((y * y * y * (y * (5.0 * y - 12.0) + 7.0)) * 0.041666666666666664);}
 
 
 inline float alpha_l4_3(float y){
-  return ((y * (y * (y * (y * (y * (y * (14.0 * y - 49.0) + 58.0) - 22.0) - 2.0) - 1.0) + 2.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (14.0 * y - 49.0) + 58.0) - 22.0) - 2.0) - 1.0) + 2.0)) * 0.041666666666666664);}
 inline float beta_l4_3(float y){
-  return ((y * (y * (y * (y * (y * (y * (-70.0 * y + 245.0) - 290.0) + 111.0) + 4.0) + 16.0) - 16.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (-70.0 * y + 245.0) - 290.0) + 111.0) + 4.0) + 16.0) - 16.0)) * 0.041666666666666664);}
 inline float gamma_l4_3(float y){
-  return ((y * y * (y * y * (y * (y * (140.0 * y - 490.0) + 580.0) - 224.0) - 30.0) + 24.0) / 24.0);}
+  return ((y * y * (y * y * (y * (y * (140.0 * y - 490.0) + 580.0) - 224.0) - 30.0) + 24.0) * 0.041666666666666664);}
 inline float delta_l4_3(float y){
-  return ((y * (y * (y * (y * (y * (y * (-140.0 * y + 490.0) - 580.0) + 226.0) - 4.0) + 16.0) + 16.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (-140.0 * y + 490.0) - 580.0) + 226.0) - 4.0) + 16.0) + 16.0)) * 0.041666666666666664);}
 inline float eta_l4_3(float y){
-  return ((y * (y * (y * (y * (y * (y * (70.0 * y - 245.0) + 290.0) - 114.0) + 2.0) - 1.0) - 2.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (70.0 * y - 245.0) + 290.0) - 114.0) + 2.0) - 1.0) - 2.0)) * 0.041666666666666664);}
 inline float zeta_l4_3(float y){
-  return ((y * y * y * y * (y * (y * (-14.0 * y + 49.0) - 58.0) + 23.0)) / 24.0);}
+  return ((y * y * y * y * (y * (y * (-14.0 * y + 49.0) - 58.0) + 23.0)) * 0.041666666666666664);}
 
 
 inline float alpha_l4_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-46.0 * y + 207.0) - 354.0) + 273.0) - 80.0) + 1.0) - 2.0) - 1.0) + 2.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-46.0 * y + 207.0) - 354.0) + 273.0) - 80.0) + 1.0) - 2.0) - 1.0) + 2.0)) * 0.041666666666666664);}
 inline float beta_l4_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (230.0 * y - 1035.0) + 1770.0) - 1365.0) + 400.0) - 4.0) + 4.0) + 16.0) - 16.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (230.0 * y - 1035.0) + 1770.0) - 1365.0) + 400.0) - 4.0) + 4.0) + 16.0) - 16.0)) * 0.041666666666666664);}
 inline float gamma_l4_4(float y){
-  return ((y * y * (y * y * (y * (y * (y * (y * (-460.0 * y + 2070.0) - 3540.0) + 2730.0) - 800.0) + 6.0) - 30.0) + 24.0) / 24.0);}
+  return ((y * y * (y * y * (y * (y * (y * (y * (-460.0 * y + 2070.0) - 3540.0) + 2730.0) - 800.0) + 6.0) - 30.0) + 24.0) * 0.041666666666666664);}
 inline float delta_l4_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (460.0 * y - 2070.0) + 3540.0) - 2730.0) + 800.0) - 4.0) - 4.0) + 16.0) + 16.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (460.0 * y - 2070.0) + 3540.0) - 2730.0) + 800.0) - 4.0) - 4.0) + 16.0) + 16.0)) * 0.041666666666666664);}
 inline float eta_l4_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-230.0 * y + 1035.0) - 1770.0) + 1365.0) - 400.0) + 1.0) + 2.0) - 1.0) - 2.0)) / 24.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-230.0 * y + 1035.0) - 1770.0) + 1365.0) - 400.0) + 1.0) + 2.0) - 1.0) - 2.0)) * 0.041666666666666664);}
 inline float zeta_l4_4(float y){
-  return ((y * y * y * y * y * (y * (y * (y * (46.0 * y - 207.0) + 354.0) - 273.0) + 80.0)) / 24.0);}
+  return ((y * y * y * y * y * (y * (y * (y * (46.0 * y - 207.0) + 354.0) - 273.0) + 80.0)) * 0.041666666666666664);}
 
 
 inline float alpha_M8p(float y){
-  return ((y*(y*(y*(y*(y*(y*(-10.0*y + 21.0) + 28.0) - 105.0) + 70.0) + 35.0) - 56.0) + 17.0) / 3360.0);}
+  return ((y*(y*(y*(y*(y*(y*(-10.0*y + 21.0) + 28.0) - 105.0) + 70.0) + 35.0) - 56.0) + 17.0) * 0.00029761904761904765);}
 inline float beta_M8p(float y){
-  return ((y*(y*(y*(y*(y*(y*(70.0*y - 175.0) - 140.0) + 770.0) - 560.0) - 350.0) + 504.0) - 102.0) / 3360.0);}
+  return ((y*(y*(y*(y*(y*(y*(70.0*y - 175.0) - 140.0) + 770.0) - 560.0) - 350.0) + 504.0) - 102.0) * 0.00029761904761904765);}
 inline float gamma_M8p(float y){
-  return ((y*(y*(y*(y*(y*(y*(-210.0*y + 609.0) + 224.0) - 2135.0) + 910.0) + 2765.0) - 2520.0) + 255.0) / 3360.0);}
+  return ((y*(y*(y*(y*(y*(y*(-210.0*y + 609.0) + 224.0) - 2135.0) + 910.0) + 2765.0) - 2520.0) + 255.0) * 0.00029761904761904765);}
 inline float delta_M8p(float y){
-  return ((y*y* (y*y* (y*y* (70.0*y - 231.0) + 588.0) - 980.0) + 604.0) / 672.0);}
+  return ((y*y* (y*y* (y*y* (70.0*y - 231.0) + 588.0) - 980.0) + 604.0) * 0.001488095238095238);}
 inline float eta_M8p(float y){
-  return ((y*(y*(y*(y*(y*(y*(-70.0*y+ 259.0) - 84.0) - 427.0) - 182.0)+ 553.0) + 504.0)+ 51.0) / 672.0);}
+  return ((y*(y*(y*(y*(y*(y*(-70.0*y+ 259.0) - 84.0) - 427.0) - 182.0)+ 553.0) + 504.0)+ 51.0) * 0.001488095238095238);}
 inline float zeta_M8p(float y){
-  return ((y*(y*(y*(y*(y*(y*(210.0*y- 861.0) + 532.0) + 770.0) + 560.0) - 350.0) - 504.0) - 102.0) / 3360.0);}
+  return ((y*(y*(y*(y*(y*(y*(210.0*y- 861.0) + 532.0) + 770.0) + 560.0) - 350.0) - 504.0) - 102.0) * 0.00029761904761904765);}
 inline float theta_M8p(float y){
-  return ((y* (y* (y* (y* (y* (y* (-70.0* y+ 315.0) -280.0) -105.0) -70.0) +35.0)+ 56.0) +17.0) / 3360.0);}
+  return ((y* (y* (y* (y* (y* (y* (-70.0* y+ 315.0) -280.0) -105.0) -70.0) +35.0)+ 56.0) +17.0) * 0.00029761904761904765);}
 inline float iota_M8p(float y){
-  return ((y * y * y * y * y * (y * (10.0 * y - 49.0) + 56.0)) / 3360.0);}
+  return ((y * y * y * y * y * (y * (10.0 * y - 49.0) + 56.0)) * 0.00029761904761904765);}
 
 
 inline float alpha_l6_3(float y){
-  return ((y * (y * (y * (y * (y * (y * (-89.0 * y + 312.0) - 370.0) + 140.0) + 15.0) + 4.0) - 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (-89.0 * y + 312.0) - 370.0) + 140.0) + 15.0) + 4.0) - 12.0)) * 0.001388888888888889);}
 inline float beta_l6_3(float y){
-  return ((y * (y * (y * (y * (y * (y * (623.0 * y - 2183.0) + 2581.0) - 955.0) - 120.0) - 54.0) + 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (623.0 * y - 2183.0) + 2581.0) - 955.0) - 120.0) - 54.0) + 108.0)) * 0.001388888888888889);}
 inline float gamma_l6_3(float y){
-  return ((y * (y * (y * (y * (y * (y * (-1869.0 * y + 6546.0) - 7722.0) + 2850.0) + 195.0) + 540.0) - 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (-1869.0 * y + 6546.0) - 7722.0) + 2850.0) + 195.0) + 540.0) - 540.0)) * 0.001388888888888889);}
 inline float delta_l6_3(float y){
-  return ((y * y * (y * y * (y * (y * (3115.0 * y - 10905.0) + 12845.0) - 4795.0) - 980.0) + 720.0) / 720.0);}
+  return ((y * y * (y * y * (y * (y * (3115.0 * y - 10905.0) + 12845.0) - 4795.0) - 980.0) + 720.0) * 0.001388888888888889);}
 inline float eta_l6_3(float y){
-  return ((y * (y * (y * (y * (y * (y * (-3115.0 * y + 10900.0) - 12830.0) + 4880.0) - 195.0) + 540.0) + 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (-3115.0 * y + 10900.0) - 12830.0) + 4880.0) - 195.0) + 540.0) + 540.0)) * 0.001388888888888889);}
 inline float zeta_l6_3(float y){
-  return ((y * (y * (y * (y * (y * (y * (1869.0 * y - 6537.0) + 7695.0) - 2985.0) + 120.0) - 54.0) - 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (1869.0 * y - 6537.0) + 7695.0) - 2985.0) + 120.0) - 54.0) - 108.0)) * 0.001388888888888889);}
 inline float theta_l6_3(float y){
-  return ((y * (y * (y * (y * (y * (y * (-623.0 * y + 2178.0) - 2566.0) + 1010.0) - 15.0) + 4.0) + 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (-623.0 * y + 2178.0) - 2566.0) + 1010.0) - 15.0) + 4.0) + 12.0)) * 0.001388888888888889);}
 inline float iota_l6_3(float y){
-  return ((y * y * y * y * (y * (y * (89.0 * y - 311.0) + 367.0) - 145.0)) / 720.0);}
+  return ((y * y * y * y * (y * (y * (89.0 * y - 311.0) + 367.0) - 145.0)) * 0.001388888888888889);}
 
 
 inline float alpha_l6_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (290.0 * y - 1305.0) + 2231.0) - 1718.0) + 500.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (290.0 * y - 1305.0) + 2231.0) - 1718.0) + 500.0) - 5.0) + 15.0) + 4.0) - 12.0)) * 0.001388888888888889);}
 inline float beta_l6_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-2030.0 * y + 9135.0) - 15617.0) + 12027.0) - 3509.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-2030.0 * y + 9135.0) - 15617.0) + 12027.0) - 3509.0) + 60.0) - 120.0) - 54.0) + 108.0)) * 0.001388888888888889);}
 inline float gamma_l6_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (6090.0 * y - 27405.0) + 46851.0) - 36084.0) + 10548.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (6090.0 * y - 27405.0) + 46851.0) - 36084.0) + 10548.0) - 195.0) + 195.0) + 540.0) - 540.0)) * 0.001388888888888889);}
 inline float delta_l6_4(float y){
-  return ((y * y * (y * y * (y * (y * (y * (y * (-10150.0 * y + 45675.0) - 78085.0) + 60145.0) - 17605.0) + 280.0) - 980.0) + 720.0) / 720.0);}
+  return ((y * y * (y * y * (y * (y * (y * (y * (-10150.0 * y + 45675.0) - 78085.0) + 60145.0) - 17605.0) + 280.0) - 980.0) + 720.0) * 0.001388888888888889);}
 inline float eta_l6_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (10150.0 * y - 45675.0) + 78085.0) - 60150.0) + 17620.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (10150.0 * y - 45675.0) + 78085.0) - 60150.0) + 17620.0) - 195.0) - 195.0) + 540.0) + 540.0)) * 0.001388888888888889);}
 inline float zeta_l6_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-6090.0 * y + 27405.0) - 46851.0) + 36093.0) - 10575.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-6090.0 * y + 27405.0) - 46851.0) + 36093.0) - 10575.0) + 60.0) + 120.0) - 54.0) - 108.0)) * 0.001388888888888889);}
 inline float theta_l6_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (2030.0 * y - 9135.0) + 15617.0) - 12032.0) + 3524.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (2030.0 * y - 9135.0) + 15617.0) - 12032.0) + 3524.0) - 5.0) - 15.0) + 4.0) + 12.0)) * 0.001388888888888889);}
 inline float iota_l6_4(float y){
-  return ((y * y * y * y * y * (y * (y * (y * (-290.0 * y + 1305.0) - 2231.0) + 1719.0) - 503.0)) / 720.0);}
+  return ((y * y * y * y * y * (y * (y * (y * (-290.0 * y + 1305.0) - 2231.0) + 1719.0) - 503.0)) * 0.001388888888888889);}
 
 
 inline float alpha_l6_5(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-1006.0 * y + 5533.0) - 12285.0) + 13785.0) - 7829.0) + 1803.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-1006.0 * y + 5533.0) - 12285.0) + 13785.0) - 7829.0) + 1803.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) * 0.001388888888888889);}
 inline float beta_l6_5(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (7042.0 * y - 38731.0) + 85995.0) - 96495.0) + 54803.0) - 12620.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (7042.0 * y - 38731.0) + 85995.0) - 96495.0) + 54803.0) - 12620.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) * 0.001388888888888889);}
 inline float gamma_l6_5(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-21126.0 * y + 116193.0) - 257985.0) + 289485.0) - 164409.0) + 37857.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-21126.0 * y + 116193.0) - 257985.0) + 289485.0) - 164409.0) + 37857.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) * 0.001388888888888889);}
 inline float delta_l6_5(float y){
-  return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (35210.0 * y - 193655.0) + 429975.0) - 482475.0) + 274015.0) - 63090.0) + 280.0) - 980.0) + 720.0) / 720.0);}
+  return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (35210.0 * y - 193655.0) + 429975.0) - 482475.0) + 274015.0) - 63090.0) + 280.0) - 980.0) + 720.0) * 0.001388888888888889);}
 inline float eta_l6_5(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-35210.0 * y + 193655.0) - 429975.0) + 482475.0) - 274015.0) + 63085.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-35210.0 * y + 193655.0) - 429975.0) + 482475.0) - 274015.0) + 63085.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) * 0.001388888888888889);}
 inline float zeta_l6_5(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (21126.0 * y - 116193.0) + 257985.0) - 289485.0) + 164409.0) - 37848.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (21126.0 * y - 116193.0) + 257985.0) - 289485.0) + 164409.0) - 37848.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) * 0.001388888888888889);}
 inline float theta_l6_5(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-7042.0 * y + 38731.0) - 85995.0) + 96495.0) - 54803.0) + 12615.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-7042.0 * y + 38731.0) - 85995.0) + 96495.0) - 54803.0) + 12615.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) * 0.001388888888888889);}
 inline float iota_l6_5(float y){
-  return ((y * y * y * y * y * y * (y * (y * (y * (y * (1006.0 * y - 5533.0) + 12285.0) - 13785.0) + 7829.0) - 1802.0)) / 720.0);}
+  return ((y * y * y * y * y * y * (y * (y * (y * (y * (1006.0 * y - 5533.0) + 12285.0) - 13785.0) + 7829.0) - 1802.0)) * 0.001388888888888889);}
 
 
 inline float alpha_l6_6(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (3604.0 * y - 23426.0) + 63866.0) - 93577.0) + 77815.0) - 34869.0) + 6587.0) + 1.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (3604.0 * y - 23426.0) + 63866.0) - 93577.0) + 77815.0) - 34869.0) + 6587.0) + 1.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) * 0.001388888888888889);}
 inline float beta_l6_6(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-25228.0 * y + 163982.0) - 447062.0) + 655039.0) - 544705.0) + 244083.0) - 46109.0) - 6.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-25228.0 * y + 163982.0) - 447062.0) + 655039.0) - 544705.0) + 244083.0) - 46109.0) - 6.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) * 0.001388888888888889);}
 inline float gamma_l6_6(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (75684.0 * y - 491946.0) + 1341186.0) - 1965117.0) + 1634115.0) - 732249.0) + 138327.0) + 15.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (75684.0 * y - 491946.0) + 1341186.0) - 1965117.0) + 1634115.0) - 732249.0) + 138327.0) + 15.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) * 0.001388888888888889);}
 inline float delta_l6_6(float y){
-  return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (y * (y * (-126140.0 * y + 819910.0) - 2235310.0) + 3275195.0) - 2723525.0) + 1220415.0) - 230545.0) - 20.0) + 280.0) - 980.0) + 720.0) / 720.0);}
+  return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (y * (y * (-126140.0 * y + 819910.0) - 2235310.0) + 3275195.0) - 2723525.0) + 1220415.0) - 230545.0) - 20.0) + 280.0) - 980.0) + 720.0) * 0.001388888888888889);}
 inline float eta_l6_6(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (126140.0 * y - 819910.0) + 2235310.0) - 3275195.0) + 2723525.0) - 1220415.0) + 230545.0) + 15.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (126140.0 * y - 819910.0) + 2235310.0) - 3275195.0) + 2723525.0) - 1220415.0) + 230545.0) + 15.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) * 0.001388888888888889);}
 inline float zeta_l6_6(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-75684.0 * y + 491946.0) - 1341186.0) + 1965117.0) - 1634115.0) + 732249.0) - 138327.0) - 6.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-75684.0 * y + 491946.0) - 1341186.0) + 1965117.0) - 1634115.0) + 732249.0) - 138327.0) - 6.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) * 0.001388888888888889);}
 inline float theta_l6_6(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (25228.0 * y - 163982.0) + 447062.0) - 655039.0) + 544705.0) - 244083.0) + 46109.0) + 1.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (25228.0 * y - 163982.0) + 447062.0) - 655039.0) + 544705.0) - 244083.0) + 46109.0) + 1.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) * 0.001388888888888889);}
 inline float iota_l6_6(float y){
-  return ((y * y * y * y * y * y * y * (y * (y * (y * (y * (y * (-3604.0 * y + 23426.0) - 63866.0) + 93577.0) - 77815.0) + 34869.0) - 6587.0)) / 720.0);}
+  return ((y * y * y * y * y * y * y * (y * (y * (y * (y * (y * (-3604.0 * y + 23426.0) - 63866.0) + 93577.0) - 77815.0) + 34869.0) - 6587.0)) * 0.001388888888888889);}
 
 
 inline float alpha_l8_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-3569.0 * y + 16061.0) - 27454.0) + 21126.0) - 6125.0) + 49.0) - 196.0) - 36.0) + 144.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-3569.0 * y + 16061.0) - 27454.0) + 21126.0) - 6125.0) + 49.0) - 196.0) - 36.0) + 144.0)) * 2.48015873015873e-05);}
 inline float beta_l8_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (32121.0 * y - 144548.0) + 247074.0) - 190092.0) + 55125.0) - 672.0) + 2016.0) + 512.0) - 1536.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (32121.0 * y - 144548.0) + 247074.0) - 190092.0) + 55125.0) - 672.0) + 2016.0) + 512.0) - 1536.0)) * 2.48015873015873e-05);}
 inline float gamma_l8_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-128484.0 * y + 578188.0) - 988256.0) + 760312.0) - 221060.0) + 4732.0) - 9464.0) - 4032.0) + 8064.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-128484.0 * y + 578188.0) - 988256.0) + 760312.0) - 221060.0) + 4732.0) - 9464.0) - 4032.0) + 8064.0)) * 2.48015873015873e-05);}
 inline float delta_l8_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (299796.0 * y - 1349096.0) + 2305856.0) - 1774136.0) + 517580.0) - 13664.0) + 13664.0) + 32256.0) - 32256.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (299796.0 * y - 1349096.0) + 2305856.0) - 1774136.0) + 517580.0) - 13664.0) + 13664.0) + 32256.0) - 32256.0)) * 2.48015873015873e-05);}
 inline float eta_l8_4(float y){
-  return ((y * y * (y * y * (y * (y * (y * (y * (-449694.0 * y + 2023630.0) - 3458700.0) + 2661540.0) - 778806.0) + 19110.0) - 57400.0) + 40320.0) / 40320.0);}
+  return ((y * y * (y * y * (y * (y * (y * (y * (-449694.0 * y + 2023630.0) - 3458700.0) + 2661540.0) - 778806.0) + 19110.0) - 57400.0) + 40320.0) * 2.48015873015873e-05);}
 inline float zeta_l8_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (449694.0 * y - 2023616.0) + 3458644.0) - 2662016.0) + 780430.0) - 13664.0) - 13664.0) + 32256.0) + 32256.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (449694.0 * y - 2023616.0) + 3458644.0) - 2662016.0) + 780430.0) - 13664.0) - 13664.0) + 32256.0) + 32256.0)) * 2.48015873015873e-05);}
 inline float theta_l8_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-299796.0 * y + 1349068.0) - 2305744.0) + 1775032.0) - 520660.0) + 4732.0) + 9464.0) - 4032.0) - 8064.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-299796.0 * y + 1349068.0) - 2305744.0) + 1775032.0) - 520660.0) + 4732.0) + 9464.0) - 4032.0) - 8064.0)) * 2.48015873015873e-05);}
 inline float iota_l8_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (128484.0 * y - 578168.0) + 988176.0) - 760872.0) + 223020.0) - 672.0) - 2016.0) + 512.0) + 1536.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (128484.0 * y - 578168.0) + 988176.0) - 760872.0) + 223020.0) - 672.0) - 2016.0) + 512.0) + 1536.0)) * 2.48015873015873e-05);}
 inline float kappa_l8_4(float y){
-  return ((y * (y * (y * (y * (y * (y * (y * (y * (-32121.0 * y + 144541.0) - 247046.0) + 190246.0) - 55685.0) + 49.0) + 196.0) - 36.0) - 144.0)) / 40320.0);}
+  return ((y * (y * (y * (y * (y * (y * (y * (y * (-32121.0 * y + 144541.0) - 247046.0) + 190246.0) - 55685.0) + 49.0) + 196.0) - 36.0) - 144.0)) * 2.48015873015873e-05);}
 inline float mu_l8_4(float y){
-  return ((y * y * y * y * y * (y * (y * (y * (3569.0 * y - 16060.0) + 27450.0) - 21140.0) + 6181.0)) / 40320.0);}
-
-
-#endif
+  return ((y * y * y * y * y * (y * (y * (y * (3569.0 * y - 16060.0) + 27450.0) - 21140.0) + 6181.0)) * 2.48015873015873e-05);}
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec_builtin.cl b/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec_builtin.cl
index e1dc7d29e..4c0e12480 100644
--- a/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec_builtin.cl
+++ b/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec_builtin.cl
@@ -5,195 +5,194 @@
  */
 
 inline float alpha_l2_1(float y){
-  return (y*fma(y,fma(y,-1.0, 2.0), - 1.0)/2.0);}
+  return (y*fma(y,fma(y,-1.0, 2.0), - 1.0) * 0.5);}
 inline float beta_l2_1(float y){
-  return (fma(y*y, fma(y, 3.0, -5.0), 2.0) / 2.0);}
+  return (fma(y*y, fma(y, 3.0, -5.0), 2.0) * 0.5);}
 inline float gamma_l2_1(float   y){
-  return ((y * fma(y , fma(-3.0, y, 4.0), 1.0)) / 2.0);}
+  return ((y * fma(y , fma(-3.0, y, 4.0), 1.0)) * 0.5);}
 inline float delta_l2_1(float y){
-  return ((y * y * fma(1.0, y, - 1.0)) / 2.0);}
+  return ((y * y * fma(1.0, y, - 1.0)) * 0.5);}
 
 
 inline float alpha_l2_2(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, 2.0, -5.0), 3.0), 1.0), -1.0)) / 2.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, 2.0, -5.0), 3.0), 1.0), -1.0)) * 0.5);}
 inline float beta_l2_2(float y){
-  return (fma(y * y, fma(y, fma(y, fma(y, -6.0, 15.0), -9.0), -2.0), 2.0) / 2.0);}
+  return (fma(y * y, fma(y, fma(y, fma(y, -6.0, 15.0), -9.0), -2.0), 2.0) * 0.5);}
 inline float gamma_l2_2(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, 6.0, -15.0), 9.0), 1.0), 1.0)) / 2.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, 6.0, -15.0), 9.0), 1.0), 1.0)) * 0.5);}
 inline float delta_l2_2(float y){
-  return ((y * y * y * fma(y, fma(y, -2.0, 5.0), -3.0)) / 2.0);}
+  return ((y * y * y * fma(y, fma(y, -2.0, 5.0), -3.0)) * 0.5);}
 
 
 inline float alpha_l2_3(float y){
-  return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -6.0, 21.0), -25.0), 10.0), 1.0), -1.0)) / 2.0);}
+  return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -6.0, 21.0), -25.0), 10.0), 1.0), -1.0)) * 0.5);}
 inline float beta_l2_3(float y){
-  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 18.0, -63.0), 75.0), -30.0), -2.0), 2.0) / 2.0);}
+  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 18.0, -63.0), 75.0), -30.0), -2.0), 2.0) * 0.5);}
 inline float gamma_l2_3(float y){
-  return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -18.0, 63.0), -75.0), 30.0), 1.0), 1.0)) / 2.0);}
+  return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -18.0, 63.0), -75.0), 30.0), 1.0), 1.0)) * 0.5);}
 inline float delta_l2_3(float y){
-  return ((y * y * y * y * fma(y, fma(y, fma(y, 6.0, -21.0), 25.0), -10.0)) / 2.0);}
+  return ((y * y * y * y * fma(y, fma(y, fma(y, 6.0, -21.0), 25.0), -10.0)) * 0.5);}
 
 
 inline float alpha_l2_4(float y){
-  return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 20.0, -90.0), 154.0), -119.0), 35.0), 1.0), -1.0)) / 2.0);}
+  return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 20.0, -90.0), 154.0), -119.0), 35.0), 1.0), -1.0)) * 0.5);}
 inline float beta_l2_4(float y){
-  return (fma(y * y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, -60.0, 270.0), -462.0), 357.0), -105.0), -2.0), 2.0) / 2.0);}
+  return (fma(y * y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, -60.0, 270.0), -462.0), 357.0), -105.0), -2.0), 2.0) * 0.5);}
 inline float gamma_l2_4(float y){
-  return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 60.0, -270.0), 462.0), -357.0), 105.0), 1.0), 1.0)) / 2.0);}
+  return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 60.0, -270.0), 462.0), -357.0), 105.0), 1.0), 1.0)) * 0.5);}
 inline float delta_l2_4(float y){
-  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -20.0, 90.0), -154.0), 119.0), -35.0)) / 2.0);}
+  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -20.0, 90.0), -154.0), 119.0), -35.0)) * 0.5);}
 
 
 inline float alpha_l4_2(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, -5.0, 13.0), -9.0), -1.0), 2.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, -5.0, 13.0), -9.0), -1.0), 2.0)) * 0.041666666666666664);}
 inline float beta_l4_2(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, 25.0, -64.0), 39.0), 16.0), -16.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, 25.0, -64.0), 39.0), 16.0), -16.0)) * 0.041666666666666664);}
 inline float gamma_l4_2(float y){
-  return (fma(y * y, fma(y, fma(y, fma(y, -50.0, 126.0), -70.0), -30.0), 24.0) / 24.0);}
+  return (fma(y * y, fma(y, fma(y, fma(y, -50.0, 126.0), -70.0), -30.0), 24.0) * 0.041666666666666664);}
 inline float delta_l4_2(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, 50.0, -124.0), 66.0), 16.0), 16.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, 50.0, -124.0), 66.0), 16.0), 16.0)) * 0.041666666666666664);}
 inline float eta_l4_2(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, -25.0, 61.0), -33.0), -1.0), -2.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, -25.0, 61.0), -33.0), -1.0), -2.0)) * 0.041666666666666664);}
 inline float zeta_l4_2(float y){
-  return ((y * y * y * fma(y, fma(y, 5.0, -12.0), 7.0)) / 24.0);}
+  return ((y * y * y * fma(y, fma(y, 5.0, -12.0), 7.0)) * 0.041666666666666664);}
 
 
 inline float alpha_l4_3(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 14.0, -49.0), 58.0), -22.0), -2.0), -1.0), 2.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 14.0, -49.0), 58.0), -22.0), -2.0), -1.0), 2.0)) * 0.041666666666666664);}
 inline float beta_l4_3(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -70.0, 245.0), -290.0), 111.0), 4.0), 16.0), -16.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -70.0, 245.0), -290.0), 111.0), 4.0), 16.0), -16.0)) * 0.041666666666666664);}
 inline float gamma_l4_3(float y){
-  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 140.0, -490.0), 580.0), -224.0), -30.0), 24.0) / 24.0);}
+  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 140.0, -490.0), 580.0), -224.0), -30.0), 24.0) * 0.041666666666666664);}
 inline float delta_l4_3(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -140.0, 490.0), -580.0), 226.0), -4.0), 16.0), 16.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -140.0, 490.0), -580.0), 226.0), -4.0), 16.0), 16.0)) * 0.041666666666666664);}
 inline float eta_l4_3(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 70.0, -245.0), 290.0), -114.0), 2.0), -1.0), -2.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 70.0, -245.0), 290.0), -114.0), 2.0), -1.0), -2.0)) * 0.041666666666666664);}
 inline float zeta_l4_3(float y){
-  return ((y * y * y * y * fma(y, fma(y, fma(y, -14.0, 49.0), -58.0), 23.0)) / 24.0);}
+  return ((y * y * y * y * fma(y, fma(y, fma(y, -14.0, 49.0), -58.0), 23.0)) * 0.041666666666666664);}
 
 
 inline float alpha_l4_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -46.0, 207.0), -354.0), 273.0), -80.0), 1.0), -2.0), -1.0), 2.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -46.0, 207.0), -354.0), 273.0), -80.0), 1.0), -2.0), -1.0), 2.0)) * 0.041666666666666664);}
 inline float beta_l4_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 230.0, -1035.0), 1770.0), -1365.0), 400.0), -4.0), 4.0), 16.0), -16.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 230.0, -1035.0), 1770.0), -1365.0), 400.0), -4.0), 4.0), 16.0), -16.0)) * 0.041666666666666664);}
 inline float gamma_l4_4(float y){
-  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -460.0, 2070.0), -3540.0), 2730.0), -800.0), 6.0), -30.0), 24.0) / 24.0);}
+  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -460.0, 2070.0), -3540.0), 2730.0), -800.0), 6.0), -30.0), 24.0) * 0.041666666666666664);}
 inline float delta_l4_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 460.0, -2070.0), 3540.0), -2730.0), 800.0), -4.0), -4.0), 16.0), 16.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 460.0, -2070.0), 3540.0), -2730.0), 800.0), -4.0), -4.0), 16.0), 16.0)) * 0.041666666666666664);}
 inline float eta_l4_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -230.0, 1035.0), -1770.0), 1365.0), -400.0), 1.0), 2.0), -1.0), -2.0)) / 24.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -230.0, 1035.0), -1770.0), 1365.0), -400.0), 1.0), 2.0), -1.0), -2.0)) * 0.041666666666666664);}
 inline float zeta_l4_4(float y){
-  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 46.0, -207.0), 354.0), -273.0), 80.0)) / 24.0);}
+  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 46.0, -207.0), 354.0), -273.0), 80.0)) * 0.041666666666666664);}
 
 
 inline float alpha_M8p(float y){
-  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-10.0,y, + 21.0), + 28.0), - 105.0), + 70.0), + 35.0), - 56.0), + 17.0) / 3360.0);}
+  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-10.0,y, + 21.0), + 28.0), - 105.0), + 70.0), + 35.0), - 56.0), + 17.0) * 0.00029761904761904765);}
 inline float beta_M8p(float y){
-  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(70.0,y, - 175.0), - 140.0), + 770.0), - 560.0), - 350.0), + 504.0), - 102.0) / 3360.0);}
+  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(70.0,y, - 175.0), - 140.0), + 770.0), - 560.0), - 350.0), + 504.0), - 102.0) * 0.00029761904761904765);}
 inline float gamma_M8p(float y){
-  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-210.0,y, + 609.0), + 224.0), - 2135.0), + 910.0), + 2765.0), - 2520.0), + 255.0) / 3360.0);}
+  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-210.0,y, + 609.0), + 224.0), - 2135.0), + 910.0), + 2765.0), - 2520.0), + 255.0) * 0.00029761904761904765);}
 inline float delta_M8p(float y){
-  return (fma(y*y, fma(y*y, fma(y*y, fma(70.0,y, - 231.0), + 588.0), - 980.0), + 604.0) / 672.0);}
+  return (fma(y*y, fma(y*y, fma(y*y, fma(70.0,y, - 231.0), + 588.0), - 980.0), + 604.0) * 0.001488095238095238);}
 inline float eta_M8p(float y){
-  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-70.0,y, 259.0), - 84.0), - 427.0), - 182.0), + 553.0), + 504.0), + 51.0) / 672.0);}
+  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-70.0,y, 259.0), - 84.0), - 427.0), - 182.0), + 553.0), + 504.0), + 51.0) * 0.001488095238095238);}
 inline float zeta_M8p(float y){
-  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(210.0,y,- 861.0), + 532.0), + 770.0), + 560.0), - 350.0), - 504.0), - 102.0) / 3360.0);}
+  return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(210.0,y,- 861.0), + 532.0), + 770.0), + 560.0), - 350.0), - 504.0), - 102.0) * 0.00029761904761904765);}
 inline float theta_M8p(float y){
-  return (fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(-70.0, y, 315.0), -280.0), -105.0), -70.0), 35.0), 56.0), 17.0) / 3360.0);}
+  return (fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(-70.0, y, 315.0), -280.0), -105.0), -70.0), 35.0), 56.0), 17.0) * 0.00029761904761904765);}
 inline float iota_M8p(float y){
-  return ((y * y * y * y * y * fma(y , fma(10.0 , y ,- 49.0) , 56.0)) / 3360.0);}
+  return ((y * y * y * y * y * fma(y , fma(10.0 , y ,- 49.0) , 56.0)) * 0.00029761904761904765);}
 
 
 inline float alpha_l6_3(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -89.0, 312.0), -370.0), 140.0), 15.0), 4.0), -12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -89.0, 312.0), -370.0), 140.0), 15.0), 4.0), -12.0)) * 0.001388888888888889);}
 inline float beta_l6_3(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 623.0, -2183.0), 2581.0), -955.0), -120.0), -54.0), 108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 623.0, -2183.0), 2581.0), -955.0), -120.0), -54.0), 108.0)) * 0.001388888888888889);}
 inline float gamma_l6_3(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1869.0, 6546.0), -7722.0), 2850.0), 195.0), 540.0), -540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1869.0, 6546.0), -7722.0), 2850.0), 195.0), 540.0), -540.0)) * 0.001388888888888889);}
 inline float delta_l6_3(float y){
-  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 3115.0, -10905.0), 12845.0), -4795.0), -980.0), 720.0) / 720.0);}
+  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 3115.0, -10905.0), 12845.0), -4795.0), -980.0), 720.0) * 0.001388888888888889);}
 inline float eta_l6_3(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3115.0, 10900.0), -12830.0), 4880.0), -195.0), 540.0), 540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3115.0, 10900.0), -12830.0), 4880.0), -195.0), 540.0), 540.0)) * 0.001388888888888889);}
 inline float zeta_l6_3(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 1869.0, -6537.0), 7695.0), -2985.0), 120.0), -54.0), -108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 1869.0, -6537.0), 7695.0), -2985.0), 120.0), -54.0), -108.0)) * 0.001388888888888889);}
 inline float theta_l6_3(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -623.0, 2178.0), -2566.0), 1010.0), -15.0), 4.0), 12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -623.0, 2178.0), -2566.0), 1010.0), -15.0), 4.0), 12.0)) * 0.001388888888888889);}
 inline float iota_l6_3(float y){
-  return ((y * y * y * y * fma(y, fma(y, fma(y, 89.0, -311.0), 367.0), -145.0)) / 720.0);}
+  return ((y * y * y * y * fma(y, fma(y, fma(y, 89.0, -311.0), 367.0), -145.0)) * 0.001388888888888889);}
 
 
 inline float alpha_l6_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 290.0, -1305.0), 2231.0), -1718.0), 500.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 290.0, -1305.0), 2231.0), -1718.0), 500.0), -5.0), 15.0), 4.0), -12.0)) * 0.001388888888888889);}
 inline float beta_l6_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -2030.0, 9135.0), -15617.0), 12027.0), -3509.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -2030.0, 9135.0), -15617.0), 12027.0), -3509.0), 60.0), -120.0), -54.0), 108.0)) * 0.001388888888888889);}
 inline float gamma_l6_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 6090.0, -27405.0), 46851.0), -36084.0), 10548.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 6090.0, -27405.0), 46851.0), -36084.0), 10548.0), -195.0), 195.0), 540.0), -540.0)) * 0.001388888888888889);}
 inline float delta_l6_4(float y){
-  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -10150.0, 45675.0), -78085.0), 60145.0), -17605.0), 280.0), -980.0), 720.0) / 720.0);}
+  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -10150.0, 45675.0), -78085.0), 60145.0), -17605.0), 280.0), -980.0), 720.0) * 0.001388888888888889);}
 inline float eta_l6_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 10150.0, -45675.0), 78085.0), -60150.0), 17620.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 10150.0, -45675.0), 78085.0), -60150.0), 17620.0), -195.0), -195.0), 540.0), 540.0)) * 0.001388888888888889);}
 inline float zeta_l6_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -6090.0, 27405.0), -46851.0), 36093.0), -10575.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -6090.0, 27405.0), -46851.0), 36093.0), -10575.0), 60.0), 120.0), -54.0), -108.0)) * 0.001388888888888889);}
 inline float theta_l6_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 2030.0, -9135.0), 15617.0), -12032.0), 3524.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 2030.0, -9135.0), 15617.0), -12032.0), 3524.0), -5.0), -15.0), 4.0), 12.0)) * 0.001388888888888889);}
 inline float iota_l6_4(float y){
-  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -290.0, 1305.0), -2231.0), 1719.0), -503.0)) / 720.0);}
+  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -290.0, 1305.0), -2231.0), 1719.0), -503.0)) * 0.001388888888888889);}
 
 
 inline float alpha_l6_5(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1006.0, 5533.0), -12285.0), 13785.0), -7829.0), 1803.0), -3.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1006.0, 5533.0), -12285.0), 13785.0), -7829.0), 1803.0), -3.0), -5.0), 15.0), 4.0), -12.0)) * 0.001388888888888889);}
 inline float beta_l6_5(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 7042.0, -38731.0), 85995.0), -96495.0), 54803.0), -12620.0), 12.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 7042.0, -38731.0), 85995.0), -96495.0), 54803.0), -12620.0), 12.0), 60.0), -120.0), -54.0), 108.0)) * 0.001388888888888889);}
 inline float gamma_l6_5(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -21126.0, 116193.0), -257985.0), 289485.0), -164409.0), 37857.0), -15.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -21126.0, 116193.0), -257985.0), 289485.0), -164409.0), 37857.0), -15.0), -195.0), 195.0), 540.0), -540.0)) * 0.001388888888888889);}
 inline float delta_l6_5(float y){
-  return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, 35210.0, -193655.0), 429975.0), -482475.0), 274015.0), -63090.0), 280.0), -980.0), 720.0) / 720.0);}
+  return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, 35210.0, -193655.0), 429975.0), -482475.0), 274015.0), -63090.0), 280.0), -980.0), 720.0) * 0.001388888888888889);}
 inline float eta_l6_5(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -35210.0, 193655.0), -429975.0), 482475.0), -274015.0), 63085.0), 15.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -35210.0, 193655.0), -429975.0), 482475.0), -274015.0), 63085.0), 15.0), -195.0), -195.0), 540.0), 540.0)) * 0.001388888888888889);}
 inline float zeta_l6_5(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 21126.0, -116193.0), 257985.0), -289485.0), 164409.0), -37848.0), -12.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 21126.0, -116193.0), 257985.0), -289485.0), 164409.0), -37848.0), -12.0), 60.0), 120.0), -54.0), -108.0)) * 0.001388888888888889);}
 inline float theta_l6_5(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -7042.0, 38731.0), -85995.0), 96495.0), -54803.0), 12615.0), 3.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -7042.0, 38731.0), -85995.0), 96495.0), -54803.0), 12615.0), 3.0), -5.0), -15.0), 4.0), 12.0)) * 0.001388888888888889);}
 inline float iota_l6_5(float y){
-  return ((y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, 1006.0, -5533.0), 12285.0), -13785.0), 7829.0), -1802.0)) / 720.0);}
+  return ((y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, 1006.0, -5533.0), 12285.0), -13785.0), 7829.0), -1802.0)) * 0.001388888888888889);}
 
 
 inline float alpha_l6_6(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 3604.0, -23426.0), 63866.0), -93577.0), 77815.0), -34869.0), 6587.0), 1.0), -3.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 3604.0, -23426.0), 63866.0), -93577.0), 77815.0), -34869.0), 6587.0), 1.0), -3.0), -5.0), 15.0), 4.0), -12.0)) * 0.001388888888888889);}
 inline float beta_l6_6(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -25228.0, 163982.0), -447062.0), 655039.0), -544705.0), 244083.0), -46109.0), -6.0), 12.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -25228.0, 163982.0), -447062.0), 655039.0), -544705.0), 244083.0), -46109.0), -6.0), 12.0), 60.0), -120.0), -54.0), 108.0)) * 0.001388888888888889);}
 inline float gamma_l6_6(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 75684.0, -491946.0), 1341186.0), -1965117.0), 1634115.0), -732249.0), 138327.0), 15.0), -15.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 75684.0, -491946.0), 1341186.0), -1965117.0), 1634115.0), -732249.0), 138327.0), 15.0), -15.0), -195.0), 195.0), 540.0), -540.0)) * 0.001388888888888889);}
 inline float delta_l6_6(float y){
-  return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -126140.0, 819910.0), -2235310.0), 3275195.0), -2723525.0), 1220415.0), -230545.0), -20.0), 280.0), -980.0), 720.0) / 720.0);}
+  return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -126140.0, 819910.0), -2235310.0), 3275195.0), -2723525.0), 1220415.0), -230545.0), -20.0), 280.0), -980.0), 720.0) * 0.001388888888888889);}
 inline float eta_l6_6(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 126140.0, -819910.0), 2235310.0), -3275195.0), 2723525.0), -1220415.0), 230545.0), 15.0), 15.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 126140.0, -819910.0), 2235310.0), -3275195.0), 2723525.0), -1220415.0), 230545.0), 15.0), 15.0), -195.0), -195.0), 540.0), 540.0)) * 0.001388888888888889);}
 inline float zeta_l6_6(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -75684.0, 491946.0), -1341186.0), 1965117.0), -1634115.0), 732249.0), -138327.0), -6.0), -12.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -75684.0, 491946.0), -1341186.0), 1965117.0), -1634115.0), 732249.0), -138327.0), -6.0), -12.0), 60.0), 120.0), -54.0), -108.0)) * 0.001388888888888889);}
 inline float theta_l6_6(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 25228.0, -163982.0), 447062.0), -655039.0), 544705.0), -244083.0), 46109.0), 1.0), 3.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 25228.0, -163982.0), 447062.0), -655039.0), 544705.0), -244083.0), 46109.0), 1.0), 3.0), -5.0), -15.0), 4.0), 12.0)) * 0.001388888888888889);}
 inline float iota_l6_6(float y){
-  return ((y * y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3604.0, 23426.0), -63866.0), 93577.0), -77815.0), 34869.0), -6587.0)) / 720.0);}
+  return ((y * y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3604.0, 23426.0), -63866.0), 93577.0), -77815.0), 34869.0), -6587.0)) * 0.001388888888888889);}
 
 
 inline float alpha_l8_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3569.0, 16061.0), -27454.0), 21126.0), -6125.0), 49.0), -196.0), -36.0), 144.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3569.0, 16061.0), -27454.0), 21126.0), -6125.0), 49.0), -196.0), -36.0), 144.0)) * 2.48015873015873e-05);}
 inline float beta_l8_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 32121.0, -144548.0), 247074.0), -190092.0), 55125.0), -672.0), 2016.0), 512.0), -1536.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 32121.0, -144548.0), 247074.0), -190092.0), 55125.0), -672.0), 2016.0), 512.0), -1536.0)) * 2.48015873015873e-05);}
 inline float gamma_l8_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -128484.0, 578188.0), -988256.0), 760312.0), -221060.0), 4732.0), -9464.0), -4032.0), 8064.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -128484.0, 578188.0), -988256.0), 760312.0), -221060.0), 4732.0), -9464.0), -4032.0), 8064.0)) * 2.48015873015873e-05);}
 inline float delta_l8_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 299796.0, -1349096.0), 2305856.0), -1774136.0), 517580.0), -13664.0), 13664.0), 32256.0), -32256.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 299796.0, -1349096.0), 2305856.0), -1774136.0), 517580.0), -13664.0), 13664.0), 32256.0), -32256.0)) * 2.48015873015873e-05);}
 inline float eta_l8_4(float y){
-  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -449694.0, 2023630.0), -3458700.0), 2661540.0), -778806.0), 19110.0), -57400.0), 40320.0) / 40320.0);}
+  return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -449694.0, 2023630.0), -3458700.0), 2661540.0), -778806.0), 19110.0), -57400.0), 40320.0) * 2.48015873015873e-05);}
 inline float zeta_l8_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 449694.0, -2023616.0), 3458644.0), -2662016.0), 780430.0), -13664.0), -13664.0), 32256.0), 32256.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 449694.0, -2023616.0), 3458644.0), -2662016.0), 780430.0), -13664.0), -13664.0), 32256.0), 32256.0)) * 2.48015873015873e-05);}
 inline float theta_l8_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -299796.0, 1349068.0), -2305744.0), 1775032.0), -520660.0), 4732.0), 9464.0), -4032.0), -8064.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -299796.0, 1349068.0), -2305744.0), 1775032.0), -520660.0), 4732.0), 9464.0), -4032.0), -8064.0)) * 2.48015873015873e-05);}
 inline float iota_l8_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 128484.0, -578168.0), 988176.0), -760872.0), 223020.0), -672.0), -2016.0), 512.0), 1536.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 128484.0, -578168.0), 988176.0), -760872.0), 223020.0), -672.0), -2016.0), 512.0), 1536.0)) * 2.48015873015873e-05);}
 inline float kappa_l8_4(float y){
-  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -32121.0, 144541.0), -247046.0), 190246.0), -55685.0), 49.0), 196.0), -36.0), -144.0)) / 40320.0);}
+  return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -32121.0, 144541.0), -247046.0), 190246.0), -55685.0), 49.0), 196.0), -36.0), -144.0)) * 2.48015873015873e-05);}
 inline float mu_l8_4(float y){
-  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 3569.0, -16060.0), 27450.0), -21140.0), 6181.0)) / 40320.0);}
-
+  return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 3569.0, -16060.0), 27450.0), -21140.0), 6181.0)) * 2.48015873015873e-05);}
diff --git a/HySoP/hysop/gpu/config_default.py b/HySoP/hysop/gpu/config_default.py
index 3a96866ed..8bad707b9 100644
--- a/HySoP/hysop/gpu/config_default.py
+++ b/HySoP/hysop/gpu/config_default.py
@@ -172,3 +172,8 @@ kernels_config[2][DOUBLE_GPU]['advec_and_remesh'] = \
       "kernels/advection_and_remeshing_noVec.cl"],
      False, 1, advection_and_remeshing_index_space)
 
+kernels_config[3][FLOAT_GPU]['diffusion'] = \
+    (["common.cl", "kernels/diffusion.cl"],
+     16, 1, 1,
+     lambda size, nb_part, tile: ((size[0], size[1] / nb_part),
+                                  (tile, tile / nb_part)))
diff --git a/HySoP/hysop/gpu/config_k20m.py b/HySoP/hysop/gpu/config_k20m.py
index 102633579..e80548af6 100644
--- a/HySoP/hysop/gpu/config_k20m.py
+++ b/HySoP/hysop/gpu/config_k20m.py
@@ -180,3 +180,8 @@ kernels_config[2][DOUBLE_GPU]['advec_and_remesh'] = \
       "kernels/advection_and_remeshing_noVec.cl"],
      False, 1, advection_and_remeshing_index_space)
 
+kernels_config[3][DOUBLE_GPU]['diffusion'] = \
+    (["common.cl", "kernels/diffusion.cl"],
+     16, 4, 1,
+     lambda size, nb_part, tile: ((size[0], size[1] / nb_part),
+                                  (tile, tile / nb_part)))
diff --git a/HySoP/hysop/gpu/gpu_operator.py b/HySoP/hysop/gpu/gpu_operator.py
index ddae1f375..cf6e1387f 100644
--- a/HySoP/hysop/gpu/gpu_operator.py
+++ b/HySoP/hysop/gpu/gpu_operator.py
@@ -51,7 +51,7 @@ class GPUOperator(object):
         if self.dim == 3 and self.direction == 1:
             self._reorderVect = lambda v: (v[1], v[0], v[2])
         if self.dim == 3 and self.direction == 2:
-            if self._main_size == 1 and self.method[Splitting].find('o2') >= 0:
+            if self._comm_size == 1 and self.method[Splitting].find('o2') >= 0:
                 self._reorderVect = lambda v: (v[2], v[0], v[1])
             else:
                 self._reorderVect = lambda v: (v[2], v[1], v[0])
diff --git a/HySoP/hysop/gpu/tools.py b/HySoP/hysop/gpu/tools.py
index 98a369834..acee2503b 100644
--- a/HySoP/hysop/gpu/tools.py
+++ b/HySoP/hysop/gpu/tools.py
@@ -5,9 +5,10 @@ Tools for gpu management.
 """
 from parmepy import __VERBOSE__, __DEFAULT_PLATFORM_ID__, __DEFAULT_DEVICE_ID__
 from parmepy.constants import np, PARMES_REAL, ORDER
-from parmepy.gpu import cl, clTools, GPU_SRC, CL_PROFILE, clArray
+from parmepy.gpu import cl, clTools, GPU_SRC, CL_PROFILE
 import parmepy.tools.numpywrappers as npw
 import re
+import mpi4py.MPI as mpi
 FLOAT_GPU, DOUBLE_GPU = np.float32, np.float64
 
 
@@ -43,13 +44,22 @@ class OpenCLEnvironment(object):
         self.ctx = self._get_context(self.device, gl_sharing)
         ## OpenCL queue
         self.queue = self._get_queue(self.ctx)
+
         ## MPI sub-communicator for all processes attached to the same device
         if comm is None:
             from parmepy.mpi.main_var import main_comm
         else:
             main_comm = comm
-        self.gpu_comm = main_comm.Split(color=device_id,
-                                        key=main_comm.Get_rank())
+        # Splitting the mpi communicator by the device id is not enough:
+        # the id of the first gpu of each node is 0
+        # We build color from the processor name and the id
+        import hashlib
+        # The md5 sum of the proc name is truncated to obtain an integer
+        # for fortran (32bit)
+        hash_name = hashlib.md5(mpi.Get_processor_name()).hexdigest()[-7:]
+        self.gpu_comm = main_comm.Split(
+            color=int(hash_name, 16) + device_id,
+            key=main_comm.Get_rank())
 
         ## Memory Pool allocator (immediate allocator)
         self.memPool = clTools.MemoryPool(
@@ -61,7 +71,7 @@ class OpenCLEnvironment(object):
         elif self.precision is DOUBLE_GPU:
             self.prec_size = 8
         self.macros = {}
-        self.default_build_opts = "-Werror"
+        self.default_build_opts = "-Werror" + self._get_precision_opts()
 
         ## Kernels configuration dictionary
         if self.device.name == "Cayman":
@@ -114,7 +124,7 @@ class OpenCLEnvironment(object):
                 print ("Warning, GPU precision is overrided from",)
                 print (self.precision, 'to', precision)
             self.precision = precision
-            self.default_build_opts = self._get_precision_opts()
+            self.default_build_opts = "-Werror" + self._get_precision_opts()
 
     def _get_platform(self, platform_id):
         """
@@ -360,9 +370,12 @@ class OpenCLEnvironment(object):
         for k in self.macros:
             gpu_src = gpu_src.replace(k, str(self.macros[k]))
         if self.precision is FLOAT_GPU:
-            float_replace = re.compile(r'(?P<float>\d\.\d+)')
+            # Regexp to add 'f' suffix to float constants
+            # Match 1.2, 1.234, 1.2e3, 1.2E-05
+            float_replace = re.compile(r'(?P<float>\d\.\d+((e|E)-?\d+)?)')
             prg = cl.Program(
-                self.ctx, float_replace.sub(r'\g<float>f', gpu_src))
+                self.ctx,
+                float_replace.sub(r'\g<float>f', gpu_src))
         else:
             prg = cl.Program(self.ctx, gpu_src.replace('float', 'double'))
         # OpenCL program
@@ -486,47 +499,64 @@ class OpenCLEnvironment(object):
         return src
 
     def global_allocation(self, array):
-        buff = clArray.empty(self.queue, array.shape, self.precision,
-                             order=ORDER, allocator=self.memPool)
-        self.available_mem -= array.nbytes
-        return buff
-
-    def LocalMemAllocator(self, sizes_list):
+        # buff = clArray.empty(self.queue, array.shape, self.precision,
+        #                      order=ORDER, allocator=self.memPool)
+        # self.available_mem -= array.nbytes
+        # return buff
+        clBuff = cl.Buffer(self.ctx, cl.mem_flags.READ_WRITE,
+                           size=int(self.prec_size * np.prod(array.shape)))
+        # Touch the buffer on the device to force the actual allocation.
+        # Transfer a single element to the device (its precision is irrelevant)
+        e = np.zeros((1,), dtype=np.float64)
+        cl.enqueue_copy(self.queue, clBuff, e,
+                        buffer_origin=(0,0,0), host_origin=(0,0,0),
+                        region=(e.nbytes,)).wait()
+        self.available_mem -= clBuff.size
+        return clBuff
+
+    def LocalMemAllocator(self, sizes_list, type_list=None):
         """
         Allocates spaces in device local memory.
         @param sizes_list : list of sizes.
+        @param type_list : list of corresponding types
         It returns a list of buffers of given size (one per size specified in
         in the list) and the size of new buffers.
         @remark : Buffers are stored and could be reused.
         @remark : it assumes that all returned buffers are different
         """
         new_alloc = 0
+        if type_list is None:
+            type_list = [PARMES_REAL] * len(sizes_list)
         buff_list = []  # Returned list
-        for size in sizes_list:
+        keys_list = []
+        for s, t in zip(sizes_list, type_list):
+            keys_list.append(int(t(0).nbytes * s))
+
+        for size, key, t in zip(sizes_list, keys_list, type_list):
             buff = None
             try:
                 # List of existing buffers not already in the list
-                avail_buff = [b for b in self._locMem_Buffers[size]
+                avail_buff = [b for b in self._locMem_Buffers[key]
                               if b not in buff_list]
                 if len(avail_buff) > 0:
                     # adding the first buffer
                     buff = avail_buff[0]
                 else:
                     # Allocate a new buffer
-                    buff = cl.LocalMemory(int(self.prec_size * size))
+                    buff = cl.LocalMemory(int(t(0).nbytes * size))
                     new_alloc += buff.size
-                    self._locMem_Buffers[size].append(buff)
+                    self._locMem_Buffers[key].append(buff)
             except KeyError:
                 # Allocate a fist buffer of given size
-                buff = cl.LocalMemory(int(self.prec_size * size))
+                buff = cl.LocalMemory(int(t(0).nbytes * size))
                 new_alloc += buff.size
-                self._locMem_Buffers[size] = [buff]
+                self._locMem_Buffers[key] = [buff]
             buff_list.append(buff)
         return buff_list, new_alloc
 
 
-def get_opengl_shared_environment(platform_id=__DEFAULT_PLATFORM_ID__,
-                                  device_id=__DEFAULT_DEVICE_ID__,
+def get_opengl_shared_environment(platform_id=None,
+                                  device_id=None,
                                   device_type=None, precision=PARMES_REAL,
                                   comm=None):
     """
@@ -540,6 +570,10 @@ def get_opengl_shared_environment(platform_id=__DEFAULT_PLATFORM_ID__,
 
     The context is obtained with gl-shared properties depending on the OS.
     """
+    if platform_id is None:
+        platform_id = __DEFAULT_PLATFORM_ID__
+    if device_id is None:
+        device_id = __DEFAULT_DEVICE_ID__
     global __cl_env
     if __cl_env is None:
         __cl_env = OpenCLEnvironment(platform_id, device_id, device_type,
@@ -551,7 +585,7 @@ def get_opengl_shared_environment(platform_id=__DEFAULT_PLATFORM_ID__,
 
 
 def get_opencl_environment(platform_id=None,
-                           device_id=__DEFAULT_DEVICE_ID__,
+                           device_id=None,
                            device_type=None, precision=PARMES_REAL,
                            comm=None):
     """
diff --git a/HySoP/hysop/operator/monitors/printer.py b/HySoP/hysop/operator/monitors/printer.py
index 4d773c732..8f0110ac9 100644
--- a/HySoP/hysop/operator/monitors/printer.py
+++ b/HySoP/hysop/operator/monitors/printer.py
@@ -110,7 +110,7 @@ class Printer(Monitoring):
                 self.subset.globalResolution(self._topology)
         else:
             self.globalResolution = \
-                list(self._topology.mesh.resolution - 1)
+                list(self._topology.mesh.resolution)
             self._slices = self._topology.mesh.iCompute
         self.globalResolution.reverse()
 
-- 
GitLab