diff --git a/HySoP/hysop/gpu/cl_src/kernels/fine_to_coarse_filter.cl b/HySoP/hysop/gpu/cl_src/kernels/fine_to_coarse_filter.cl
index 3c51049ec0607176860e0310fe6c6b552f77145d..9f0436f7b562335cd172c089551b8d65d07cb5cb 100644
--- a/HySoP/hysop/gpu/cl_src/kernels/fine_to_coarse_filter.cl
+++ b/HySoP/hysop/gpu/cl_src/kernels/fine_to_coarse_filter.cl
@@ -13,9 +13,9 @@ __kernel void coarse_to_fine_filter(__global const float* scal_in,
   // Data in the fine grid are read only once for the whole computation.
   // Because of the stencil, these data are spread over multiple coarse grid cells -> we need a global memory synchronization.
   // The global synchronization is obtained by several kernel launch with an offset
-  unsigned int lid = get_local_id(0); // [0;31]
-  unsigned int gid_y = get_global_id(1); // [0:127]
-  unsigned int gid_z = get_global_id(2); // [0:127]
+  unsigned int lid = get_local_id(0);
+  unsigned int gid_y = get_global_id(1);
+  unsigned int gid_z = get_global_id(2);
   unsigned int iy_c = gid_y*L_STENCIL+offset_y;
   unsigned int iz_c = gid_z*L_STENCIL+offset_z;
   unsigned int iy_f = iy_c*PTS_PER_CELL_Y;
@@ -152,17 +152,16 @@ __kernel void coarse_to_fine_filter(__global const float* scal_in,
 		  p_res[3][3][3] += scale_factor * wx.w * wy.w * wz.w * line[lid*PTS_PER_CELL_X+pt_x];
 
 		  #endif
-
-		  barrier(CLK_LOCAL_MEM_FENCE);
 		}
 	    }
 	}
       // Store the registers results in local memory
       for (pt_z=0;pt_z<L_STENCIL;pt_z++)
 	for (pt_y=0;pt_y<L_STENCIL;pt_y++)
-	  for (pt_x=0;pt_x<L_STENCIL;pt_x++)
+	  for (pt_x=0;pt_x<L_STENCIL;pt_x++) {
 	    result[GHOSTS_OUT_X+b_id*WG+lid-SHIFT_STENCIL+pt_x][pt_y][pt_z] += p_res[pt_x][pt_y][pt_z];
-
+	    barrier(CLK_LOCAL_MEM_FENCE);
+	  }
     }
 
   // Write result in output array
diff --git a/HySoP/hysop/gpu/config_k20m.py b/HySoP/hysop/gpu/config_k20m.py
index 633bd60bbd644f76d6b8d8d39357d61104be1750..e9a23eb69b19b39d0e9dcbd36fc232427f2b562b 100644
--- a/HySoP/hysop/gpu/config_k20m.py
+++ b/HySoP/hysop/gpu/config_k20m.py
@@ -206,3 +206,31 @@ kernels_config[3][DOUBLE_GPU]['advec_MS_and_remesh_comm'] = \
     (['common.cl', 'remeshing/weights_noVec.cl',
       'kernels/comm_advection_MS_and_remeshing_noVec.cl'],
      False, 1, advection_and_remeshing_index_space)
+
+
+def fine_to_coarse_filter_index_space(size, stencil_width):
+    """Build the index space used to launch the fine-to-coarse filter.
+
+    size: indexable holding at least 3 sizes (expected to be integers).
+    stencil_width: divisor applied to every entry of size.
+
+    Returns a pair of 3-tuples ((wg, ny, nz), (wg, 1, 1)) where
+    wg = size[0] // (2 * stencil_width), ny = size[1] // stencil_width
+    and nz = size[2] // stencil_width. Presumably the OpenCL global and
+    local work sizes -- confirm against the kernel launcher.
+    """
+    # Floor division: the result stays integral even when '/' means true
+    # division (Python 3 or 'from __future__ import division').
+    wg = size[0] // (2 * stencil_width)
+    return ((wg, size[1] // stencil_width, size[2] // stencil_width),
+            (wg, 1, 1))
+
+
+kernels_config[3][FLOAT_GPU]['fine_to_coarse_filter'] = \
+    (["common.cl", 'remeshing/weights_noVec.cl',
+      "kernels/fine_to_coarse_filter.cl"],
+     1, fine_to_coarse_filter_index_space)
+kernels_config[3][DOUBLE_GPU]['fine_to_coarse_filter'] = \
+    (["common.cl", 'remeshing/weights_noVec.cl',
+      "kernels/fine_to_coarse_filter.cl"],
+     1, fine_to_coarse_filter_index_space)
diff --git a/HySoP/hysop/gpu/tests/test_gpu_multiresolution_filter.py b/HySoP/hysop/gpu/tests/test_gpu_multiresolution_filter.py
index d6552d3397b04eb7e84398869732871b6134b88c..60527f458f2150728a9a2c8b6a93a118bdb82110 100644
--- a/HySoP/hysop/gpu/tests/test_gpu_multiresolution_filter.py
+++ b/HySoP/hysop/gpu/tests/test_gpu_multiresolution_filter.py
@@ -5,8 +5,9 @@ from hysop.fields.continuous import Field
 from hysop.operator.multiresolution_filter import MultiresolutionFilter
 import hysop.tools.numpywrappers as npw
 import numpy as np
-from hysop.methods_keys import Remesh, Support
+from hysop.methods_keys import Remesh, Support, ExtraArgs
 from hysop.methods import Rmsh_Linear, L2_1
+from hysop.mpi.main_var import main_rank
 
 
 L = [1., 1., 1.]
@@ -30,7 +31,8 @@ def test_filter_linear():
     op = MultiresolutionFilter(d_in=d_fine, d_out=d_coarse,
                                variables={f: d_coarse},
                                method={Remesh: Rmsh_Linear,
-                                       Support: 'gpu', })
+                                       Support: 'gpu',
+                                       ExtraArgs:{'device_id': main_rank}})
     op.discretize()
     op.setup()
     topo_coarse = op.discreteFields[f].topology
@@ -39,6 +41,7 @@ def test_filter_linear():
     f.initialize(topo=topo_fine)
     f_out = f.discreteFields[topo_coarse]
     f_out.toDevice()
+    f_out.wait()
     op.apply(simu)
     f_out.toHost()
     f_out.wait()
@@ -77,7 +80,8 @@ def test_filter_L2_1():
     op = MultiresolutionFilter(d_in=d_fine, d_out=d_coarse,
                                variables={f: d_coarse},
                                method={Remesh: L2_1,
-                                       Support: 'gpu', })
+                                       Support: 'gpu',
+                                       ExtraArgs:{'device_id': main_rank} })
     op.discretize()
     op.setup()
     topo_coarse = op.discreteFields[f].topology
@@ -110,7 +114,3 @@ def test_filter_L2_1():
             np.max(np.abs(valid[0][topo_coarse.mesh.iCompute] -
                           f_out[0][topo_coarse.mesh.iCompute]))
 
-
-if __name__ == '__main__':
-    test_filter_linear()
-    test_filter_L2_1()
diff --git a/HySoP/hysop/gpu/tools.py b/HySoP/hysop/gpu/tools.py
index f4d2df159e5a4b95db9d15c97e55fcf597e360dd..ee88a35095064058165b7f26b64c4f3808272792 100644
--- a/HySoP/hysop/gpu/tools.py
+++ b/HySoP/hysop/gpu/tools.py
@@ -11,7 +11,6 @@ import re
 import mpi4py.MPI as mpi
 FLOAT_GPU, DOUBLE_GPU = np.float32, np.float64
 
-
 ## Global variable handling an OpenCL Environment instance
 __cl_env = None
 
@@ -189,6 +188,7 @@ class OpenCLEnvironment(object):
             assert device_type.upper() == cl.device_type.to_string(device.type)
         if display or __VERBOSE__:
             print ("  Device")
+            print ("  - id                :", device_id)
             print ("  - Name                :",)
             print (device.name)
             print ("  - Type                :",)
diff --git a/HySoP/hysop/operator/discrete/baroclinic.py b/HySoP/hysop/operator/discrete/baroclinic.py
index 8810ea18ec2055fef2b9bfb0eaad58cd66dfdaee..f30ed13a578638461cdf2077d3d2382310a21f03 100644
--- a/HySoP/hysop/operator/discrete/baroclinic.py
+++ b/HySoP/hysop/operator/discrete/baroclinic.py
@@ -1,11 +1,14 @@
 # -*- coding: utf-8 -*-
-"""discrete MultiPhase Rot Grad P
+"""
+@file operator/discrete/baroclinic.py
+Discrete MultiPhase Rot Grad P
 """
 from hysop.operator.discrete.discrete import DiscreteOperator
 import hysop.numerics.differential_operations as diff_op
 from hysop.constants import debug, XDIR, YDIR, ZDIR, np
 from hysop.methods_keys import SpaceDiscretisation
 from hysop.numerics.update_ghosts import UpdateGhosts
+from hysop.tools.profiler import ftime
 import hysop.tools.numpywrappers as npw
 
 
@@ -159,6 +162,7 @@ class Baroclinic(DiscreteOperator):
             self._baroclinicTerm[d][iCompute] *= dt
             self.vorticity[d][iCompute] += self._baroclinicTerm[d][iCompute]
 
+
         # reinitialise for next iteration
         # velo(n-1) update
         for d in xrange(self.velocity.dimension):