diff --git a/.gitignore b/.gitignore
index 0f08dbcabd43d8bcd3fcaf73ac1dba9f50da7fe4..aa4dfb4ff9030e69783ed716d5762beb64a1973e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
 *.pyc
-parmepy/__init__.py
+HySoP/hysop/__init__.py
 .#*
-parmepy/.pyflymakercc
+HySoP/hysop/.pyflymakercc
 .DS_Store
\ No newline at end of file
diff --git a/Examples/levelSet2D.cl b/Examples/LevelSet2D/levelSet2D.cl
similarity index 99%
rename from Examples/levelSet2D.cl
rename to Examples/LevelSet2D/levelSet2D.cl
index 7ae57d6cce35f5354d7cfd7a1c1370ef7f72d223..9d41f627f668b2385a2acb4f1ffa429721c97570 100644
--- a/Examples/levelSet2D.cl
+++ b/Examples/LevelSet2D/levelSet2D.cl
@@ -37,4 +37,3 @@ __kernel void initVelocity(__global float* veloX,__global float* veloY,
       veloY[i*V_NB_Y+gidY] = sin(piy) *sin(piy) * sin(pix * 2) * time_term;
     }
 }
-
diff --git a/Examples/LevelSet2D/levelSet2D.py b/Examples/LevelSet2D/levelSet2D.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9b4e508c87b391fad6d42f4b076329042a758f3
--- /dev/null
+++ b/Examples/LevelSet2D/levelSet2D.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+import hysop
+import hysop.gpu
+hysop.gpu.CL_PROFILE = True
+from hysop.constants import np, HDF5
+from hysop.mpi.main_var import MPI
+from hysop.tools.parameters import Discretization, IO_params
+from hysop.methods_keys import TimeIntegrator, Interpolation, Remesh,\
+    Support, Splitting, MultiScale
+from hysop.numerics.integrators.runge_kutta2 import RK2 as RK
+from hysop.numerics.remeshing import L2_1, L4_2, L4_4, L8_4, L6_4, Linear
+from hysop.operator.advection import Advection
+from hysop.operator.analytic import Analytic
+from hysop.operator.custom import CustomMonitor
+from hysop.operator.hdf_io import HDF_Writer
+from hysop.gpu.gpu_transfer import DataTransfer
+from hysop.problem.problem import Problem
+sin, cos, pi, sqrt = np.sin, np.cos, np.pi, np.sqrt
+
+
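+# Velocity of the 2D time-reversed swirl (single-vortex) test: the
+# cos(t*pi/3) factor periodically reverses the flow, so the initial disc
+# of radius 0.15 (set in scalaire below) should be recovered whenever the
+# elapsed time is a multiple of 3.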
+def vitesse(res, x, y, t=0.):
+    res[0][...] = -np.sin(x * np.pi) ** 2 * np.sin(y * np.pi * 2) * \
+        np.cos(t * np.pi / 3.)
+    res[1][...] = np.sin(y * np.pi) ** 2 * np.sin(x * np.pi * 2) * \
+        np.cos(t * np.pi / 3.)
+    return res
+
+
+def scalaire(res, x, y, t=0.):
+    rr = np.sqrt((x - 0.5) ** 2 + (y - 0.75) ** 2)
+    res[0][...] = 0.
+    res[0][rr < 0.15] = 1.
+    return res
+
+
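+# Diagnostic used as accuracy measure for the level-set advection: count
+# the cells where the scalar exceeds 0.5 and multiply by the cell area,
+# i.e. an estimate of the area enclosed by the 0.5 iso-contour, which
+# should be conserved by the exact flow.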
+def volume(_simu, var):
+    return np.sum(var[0].data[0] > 0.5) * \
+        np.prod(var[0].topology.mesh.space_step)
+
+
+# Parameters
+dim = 2
+boxLength = [1., 1.]
+boxMin = [0., 0.]
+nbElem_v = [1025] * dim
+nbElem_s = [1025] * dim
+dv = Discretization(nbElem_v)
+ds = Discretization(nbElem_s)
+
+timeStep = 0.35 / (2. * np.pi)
+finalTime = 12. - timeStep
+outputModulo = 20
+simu = hysop.Simulation(tinit=0.0, tend=finalTime,
+                        timeStep=timeStep, iterMax=100000)
+
+# Domain
+box = hysop.Box(length=boxLength, origin=boxMin)
+
+# Fields
+scal = hysop.Field(domain=box, name='Scalar')  # formula=scalaire
+velo = hysop.Field(domain=box, name='Velocity',
+                   isVector=True)  # formula=vitesse
+# Operators
+advec_method = {TimeIntegrator: RK,
+                Interpolation: Linear,
+                Remesh: L2_1,
+                Support: 'gpu_2k',
+                Splitting: 'o2',
+                MultiScale: Linear}
+# advec_method = {Scales: 'p_64', MultiScale: 'L4_4'}
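+# Note (interpretation of the method keys, not stated in this file):
+# 'gpu_2k' presumably selects the two-kernel OpenCL advection (separate
+# advection and remeshing kernels, versus 'gpu_1k' for the fused variant),
+# 'o2' a second-order Strang directional splitting, and MultiScale: Linear
+# the velocity interpolation used when velocity and scalar grids differ.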
+
+advec = Advection(velo,
+                  discretization=dv,
+                  variables={scal: ds},
+                  method=advec_method,
+                  user_src=['./levelSet2D.cl'])
+advec.discretize()
+
+# Get topologies from operator
+topo_v = advec.advec_dir[0].discreteFields[velo].topology
+topo_s = advec.advec_dir[0].discreteFields[scal].topology
+
+velocity = Analytic(variables={velo: topo_v},
+                    method={Support: 'gpu'},
+                    topo=topo_v)
+
+volume_m = CustomMonitor(function=volume, res_shape=1,
+                         variables={scal: topo_s},
+                         io_params=IO_params(filename="volume.dat"))
+p = HDF_Writer(variables={scal: topo_s},
+               io_params=IO_params(frequency=outputModulo,
+                                   filename="levelset",
+                                   fileformat=HDF5))
+# ToHost frequency is set to 1 since the volume computation
+# is performed at each iteration.
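+# (Assumed behaviour: DataTransfer copies the GPU scalar back to host
+# memory, and run_till presumably lists the operators, here the HDF writer
+# and the volume monitor, that consume that host copy.)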
+toHost = DataTransfer(source=topo_s, target=p,
+                      variables={scal: topo_s},
+                      freq=1,
+                      run_till=[p, volume_m])
+
+pb = Problem([velocity, advec, toHost, p, volume_m],
+             simu, dumpFreq=-1)
+
+pb.setup()
+scal.initialize(topo=topo_s)
+velo.initialize(topo=topo_v)
+
+scal.discreteFields[topo_s].toHost()
+scal.discreteFields[topo_s].wait()
+volume_m.apply(simu)
+p.apply(simu)
+
+ctime = MPI.Wtime()
+pb.solve()
+print MPI.Wtime() - ctime
+
+simu.finalize()
+scal.discreteFields[topo_s].toHost()
+scal.discreteFields[topo_s].wait()
+p.apply(simu)
+volume_m.apply(simu)
+
+pb.finalize()
+
+print pb.profiler
diff --git a/Examples/levelSet3D.cl b/Examples/LevelSet3D/levelSet3D.cl
similarity index 100%
rename from Examples/levelSet3D.cl
rename to Examples/LevelSet3D/levelSet3D.cl
diff --git a/Examples/LevelSet3D/levelSet3D.py b/Examples/LevelSet3D/levelSet3D.py
new file mode 100644
index 0000000000000000000000000000000000000000..7e0d5feb622d58dfc0a4818d5a84ba74ded60e28
--- /dev/null
+++ b/Examples/LevelSet3D/levelSet3D.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+import hysop
+import hysop.gpu
+hysop.gpu.CL_PROFILE = True
+from hysop.constants import np, HDF5
+from hysop.mpi.main_var import MPI, main_size, main_rank, main_comm
+from hysop.tools.parameters import Discretization, IO_params
+from hysop.methods_keys import TimeIntegrator, Interpolation, Remesh,\
+    Support, Splitting, MultiScale
+from hysop.numerics.integrators.runge_kutta2 import RK2 as RK
+from hysop.numerics.remeshing import L6_4 as remesh_formula
+from hysop.numerics.remeshing import Linear
+from hysop.operator.advection import Advection
+from hysop.operator.analytic import Analytic
+from hysop.operator.custom import CustomMonitor
+from hysop.operator.hdf_io import HDF_Writer
+from hysop.gpu.gpu_transfer import DataTransfer
+from hysop.problem.problem import Problem
+from hysop.problem.simulation import Simulation
+sin, cos, pi, sqrt = np.sin, np.cos, np.pi, np.sqrt
+
+
+def scalaire(res, x, y, z, t=0.):
+    r = sqrt((x - 0.35) ** 2 + (y - 0.35) ** 2 + (z - 0.35) ** 2)
+    res[0][...] = 0.
+    res[0][r < 0.15] = 1.
+    return res
+
+
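+# Velocity of the classical 3D deformation (LeVeque) test: the
+# cos(t*pi/3) factor reverses the flow in time, so the initial sphere of
+# radius 0.15 should be recovered at the final time t = 3 used below.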
+def vitesse(res, x, y, z, t=0.):
+    res[0][...] = 2. * sin(pi*x)**2*sin(2.*pi*y)*sin(2.*pi*z)*cos(t*pi/3.)
+    res[1][...] = -sin(2.*pi*x)*sin(pi*y)**2*sin(2.*pi*z)*cos(t*pi/3.)
+    res[2][...] = -sin(2.*pi*x)*sin(2.*pi*y)*sin(pi*z)**2*cos(t*pi/3.)
+    return res
+
+
+def volume(_simu, var):
+    v_loc = np.sum(var[0].data[0] > 0.5) * np.prod(
+        var[0].topology.mesh.space_step)
+    return main_comm.allreduce(sendobj=v_loc, op=MPI.SUM)
+
+
+# Parameters
+dim = 3
+boxLength = [1., ] * dim
+boxMin = [0., ] * dim
+nbElem_v = [129] * dim
+nbElem_s = [129] * dim
+if nbElem_v[0] == nbElem_s[0]:
+    dv = Discretization(nbElem_v)
+else:
+    # In multi-scale, we need 1 ghost point for velocity
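+    # (presumably because the linear MultiScale interpolation of the
+    # coarse velocity onto the finer scalar grid needs one velocity
+    # sample beyond the local subdomain boundary)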
+    dv = Discretization(nbElem_v, ghosts=[1, ] * dim)
+ds = Discretization(nbElem_s)
+
+timeStep = 0.35 / (4. * np.pi)
+finalTime = 3.
+outputModulo = 10
+simu = Simulation(tinit=0.0, tend=finalTime,
+                  timeStep=timeStep, iterMax=120)
+
+# Domain
+box = hysop.Box(length=boxLength, origin=boxMin)
+
+# Fields
+scal = hysop.Field(domain=box, name='Scalar')  # formula=scalaire
+velo = hysop.Field(domain=box, name='Velocity',
+                   isVector=True)  # formula=vitesse
+# Operators
+advec_method = {TimeIntegrator: RK,
+                Interpolation: Linear,
+                Remesh: remesh_formula,
+                Support: 'gpu_1k',
+                Splitting: 'o2',
+                MultiScale: Linear}
+
+if main_size == 1:
+    advec = Advection(velo,
+                      discretization=dv,
+                      variables={scal: ds},
+                      method=advec_method)
+    #user_src=['./levelSet3D.cl'])
+else:
+    # Multi-GPU advection must know a max dt and velocity to
+    # compute communication buffers
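+    # (Assumed rationale: the width of the exchanged layer scales with
+    # max_velocity * max_dt / dx, the farthest a particle can travel
+    # across a subdomain boundary in one step.)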
+    advec = Advection(velo,
+                      discretization=dv,
+                      variables={scal: ds},
+                      method=advec_method,
+                      user_src=['./levelSet3D.cl'],
+                      device_id=main_rank % 2,
+                      max_dt=simu.timeStep,
+                      max_velocity=[2, 1, 1])
+advec.discretize()
+
+# Get topologies from operator
+topo_v = advec.advec_dir[0].discreteFields[velo].topology
+topo_s = advec.advec_dir[0].discreteFields[scal].topology
+
+velocity = Analytic(variables={velo: topo_v},
+                    method={Support: 'gpu'},
+                    topo=topo_v)
+
+volume_m = CustomMonitor(function=volume, res_shape=1,
+                         variables={scal: topo_s},
+                         io_params=IO_params(filename="volume.dat"))
+p = HDF_Writer(variables={scal: topo_s},
+               io_params=IO_params(frequency=outputModulo,
+                                   filename="levelset",
+                                   fileformat=HDF5))
+# ToHost frequency is set to 1 since the volume computation
+# is performed at each iteration.
+toHost = DataTransfer(source=topo_s, target=p,
+                      variables={scal: topo_s},
+                      freq=1,
+                      run_till=[p, volume_m])
+
+pb = Problem([velocity, advec, toHost, p, volume_m],
+             simu, dumpFreq=-1)
+
+pb.setup()
+scal.initialize(topo=topo_s)
+velo.initialize(topo=topo_v)
+
+scal.discreteFields[topo_s].toHost()
+scal.discreteFields[topo_s].wait()
+volume_m.apply(simu)
+p.apply(simu)
+
+ctime = MPI.Wtime()
+pb.solve()
+print MPI.Wtime() - ctime
+
+simu.finalize()
+scal.discreteFields[topo_s].toHost()
+scal.discreteFields[topo_s].wait()
+p.apply(simu)
+volume_m.apply(simu)
+
+pb.finalize()
+print pb.profiler
diff --git a/Examples/LevelSet3D/levelSet3D_Scales.py b/Examples/LevelSet3D/levelSet3D_Scales.py
new file mode 100644
index 0000000000000000000000000000000000000000..efe860315f2f386db68bf6e0f841c82639675652
--- /dev/null
+++ b/Examples/LevelSet3D/levelSet3D_Scales.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+import hysop
+from hysop.constants import np, HDF5
+from hysop.mpi.main_var import MPI, main_comm
+from hysop.tools.parameters import Discretization, IO_params
+from hysop.methods_keys import Scales, MultiScale
+from hysop.operator.advection import Advection
+from hysop.operator.analytic import Analytic
+from hysop.operator.custom import CustomMonitor
+from hysop.operator.hdf_io import HDF_Writer
+from hysop.problem.problem import Problem
+from hysop.problem.simulation import Simulation
+sin, cos, pi, sqrt = np.sin, np.cos, np.pi, np.sqrt
+
+
+def scalaire(res, x, y, z, t=0.):
+    r = sqrt((x - 0.35) ** 2 + (y - 0.35) ** 2 + (z - 0.35) ** 2)
+    res[0][...] = 0.
+    res[0][r < 0.15] = 1.
+    return res
+
+
+def vitesse(res, x, y, z, t=0.):
+    res[0][...] = 2. * sin(pi*x)**2*sin(2.*pi*y)*sin(2.*pi*z)*cos(t*pi/3.)
+    res[1][...] = -sin(2.*pi*x)*sin(pi*y)**2*sin(2.*pi*z)*cos(t*pi/3.)
+    res[2][...] = -sin(2.*pi*x)*sin(2.*pi*y)*sin(pi*z)**2*cos(t*pi/3.)
+    return res
+
+
+def volume(_simu, var):
+    v_loc = np.sum(var[0].data[0] > 0.5) * np.prod(
+        var[0].topology.mesh.space_step)
+    return main_comm.allreduce(sendobj=v_loc, op=MPI.SUM)
+
+
+# Parameters
+dim = 3
+boxLength = [1., ] * dim
+boxMin = [0., ] * dim
+nbElem_v = [65] * dim
+nbElem_s = [65] * dim
+dv = Discretization(nbElem_v)
+ds = Discretization(nbElem_s)
+
+timeStep = 0.35 / (4. * np.pi)
+finalTime = 3.
+outputModulo = 10
+simu = Simulation(tinit=0.0, tend=finalTime,
+                  timeStep=timeStep, iterMax=120)
+
+# Domain
+box = hysop.Box(length=boxLength, origin=boxMin)
+
+# Fields
+scal = hysop.Field(domain=box, name='Scalar', formula=scalaire)
+velo = hysop.Field(domain=box, name='Velocity',
+                   isVector=True, formula=vitesse)
+# Operators
+advec = Advection(velo,
+                  discretization=dv,
+                  variables={scal: ds},
+                  method={Scales: 'p_64', MultiScale: 'L4_4'})
+advec.discretize()
+
+# Get topologies from operator
+topo_v = advec.discreteFields[velo].topology
+topo_s = advec.discreteFields[scal].topology
+
+velocity = Analytic(variables={velo: topo_v},
+                    topo=topo_v)
+
+volume_m = CustomMonitor(function=volume, res_shape=1,
+                         variables={scal: topo_s},
+                         io_params=IO_params(filename="volume.dat"))
+p = HDF_Writer(variables={scal: topo_s},
+               io_params=IO_params(frequency=outputModulo,
+                                   filename="levelset",
+                                   fileformat=HDF5))
+pb = Problem([velocity, advec, p, volume_m],
+             simu, dumpFreq=-1)
+
+pb.setup()
+scal.initialize(topo=topo_s)
+velo.initialize(topo=topo_v)
+
+volume_m.apply(simu)
+p.apply(simu)
+
+ctime = MPI.Wtime()
+pb.solve()
+print MPI.Wtime() - ctime
+
+simu.finalize()
+p.apply(simu)
+volume_m.apply(simu)
+
+pb.finalize()
+print pb.profiler
diff --git a/Examples/LevelSet3D/levelSet3D_Scales_MultiScale.py b/Examples/LevelSet3D/levelSet3D_Scales_MultiScale.py
new file mode 100644
index 0000000000000000000000000000000000000000..886b566bf29c2782a68b601e701112a7d22f1026
--- /dev/null
+++ b/Examples/LevelSet3D/levelSet3D_Scales_MultiScale.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+import hysop
+from hysop.constants import np, HDF5
+from hysop.mpi.main_var import MPI, main_comm
+from hysop.tools.parameters import Discretization, IO_params
+from hysop.methods_keys import Scales, MultiScale
+from hysop.operator.advection import Advection
+from hysop.operator.analytic import Analytic
+from hysop.operator.custom import CustomMonitor
+from hysop.operator.hdf_io import HDF_Writer
+from hysop.problem.problem import Problem
+from hysop.problem.simulation import Simulation
+sin, cos, pi, sqrt = np.sin, np.cos, np.pi, np.sqrt
+
+
+def scalaire(res, x, y, z, t=0.):
+    r = sqrt((x - 0.35) ** 2 + (y - 0.35) ** 2 + (z - 0.35) ** 2)
+    res[0][...] = 0.
+    res[0][r < 0.15] = 1.
+    return res
+
+
+def vitesse(res, x, y, z, t=0.):
+    res[0][...] = 2. * sin(pi*x)**2*sin(2.*pi*y)*sin(2.*pi*z)*cos(t*pi/3.)
+    res[1][...] = -sin(2.*pi*x)*sin(pi*y)**2*sin(2.*pi*z)*cos(t*pi/3.)
+    res[2][...] = -sin(2.*pi*x)*sin(2.*pi*y)*sin(pi*z)**2*cos(t*pi/3.)
+    return res
+
+
+def volume(_simu, var):
+    v_loc = np.sum(var[0].data[0] > 0.5) * np.prod(
+        var[0].topology.mesh.space_step)
+    return main_comm.allreduce(sendobj=v_loc, op=MPI.SUM)
+
+
+# Parameters
+dim = 3
+boxLength = [1., ] * dim
+boxMin = [0., ] * dim
+nbElem_v = [65] * dim
+nbElem_s = [129] * dim
+dv = Discretization(nbElem_v)
+ds = Discretization(nbElem_s)
+
+timeStep = 0.35 / (4. * np.pi)
+finalTime = 3.
+outputModulo = 10
+simu = Simulation(tinit=0.0, tend=finalTime,
+                  timeStep=timeStep, iterMax=120)
+
+# Domain
+box = hysop.Box(length=boxLength, origin=boxMin)
+
+# Build 2D MPI topologies directly on the domain
+topo_v = box.create_topology(dv, dim=2)
+topo_s = box.create_topology(ds, dim=2)
+
+
+# Fields
+scal = hysop.Field(domain=box, name='Scalar', formula=scalaire)
+velo = hysop.Field(domain=box, name='Velocity',
+                   isVector=True, formula=vitesse)
+# Operators
+advec = Advection(velo,
+                  discretization=topo_v,
+                  variables={scal: topo_s},
+                  method={Scales: 'p_64', MultiScale: 'L4_4'})
+advec.discretize()
+
+velocity = Analytic(variables={velo: topo_v},
+                    topo=topo_v)
+
+volume_m = CustomMonitor(function=volume, res_shape=1,
+                         variables={scal: topo_s},
+                         io_params=IO_params(filename="volume.dat"))
+p = HDF_Writer(variables={scal: topo_s},
+               io_params=IO_params(frequency=outputModulo,
+                                   filename="levelset",
+                                   fileformat=HDF5))
+pb = Problem([velocity, advec, p, volume_m],
+             simu, dumpFreq=-1)
+
+pb.setup()
+scal.initialize(topo=topo_s)
+velo.initialize(topo=topo_v)
+
+volume_m.apply(simu)
+p.apply(simu)
+
+ctime = MPI.Wtime()
+pb.solve()
+print MPI.Wtime() - ctime
+
+simu.finalize()
+p.apply(simu)
+volume_m.apply(simu)
+
+pb.finalize()
+print pb.profiler
diff --git a/Examples/LevelSet3D/levelSet3D_gpu.py b/Examples/LevelSet3D/levelSet3D_gpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..34fb8a8dd3aa47b3bc20f87d0c4542f4aafc85f4
--- /dev/null
+++ b/Examples/LevelSet3D/levelSet3D_gpu.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+import hysop
+# import hysop.gpu
+# hysop.gpu.CL_PROFILE = True
+from hysop.constants import np, HDF5
+from hysop.mpi.main_var import MPI, main_size, main_rank, main_comm
+from hysop.tools.parameters import Discretization, IO_params
+from hysop.methods_keys import TimeIntegrator, Interpolation, Remesh,\
+    Support, Splitting, MultiScale
+from hysop.numerics.integrators.runge_kutta2 import RK2 as RK
+from hysop.numerics.remeshing import L6_4 as remesh_formula
+from hysop.numerics.remeshing import Linear
+from hysop.operator.advection import Advection
+from hysop.operator.analytic import Analytic
+from hysop.operator.custom import CustomMonitor
+from hysop.operator.hdf_io import HDF_Writer
+from hysop.gpu.gpu_transfer import DataTransfer
+from hysop.problem.problem import Problem
+from hysop.problem.simulation import Simulation
+sin, cos, pi, sqrt = np.sin, np.cos, np.pi, np.sqrt
+
+
+def scalaire(res, x, y, z, t=0.):
+    r = sqrt((x - 0.35) ** 2 + (y - 0.35) ** 2 + (z - 0.35) ** 2)
+    res[0][...] = 0.
+    res[0][r < 0.15] = 1.
+    return res
+
+
+def vitesse(res, x, y, z, t=0.):
+    res[0][...] = 2. * sin(pi*x)**2*sin(2.*pi*y)*sin(2.*pi*z)*cos(t*pi/3.)
+    res[1][...] = -sin(2.*pi*x)*sin(pi*y)**2*sin(2.*pi*z)*cos(t*pi/3.)
+    res[2][...] = -sin(2.*pi*x)*sin(2.*pi*y)*sin(pi*z)**2*cos(t*pi/3.)
+    return res
+
+
+def volume(_simu, var):
+    v_loc = np.sum(var[0].data[0] > 0.5) * np.prod(
+        var[0].topology.mesh.space_step)
+    return main_comm.allreduce(sendobj=v_loc, op=MPI.SUM)
+
+
+# Parameters
+dim = 3
+boxLength = [1., ] * dim
+boxMin = [0., ] * dim
+nbElem_v = [129] * dim
+nbElem_s = [129] * dim
+dv = Discretization(nbElem_v)
+ds = Discretization(nbElem_s)
+
+timeStep = 0.35 / (4. * np.pi)
+finalTime = 3.
+outputModulo = 10
+simu = Simulation(tinit=0.0, tend=finalTime,
+                  timeStep=timeStep, iterMax=120)
+
+# Domain
+box = hysop.Box(length=boxLength, origin=boxMin)
+
+# Fields
+scal = hysop.Field(domain=box, name='Scalar', formula=scalaire)
+velo = hysop.Field(domain=box, name='Velocity',
+                   isVector=True, formula=vitesse)
+# Operators
+advec_method = {TimeIntegrator: RK,
+                Interpolation: Linear,
+                Remesh: remesh_formula,
+                Support: 'gpu_1k',
+                Splitting: 'o2',
+                MultiScale: Linear}
+
+if main_size == 1:
+    advec = Advection(velo,
+                      discretization=dv,
+                      variables={scal: ds},
+                      method=advec_method)
+else:
+    # Multi-GPU advection must know a max dt and velocity to
+    # compute communication buffers
+    advec = Advection(velo,
+                      discretization=dv,
+                      variables={scal: ds},
+                      method=advec_method,
+                      device_id=main_rank % 2,
+                      max_dt=simu.timeStep,
+                      max_velocity=[2, 1, 1])
+advec.discretize()
+
+# Get topologies from operator
+topo_v = advec.advec_dir[0].discreteFields[velo].topology
+topo_s = advec.advec_dir[0].discreteFields[scal].topology
+
+velocity = Analytic(variables={velo: topo_v},
+                    topo=topo_v)
+
+volume_m = CustomMonitor(function=volume, res_shape=1,
+                         variables={scal: topo_s},
+                         io_params=IO_params(filename="volume.dat"))
+p = HDF_Writer(variables={scal: topo_s},
+               io_params=IO_params(frequency=outputModulo,
+                                   filename="levelset",
+                                   fileformat=HDF5))
+# ToHost frequency is set to 1 since the volume computation
+# is performed at each iteration.
+toHost = DataTransfer(source=topo_s, target=p,
+                      variables={scal: topo_s},
+                      freq=1,
+                      run_till=[p, volume_m])
+
+pb = Problem([velocity, advec, toHost, p, volume_m],
+             simu, dumpFreq=-1)
+
+pb.setup()
+scal.initialize(topo=topo_s)
+velo.initialize(topo=topo_v)
+
+scal.discreteFields[topo_s].toHost()
+scal.discreteFields[topo_s].wait()
+volume_m.apply(simu)
+p.apply(simu)
+
+ctime = MPI.Wtime()
+pb.solve()
+print MPI.Wtime() - ctime
+
+simu.finalize()
+scal.discreteFields[topo_s].toHost()
+scal.discreteFields[topo_s].wait()
+p.apply(simu)
+volume_m.apply(simu)
+
+pb.finalize()
+print pb.profiler
diff --git a/Examples/LevelSet3D/levelSet3D_gpu_MultiScale.py b/Examples/LevelSet3D/levelSet3D_gpu_MultiScale.py
new file mode 100644
index 0000000000000000000000000000000000000000..585195c96165496489e5247ca4b873bd29eeb821
--- /dev/null
+++ b/Examples/LevelSet3D/levelSet3D_gpu_MultiScale.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+import hysop
+# import hysop.gpu
+# hysop.gpu.CL_PROFILE = True
+from hysop.constants import np, HDF5
+from hysop.mpi.main_var import MPI, main_size, main_rank, main_comm
+from hysop.tools.parameters import Discretization, IO_params
+from hysop.methods_keys import TimeIntegrator, Interpolation, Remesh,\
+    Support, Splitting, MultiScale
+from hysop.numerics.integrators.runge_kutta2 import RK2 as RK
+from hysop.numerics.remeshing import L6_4 as remesh_formula
+from hysop.numerics.remeshing import Linear
+from hysop.operator.advection import Advection
+from hysop.operator.analytic import Analytic
+from hysop.operator.custom import CustomMonitor
+from hysop.operator.hdf_io import HDF_Writer
+from hysop.gpu.gpu_transfer import DataTransfer
+from hysop.problem.problem import Problem
+from hysop.problem.simulation import Simulation
+sin, cos, pi, sqrt = np.sin, np.cos, np.pi, np.sqrt
+
+
+def scalaire(res, x, y, z, t=0.):
+    r = sqrt((x - 0.35) ** 2 + (y - 0.35) ** 2 + (z - 0.35) ** 2)
+    res[0][...] = 0.
+    res[0][r < 0.15] = 1.
+    return res
+
+
+def vitesse(res, x, y, z, t=0.):
+    res[0][...] = 2. * sin(pi*x)**2*sin(2.*pi*y)*sin(2.*pi*z)*cos(t*pi/3.)
+    res[1][...] = -sin(2.*pi*x)*sin(pi*y)**2*sin(2.*pi*z)*cos(t*pi/3.)
+    res[2][...] = -sin(2.*pi*x)*sin(2.*pi*y)*sin(pi*z)**2*cos(t*pi/3.)
+    return res
+
+
+def volume(_simu, var):
+    v_loc = np.sum(var[0].data[0] > 0.5) * np.prod(
+        var[0].topology.mesh.space_step)
+    return main_comm.allreduce(sendobj=v_loc, op=MPI.SUM)
+
+
+# Parameters
+dim = 3
+boxLength = [1., ] * dim
+boxMin = [0., ] * dim
+nbElem_v = [65] * dim
+nbElem_s = [129] * dim
+# In multi-scale, we need 1 ghost point for velocity
+dv = Discretization(nbElem_v, ghosts=[1, ] * dim)
+ds = Discretization(nbElem_s)
+
+timeStep = 0.35 / (4. * np.pi)
+finalTime = 3.
+outputModulo = 10
+simu = Simulation(tinit=0.0, tend=finalTime,
+                  timeStep=timeStep, iterMax=120)
+
+# Domain
+box = hysop.Box(length=boxLength, origin=boxMin)
+
+# Fields
+scal = hysop.Field(domain=box, name='Scalar', formula=scalaire)
+velo = hysop.Field(domain=box, name='Velocity',
+                   isVector=True, formula=vitesse)
+# Operators
+advec_method = {TimeIntegrator: RK,
+                Interpolation: Linear,
+                Remesh: remesh_formula,
+                Support: 'gpu_1k',
+                Splitting: 'o2',
+                MultiScale: Linear}
+
+if main_size == 1:
+    advec = Advection(velo,
+                      discretization=dv,
+                      variables={scal: ds},
+                      method=advec_method)
+else:
+    # Multi-GPU advection must know a max dt and velocity to
+    # compute communication buffers
+    advec = Advection(velo,
+                      discretization=dv,
+                      variables={scal: ds},
+                      method=advec_method,
+                      device_id=main_rank % 2,
+                      max_dt=simu.timeStep,
+                      max_velocity=[2, 1, 1])
+advec.discretize()
+
+# Get topologies from operator
+topo_v = advec.advec_dir[0].discreteFields[velo].topology
+topo_s = advec.advec_dir[0].discreteFields[scal].topology
+
+velocity = Analytic(variables={velo: topo_v},
+                    topo=topo_v)
+
+volume_m = CustomMonitor(function=volume, res_shape=1,
+                         variables={scal: topo_s},
+                         io_params=IO_params(filename="volume.dat"))
+p = HDF_Writer(variables={scal: topo_s},
+               io_params=IO_params(frequency=outputModulo,
+                                   filename="levelset",
+                                   fileformat=HDF5))
+# ToHost frequency is set to 1 since the volume computation
+# is performed at each iteration.
+toHost = DataTransfer(source=topo_s, target=p,
+                      variables={scal: topo_s},
+                      freq=1,
+                      run_till=[p, volume_m])
+
+pb = Problem([velocity, advec, toHost, p, volume_m],
+             simu, dumpFreq=-1)
+
+pb.setup()
+scal.initialize(topo=topo_s)
+velo.initialize(topo=topo_v)
+
+scal.discreteFields[topo_s].toHost()
+scal.discreteFields[topo_s].wait()
+volume_m.apply(simu)
+p.apply(simu)
+
+ctime = MPI.Wtime()
+pb.solve()
+print MPI.Wtime() - ctime
+
+simu.finalize()
+scal.discreteFields[topo_s].toHost()
+scal.discreteFields[topo_s].wait()
+p.apply(simu)
+volume_m.apply(simu)
+
+pb.finalize()
+print pb.profiler
diff --git a/Examples/LevelSet3D/levelSet3D_only_gpu.py b/Examples/LevelSet3D/levelSet3D_only_gpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..de176d50d0bab5ced71eda5c476557cdf31850ee
--- /dev/null
+++ b/Examples/LevelSet3D/levelSet3D_only_gpu.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+import hysop
+# import hysop.gpu
+# hysop.gpu.CL_PROFILE = True
+from hysop.constants import np, HDF5
+from hysop.mpi.main_var import MPI, main_size, main_rank, main_comm
+from hysop.tools.parameters import Discretization, IO_params
+from hysop.methods_keys import TimeIntegrator, Interpolation, Remesh,\
+    Support, Splitting, MultiScale
+from hysop.numerics.integrators.runge_kutta2 import RK2 as RK
+from hysop.numerics.remeshing import L6_4 as remesh_formula
+from hysop.numerics.remeshing import Linear
+from hysop.operator.advection import Advection
+from hysop.operator.analytic import Analytic
+from hysop.operator.custom import CustomMonitor
+from hysop.operator.hdf_io import HDF_Writer
+from hysop.gpu.gpu_transfer import DataTransfer
+from hysop.problem.problem import Problem
+from hysop.problem.simulation import Simulation
+sin, cos, pi, sqrt = np.sin, np.cos, np.pi, np.sqrt
+
+
+def volume(_simu, var):
+    v_loc = np.sum(var[0].data[0] > 0.5) * np.prod(
+        var[0].topology.mesh.space_step)
+    return main_comm.allreduce(sendobj=v_loc, op=MPI.SUM)
+
+
+# Parameters
+dim = 3
+boxLength = [1., ] * dim
+boxMin = [0., ] * dim
+nbElem_v = [129] * dim
+nbElem_s = [129] * dim
+dv = Discretization(nbElem_v)
+ds = Discretization(nbElem_s)
+
+timeStep = 0.35 / (4. * np.pi)
+finalTime = 3.
+outputModulo = 10
+simu = Simulation(tinit=0.0, tend=finalTime,
+                  timeStep=timeStep, iterMax=120)
+
+# Domain
+box = hysop.Box(length=boxLength, origin=boxMin)
+
+# Fields
+scal = hysop.Field(domain=box, name='Scalar')
+velo = hysop.Field(domain=box, name='Velocity', isVector=True)
+# Operators
+advec_method = {TimeIntegrator: RK,
+                Interpolation: Linear,
+                Remesh: remesh_formula,
+                Support: 'gpu_1k',
+                Splitting: 'o2',
+                MultiScale: Linear}
+
+if main_size == 1:
+    advec = Advection(velo,
+                      discretization=dv,
+                      variables={scal: ds},
+                      method=advec_method,
+                      user_src=['./levelSet3D.cl'])
+else:
+    # Multi-GPU advection must know a max dt and velocity to
+    # compute communication buffers
+    advec = Advection(velo,
+                      discretization=dv,
+                      variables={scal: ds},
+                      method=advec_method,
+                      user_src=['./levelSet3D.cl'],
+                      device_id=main_rank % 2,
+                      max_dt=simu.timeStep,
+                      max_velocity=[2, 1, 1],
+                      velocity_only_on_device=True)
+advec.discretize()
+
+# Get topologies from operator
+topo_v = advec.advec_dir[0].discreteFields[velo].topology
+topo_s = advec.advec_dir[0].discreteFields[scal].topology
+
+velocity = Analytic(variables={velo: topo_v},
+                    method={Support: 'gpu'},
+                    topo=topo_v)
+
+volume_m = CustomMonitor(function=volume, res_shape=1,
+                         variables={scal: topo_s},
+                         io_params=IO_params(filename="volume.dat"))
+p = HDF_Writer(variables={scal: topo_s},
+               io_params=IO_params(frequency=outputModulo,
+                                   filename="levelset",
+                                   fileformat=HDF5))
+# ToHost frequency is set to 1 since the volume computation
+# is performed at each iteration.
+toHost = DataTransfer(source=topo_s, target=p,
+                      variables={scal: topo_s},
+                      freq=1,
+                      run_till=[p, volume_m])
+
+pb = Problem([velocity, advec, toHost, p, volume_m],
+             simu, dumpFreq=-1)
+
+pb.setup()
+scal.initialize(topo=topo_s)
+velo.initialize(topo=topo_v)
+
+scal.discreteFields[topo_s].toHost()
+scal.discreteFields[topo_s].wait()
+volume_m.apply(simu)
+p.apply(simu)
+
+ctime = MPI.Wtime()
+pb.solve()
+print MPI.Wtime() - ctime
+
+simu.finalize()
+scal.discreteFields[topo_s].toHost()
+scal.discreteFields[topo_s].wait()
+p.apply(simu)
+volume_m.apply(simu)
+
+pb.finalize()
+print pb.profiler
diff --git a/Examples/LevelSet3D/levelSet3D_only_gpu_MultiScale.py b/Examples/LevelSet3D/levelSet3D_only_gpu_MultiScale.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb89df5998a075597052185931f8cb0991637739
--- /dev/null
+++ b/Examples/LevelSet3D/levelSet3D_only_gpu_MultiScale.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+import hysop
+# import hysop.gpu
+# hysop.gpu.CL_PROFILE = True
+from hysop.constants import np, HDF5
+from hysop.mpi.main_var import MPI, main_size, main_rank, main_comm
+from hysop.tools.parameters import Discretization, IO_params
+from hysop.methods_keys import TimeIntegrator, Interpolation, Remesh,\
+    Support, Splitting, MultiScale
+from hysop.numerics.integrators.runge_kutta2 import RK2 as RK
+from hysop.numerics.remeshing import L6_4 as remesh_formula
+from hysop.numerics.remeshing import Linear
+from hysop.operator.advection import Advection
+from hysop.operator.analytic import Analytic
+from hysop.operator.custom import CustomMonitor
+from hysop.operator.hdf_io import HDF_Writer
+from hysop.gpu.gpu_transfer import DataTransfer
+from hysop.problem.problem import Problem
+from hysop.problem.simulation import Simulation
+sin, cos, pi, sqrt = np.sin, np.cos, np.pi, np.sqrt
+
+
+def volume(_simu, var):
+    v_loc = np.sum(var[0].data[0] > 0.5) * np.prod(
+        var[0].topology.mesh.space_step)
+    return main_comm.allreduce(sendobj=v_loc, op=MPI.SUM)
+
+
+# Parameters
+dim = 3
+boxLength = [1., ] * dim
+boxMin = [0., ] * dim
+nbElem_v = [65] * dim
+nbElem_s = [129] * dim
+# In multi-scale, we need 1 ghost point for velocity
+dv = Discretization(nbElem_v, ghosts=[1, ] * dim)
+ds = Discretization(nbElem_s)
+
+timeStep = 0.35 / (4. * np.pi)
+finalTime = 3.
+outputModulo = 10
+simu = Simulation(tinit=0.0, tend=finalTime,
+                  timeStep=timeStep, iterMax=120)
+
+# Domain
+box = hysop.Box(length=boxLength, origin=boxMin)
+
+# Fields
+scal = hysop.Field(domain=box, name='Scalar')
+velo = hysop.Field(domain=box, name='Velocity', isVector=True)
+# Operators
+advec_method = {TimeIntegrator: RK,
+                Interpolation: Linear,
+                Remesh: remesh_formula,
+                Support: 'gpu_1k',
+                Splitting: 'o2',
+                MultiScale: Linear}
+
+if main_size == 1:
+    advec = Advection(velo,
+                      discretization=dv,
+                      variables={scal: ds},
+                      method=advec_method,
+                      user_src=['./levelSet3D.cl'])
+else:
+    # Multi-GPU advection must know a max dt and velocity to
+    # compute communication buffers
+    advec = Advection(velo,
+                      discretization=dv,
+                      variables={scal: ds},
+                      method=advec_method,
+                      user_src=['./levelSet3D.cl'],
+                      device_id=main_rank % 2,
+                      max_dt=simu.timeStep,
+                      max_velocity=[2, 1, 1],
+                      velocity_only_on_device=True)
+advec.discretize()
+
+# Get topologies from operator
+topo_v = advec.advec_dir[0].discreteFields[velo].topology
+topo_s = advec.advec_dir[0].discreteFields[scal].topology
+
+velocity = Analytic(variables={velo: topo_v},
+                    method={Support: 'gpu'},
+                    topo=topo_v)
+
+volume_m = CustomMonitor(function=volume, res_shape=1,
+                         variables={scal: topo_s},
+                         io_params=IO_params(filename="volume.dat"))
+p = HDF_Writer(variables={scal: topo_s},
+               io_params=IO_params(frequency=outputModulo,
+                                   filename="levelset",
+                                   fileformat=HDF5))
+# ToHost frequency is set to 1 since the volume computation
+# is performed at each iteration.
+toHost = DataTransfer(source=topo_s, target=p,
+                      variables={scal: topo_s},
+                      freq=1,
+                      run_till=[p, volume_m])
+
+pb = Problem([velocity, advec, toHost, p, volume_m],
+             simu, dumpFreq=-1)
+
+pb.setup()
+scal.initialize(topo=topo_s)
+velo.initialize(topo=topo_v)
+
+scal.discreteFields[topo_s].toHost()
+scal.discreteFields[topo_s].wait()
+volume_m.apply(simu)
+p.apply(simu)
+
+ctime = MPI.Wtime()
+pb.solve()
+print MPI.Wtime() - ctime
+
+simu.finalize()
+scal.discreteFields[topo_s].toHost()
+scal.discreteFields[topo_s].wait()
+p.apply(simu)
+volume_m.apply(simu)
+
+pb.finalize()
+print pb.profiler
diff --git a/Examples/LevelSet3D/levelSet3D_python.py b/Examples/LevelSet3D/levelSet3D_python.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce73c5004f8756573808cde3477bd527cd2d3f18
--- /dev/null
+++ b/Examples/LevelSet3D/levelSet3D_python.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+import hysop
+import hysop.gpu
+hysop.gpu.CL_PROFILE = True
+from hysop.constants import np, HDF5
+from hysop.mpi.main_var import MPI, main_size, main_rank, main_comm
+from hysop.tools.parameters import Discretization, IO_params
+from hysop.methods_keys import TimeIntegrator, Interpolation, Remesh,\
+    Support, Splitting, MultiScale
+from hysop.numerics.integrators.runge_kutta2 import RK2 as RK
+from hysop.numerics.remeshing import L2_1 as remesh_formula
+from hysop.numerics.interpolation import Linear
+from hysop.operator.advection import Advection
+from hysop.operator.analytic import Analytic
+from hysop.operator.custom import CustomMonitor
+from hysop.operator.hdf_io import HDF_Writer
+from hysop.gpu.gpu_transfer import DataTransfer
+from hysop.problem.problem import Problem
+from hysop.problem.simulation import Simulation
+sin, cos, pi, sqrt = np.sin, np.cos, np.pi, np.sqrt
+
+
+def scalaire(res, x, y, z, t=0.):
+    r = sqrt((x - 0.35) ** 2 + (y - 0.35) ** 2 + (z - 0.35) ** 2)
+    res[0][...] = 0.
+    res[0][r < 0.15] = 1.
+    return res
+
+
+def vitesse(res, x, y, z, t=0.):
+    res[0][...] = 2. * sin(pi*x)**2*sin(2.*pi*y)*sin(2.*pi*z)*cos(t*pi/3.)
+    res[1][...] = -sin(2.*pi*x)*sin(pi*y)**2*sin(2.*pi*z)*cos(t*pi/3.)
+    res[2][...] = -sin(2.*pi*x)*sin(2.*pi*y)*sin(pi*z)**2*cos(t*pi/3.)
+    return res
+
+
+def volume(_simu, var):
+    v_loc = np.sum(var[0].data[0] > 0.5) * np.prod(
+        var[0].topology.mesh.space_step)
+    return main_comm.allreduce(sendobj=v_loc, op=MPI.SUM)
+
+
+# Parameters
+dim = 3
+boxLength = [1., ] * dim
+boxMin = [0., ] * dim
+nbElem_v = [65] * dim
+nbElem_s = [65] * dim
+if nbElem_v[0] == nbElem_s[0]:
+    dv = Discretization(nbElem_v)
+else:
+    # In multi-scale, we need 1 ghost point for velocity
+    dv = Discretization(nbElem_v, ghosts=[1, ] * dim)
+ds = Discretization(nbElem_s)
+
+timeStep = 0.35 / (4. * np.pi)
+finalTime = 3.
+outputModulo = 10
+simu = Simulation(tinit=0.0, tend=finalTime,
+                  timeStep=timeStep, iterMax=120)
+
+# Domain
+box = hysop.Box(length=boxLength, origin=boxMin)
+
+# Fields
+scal = hysop.Field(domain=box, name='Scalar', formula=scalaire)
+velo = hysop.Field(domain=box, name='Velocity',
+                   isVector=True, formula=vitesse)
+# Operators
+advec_method = {TimeIntegrator: RK,
+                Interpolation: Linear,
+                Remesh: remesh_formula,
+                Support: '',
+                Splitting: 'o2',
+                MultiScale: Linear}
+
+#if main_size == 1:
+advec = Advection(velo,
+                  discretization=dv,
+                  variables={scal: ds},
+                  method=advec_method)
+    #user_src=['./levelSet3D.cl'])
+#else:
+    # # Multi-GPU advection must know a max dt and velocity to
+    # # compute communication buffers
+    # advec = Advection(velo,
+    #                   discretization=dv,
+    #                   variables={scal: ds},
+    #                   method=advec_method,
+    #                   user_src=['./levelSet3D.cl'],
+    #                   device_id=main_rank % 2,
+    #                   max_dt=simu.timeStep,
+    #                   max_velocity=[2, 1, 1])
+advec.discretize()
+
+# Get topologies from operator
+topo_v = advec.advec_dir[0].discreteFields[velo].topology
+topo_s = advec.advec_dir[0].discreteFields[scal].topology
+
+velocity = Analytic(variables={velo: topo_v},
+                    #method={Support: 'gpu'},
+                    topo=topo_v)
+
+volume_m = CustomMonitor(function=volume, res_shape=1,
+                         variables={scal: topo_s},
+                         io_params=IO_params(filename="volume.dat"))
+p = HDF_Writer(variables={scal: topo_s},
+               io_params=IO_params(frequency=outputModulo,
+                                   filename="levelset",
+                                   fileformat=HDF5))
+# ToHost frequency is set to 1 since the volume computation
+# is performed at each iteration.
+# toHost = DataTransfer(source=topo_s, target=p,
+#                       variables={scal: topo_s},
+#                       freq=1,
+#                       run_till=[p, volume_m])
+
+pb = Problem([velocity, advec, p, volume_m],
+             simu, dumpFreq=-1)
+
+pb.setup()
+scal.initialize(topo=topo_s)
+velo.initialize(topo=topo_v)
+
+#scal.discreteFields[topo_s].toHost()
+#scal.discreteFields[topo_s].wait()
+volume_m.apply(simu)
+p.apply(simu)
+
+ctime = MPI.Wtime()
+pb.solve()
+print MPI.Wtime() - ctime
+
+simu.finalize()
+#scal.discreteFields[topo_s].toHost()
+#scal.discreteFields[topo_s].wait()
+p.apply(simu)
+volume_m.apply(simu)
+
+pb.finalize()
+print pb.profiler
diff --git a/Examples/Plane_jet/NS_planeJet_CPU.py b/Examples/Plane_jet/NS_planeJet_CPU.py
deleted file mode 100644
index 76fc413698b0161040ccee1972bdd836fb624494..0000000000000000000000000000000000000000
--- a/Examples/Plane_jet/NS_planeJet_CPU.py
+++ /dev/null
@@ -1,224 +0,0 @@
-#!/usr/bin/python
-
-"""
-Taylor Green 3D : see paper van Rees 2011.
-
-All parameters are set and defined in python module dataTG.
-
-"""
-
-import parmepy as pp
-from parmepy.constants import ORDER, np, PARMES_REAL, HDF5
-from parmepy.f2py import fftw2py
-from parmepy.fields.continuous import Field
-from parmepy.mpi.topology import Cartesian
-from parmepy.operator.advection import Advection
-from parmepy.operator.stretching import Stretching
-from parmepy.operator.poisson import Poisson
-from parmepy.operator.diffusion import Diffusion
-from parmepy.operator.redistribute import Redistribute
-from parmepy.problem.navier_stokes import NSProblem
-from parmepy.operator.monitors.printer import Printer
-from parmepy.operator.monitors.energy_enstrophy import Energy_enstrophy
-from parmepy.problem.simulation import Simulation
-from parmepy.operator.differential import Curl
-from parmepy.mpi.main_var import main_size
-from parmepy.methods_keys import Scales, TimeIntegrator, Interpolation,\
-    Remesh, Support, Splitting, dtAdvecCrit, SpaceDiscretisation, GhostUpdate
-from parmepy.numerics.integrators.runge_kutta2 import RK2 as RK2
-from parmepy.numerics.integrators.runge_kutta3 import RK3 as RK3
-from parmepy.numerics.integrators.runge_kutta4 import RK4 as RK4
-from parmepy.numerics.finite_differences import FD_C_4, FD_C_2
-from parmepy.numerics.interpolation import Linear
-from parmepy.numerics.remeshing import L6_4
-from parmepy.f2py import fftw2py
-
-# problem dimension
-dim = 3
-# resolution
-nb = 129
-# number of ghosts in usual cartesian topo
-NBGHOSTS = 2
-ADVECTION_METHOD = {Scales: 'p_64'}
-# ADVECTION_METHOD = {TimeIntegrator: RK2,
-#                     Interpolation: Linear,
-#                     Remesh: L6_4,
-#                     Support: '',
-#                     Splitting: 'o2_FullHalf'}
-VISCOSITY = 1e-4
-
-
-## ----------- A 3d problem -----------
-## pi constant
-pi = np.pi
-cos = np.cos
-sin = np.sin
-exp = np.exp
-abs = np.abs
-tanh = np.tanh
-## Domain
-box = pp.Box(dim, length=[1., ]*3)
-
-## Global resolution
-nbElem = [nb] * dim
-randX = np.asarray(np.random.random([nb - 1, nb - 1,(nb - 1)/main_size]),
-                   dtype=PARMES_REAL, order=ORDER) - 0.5
-randY = np.asarray(np.random.random([nb - 1, nb - 1,(nb - 1)/main_size]),
-                   dtype=PARMES_REAL, order=ORDER) - 0.5
-randZ = np.asarray(np.random.random([nb - 1, nb - 1,(nb - 1)/main_size]),
-                   dtype=PARMES_REAL, order=ORDER) - 0.5
-
-##initjet.f
-width = 0.01
-ampl3 = 0.3
-ampl = 0.05
-ampl2=0.05
-def computeVel(res, x, y, z, t):
-    yy = abs(y - 0.5)
-    aux = (0.1 - 2. * yy) / (4. * width)
-    strg1 = exp(-abs(aux ** 2)) * randX
-    strg2 = exp(-abs(aux ** 2)) * randY
-    strg3 = exp(-abs(aux ** 2)) * randZ
-    res[0][...] = 0.5 * (1. + tanh(aux))
-    res[0][...] *= (1. + ampl3 * sin(8. * pi * x))
-    res[0][...] *= (1. + ampl * strg1)
-    res[1][...] = ampl * strg2
-    res[2][...] = ampl * strg3
-    return res
-
-
-def initScal(res, x, y, z, t):
-    yy = abs(y - 0.5)
-    aux = (0.1 - 2. * yy) / (4. * width)
-    res[0][...] = 0.5 * (1. + tanh(aux))
-    #strg3 = exp(-abs(aux ** 2)) * randZ
-    #res[0][...] *= (1. + ampl2 * strg3)
-    return res
-
-## Fields
-velo = Field(domain=box, formula=computeVel,
-             name='Velocity', is_vector=True)
-vorti = Field(domain=box,
-              name='Vorticity', is_vector=True)
-scal = Field(domain=box, formula=initScal,
-              name='PassiveScalar', is_vector=False)
-
-## Variables
-## Usual Cartesian topology definition
-# At the moment we use two (or three?) topologies :
-# - "topo" for Stretching and all operators based on finite differences.
-#    --> ghost layer = 2
-# - topo from Advection operator for all the other operators.
-#    --> no ghost layer
-# - topo from fftw for Poisson and Diffusion.
-# Todo : check compat between scales and fft operators topologies.
-ghosts = [NBGHOSTS] * box.dimension
-topo = Cartesian(box, box.dimension, nbElem,
-                 ghosts=ghosts)
-
-## Navier Stokes Operators
-advec = Advection(velo, vorti,
-                  resolutions={velo: nbElem,
-                               vorti: nbElem},
-                  method=ADVECTION_METHOD
-                  )
-advecScal = Advection(velo, scal,
-                      resolutions={velo: nbElem,
-                                   scal: nbElem},
-                      method=ADVECTION_METHOD
-                      )
-
-stretch = Stretching(velo, vorti,
-                     resolutions={velo: nbElem,
-                                  vorti: nbElem},
-                     topo=topo
-                     )
-
-diffusion = Diffusion(vorti,
-                      resolution=nbElem,
-                      viscosity=VISCOSITY
-                      )
-
-poisson = Poisson(velo, vorti,
-                  resolutions={velo: nbElem,
-                               vorti: nbElem},
-                  projection=[True, 1, False],
-                  )
-
-c = Curl(velo, vorti,
-         resolutions={velo: nbElem,
-                      vorti: nbElem},
-         method={SpaceDiscretisation: fftw2py, GhostUpdate: True},
-         )
-
-## Simulation
-simu = Simulation(tinit=0.0,
-                  tend=4.5, timeStep=0.001,
-                  iterMax=1000000)
-
-
-# Bridges between the different topologies in order to
-# redistribute data.
-# 1 -Advection to stretching
-toGhost_vorti = Redistribute([vorti], advec, stretch)
-toGhost_velo = Redistribute([velo], poisson, stretch)
-# 2 - Stretching to Poisson/Diffusion
-fromGhost_vorti = Redistribute([vorti], stretch, diffusion)
-
-#  Define the problem to solve
-pb = NSProblem(operators=[
-        toGhost_velo,
-        advecScal,
-        advec,
-        toGhost_vorti,
-        stretch,
-        fromGhost_vorti,
-        diffusion,
-        poisson,
-        ], simulation=simu, dumpFreq=-1)
-
-## Setting solver to Problem (o nly operators for computational tasks)
-pb.pre_setUp()
-## Diagnostics related to the problem
-topofft = poisson.discreteFields[poisson.vorticity].topology
-energy = Energy_enstrophy(velo, vorti, isNormalized=True,
-                          topo=topofft,
-                          viscosity=VISCOSITY,
-                          frequency=1,
-                          prefix='./res_NS_planeJet/energy_ref.dat')
-
-# initialisation
-vorti.setTopoInit(topofft)
-velo.setTopoInit(topofft)
-scal.setTopoInit(topofft)
-pb.addMonitors([energy])
-pb.setUp()
-
-
-c.discretize()
-c.setUp()
-
-c.apply(simu)
-
-p = Printer(variables=[scal,],
-            topo=topofft,
-            frequency=100,
-            prefix='./res_NS_planeJet/scal_ref',
-            formattype=HDF5)
-pb.addMonitors([p])
-p._printStep()
-
-
-def run():
-    pb.solve()
-
-## Solve problem
-from parmepy.mpi import MPI
-print "Start computation ..."
-time = MPI.Wtime()
-run()
-print 'total time (rank):', MPI.Wtime() - time, '(', topo.rank, ')'
-
-
-pb.finalize()
-## Clean memory buffers
diff --git a/Examples/Plane_jet/NS_planeJet_CPU_MS.py b/Examples/Plane_jet/NS_planeJet_CPU_MS.py
deleted file mode 100644
index 5cdedc7611d352a8fc6651968b063b4f7857457f..0000000000000000000000000000000000000000
--- a/Examples/Plane_jet/NS_planeJet_CPU_MS.py
+++ /dev/null
@@ -1,226 +0,0 @@
-#!/usr/bin/python
-
-"""
-Taylor Green 3D : see paper van Rees 2011.
-
-All parameters are set and defined in python module dataTG.
-
-"""
-
-import parmepy as pp
-from parmepy.constants import ORDER, np, PARMES_REAL, HDF5
-from parmepy.f2py import fftw2py
-from parmepy.fields.continuous import Field
-from parmepy.mpi.topology import Cartesian
-from parmepy.operator.advection import Advection
-from parmepy.operator.stretching import Stretching
-from parmepy.operator.poisson import Poisson
-from parmepy.operator.diffusion import Diffusion
-from parmepy.operator.redistribute import Redistribute
-from parmepy.problem.navier_stokes import NSProblem
-from parmepy.operator.monitors.printer import Printer
-from parmepy.operator.monitors.energy_enstrophy import Energy_enstrophy
-from parmepy.problem.simulation import Simulation
-from parmepy.operator.differential import Curl
-from parmepy.mpi.main_var import main_size
-from parmepy.methods_keys import Scales, TimeIntegrator, Interpolation,\
-    Remesh, Support, Splitting, dtAdvecCrit, SpaceDiscretisation, GhostUpdate, MultiScale
-from parmepy.numerics.integrators.runge_kutta2 import RK2 as RK2
-from parmepy.numerics.integrators.runge_kutta3 import RK3 as RK3
-from parmepy.numerics.integrators.runge_kutta4 import RK4 as RK4
-from parmepy.numerics.finite_differences import FD_C_4, FD_C_2
-from parmepy.numerics.interpolation import Linear
-from parmepy.numerics.remeshing import L6_4
-from parmepy.f2py import fftw2py
-
-# problem dimension
-dim = 3
-# resolution
-nb = 129
-nb_s = 257
-# number of ghosts in usual cartesian topo
-NBGHOSTS = 2
-ADVECTION_METHOD = {Scales: 'p_64', MultiScale: 'L4_4'}
-# ADVECTION_METHOD = {TimeIntegrator: RK2,
-#                     Interpolation: Linear,
-#                     Remesh: L6_4,
-#                     Support: '',
-#                     Splitting: 'o2_FullHalf'}
-VISCOSITY = 1e-4
-
-
-## ----------- A 3d problem -----------
-## pi constant
-pi = np.pi
-cos = np.cos
-sin = np.sin
-exp = np.exp
-abs = np.abs
-tanh = np.tanh
-## Domain
-box = pp.Box(dim, length=[1., ]*3)
-
-## Global resolution
-nbElem = [nb] * dim
-nbElem_s = [nb_s] * dim
-randX = np.asarray(np.random.random([nb - 1, nb - 1,(nb - 1)/main_size]),
-                   dtype=PARMES_REAL, order=ORDER) - 0.5
-randY = np.asarray(np.random.random([nb - 1, nb - 1,(nb - 1)/main_size]),
-                   dtype=PARMES_REAL, order=ORDER) - 0.5
-randZ = np.asarray(np.random.random([nb - 1, nb - 1,(nb - 1)/main_size]),
-                   dtype=PARMES_REAL, order=ORDER) - 0.5
-
-##initjet.f
-width = 0.01
-ampl3 = 0.3
-ampl = 0.05
-ampl2=0.05
-def computeVel(res, x, y, z, t):
-    yy = abs(y - 0.5)
-    aux = (0.1 - 2. * yy) / (4. * width)
-    strg1 = exp(-abs(aux ** 2)) * randX
-    strg2 = exp(-abs(aux ** 2)) * randY
-    strg3 = exp(-abs(aux ** 2)) * randZ
-    res[0][...] = 0.5 * (1. + tanh(aux))
-    res[0][...] *= (1. + ampl3 * sin(8. * pi * x))
-    res[0][...] *= (1. + ampl * strg1)
-    res[1][...] = ampl * strg2
-    res[2][...] = ampl * strg3
-    return res
-
-
-def initScal(res, x, y, z, t):
-    yy = abs(y - 0.5)
-    aux = (0.1 - 2. * yy) / (4. * width)
-    res[0][...] = 0.5 * (1. + tanh(aux))
-    #strg3 = exp(-abs(aux ** 2)) * randZ
-    #res[0][...] *= (1. + ampl2 * strg3)
-    return res
-
-## Fields
-velo = Field(domain=box, formula=computeVel,
-             name='Velocity', is_vector=True)
-vorti = Field(domain=box,
-              name='Vorticity', is_vector=True)
-scal = Field(domain=box, formula=initScal,
-              name='PassiveScalar', is_vector=False)
-
-## Variables
-## Usual Cartesian topology definition
-# At the moment we use two (or three?) topologies :
-# - "topo" for Stretching and all operators based on finite differences.
-#    --> ghost layer = 2
-# - topo from Advection operator for all the other operators.
-#    --> no ghost layer
-# - topo from fftw for Poisson and Diffusion.
-# Todo : check compat between scales and fft operators topologies.
-ghosts = [NBGHOSTS] * box.dimension
-topo = Cartesian(box, box.dimension, nbElem,
-                 ghosts=ghosts)
-
-## Navier Stokes Operators
-advec = Advection(velo, vorti,
-                  resolutions={velo: nbElem,
-                               vorti: nbElem},
-                  method=ADVECTION_METHOD
-                  )
-advecScal = Advection(velo, scal,
-                      resolutions={velo: nbElem,
-                                   scal: nbElem_s},
-                      method=ADVECTION_METHOD
-                      )
-
-stretch = Stretching(velo, vorti,
-                     resolutions={velo: nbElem,
-                                  vorti: nbElem},
-                     topo=topo
-                     )
-
-diffusion = Diffusion(vorti,
-                      resolution=nbElem,
-                      viscosity=VISCOSITY
-                      )
-
-poisson = Poisson(velo, vorti,
-                  resolutions={velo: nbElem,
-                               vorti: nbElem},
-                  projection=[True, 1, False],
-                  )
-
-c = Curl(velo, vorti,
-         resolutions={velo: nbElem,
-                      vorti: nbElem},
-         method={SpaceDiscretisation: fftw2py, GhostUpdate: True},
-         )
-
-## Simulation
-simu = Simulation(tinit=0.0,
-                  tend=4.5, timeStep=0.001,
-                  iterMax=1000000)
-
-
-# Bridges between the different topologies in order to
-# redistribute data.
-# 1 -Advection to stretching
-toGhost_vorti = Redistribute([vorti], advec, stretch)
-toGhost_velo = Redistribute([velo], poisson, stretch)
-# 2 - Stretching to Poisson/Diffusion
-fromGhost_vorti = Redistribute([vorti], stretch, diffusion)
-
-#  Define the problem to solve
-pb = NSProblem(operators=[
-        toGhost_velo,
-        advecScal,
-        advec,
-        toGhost_vorti,
-        stretch,
-        fromGhost_vorti,
-        diffusion,
-        poisson,
-        ], simulation=simu, dumpFreq=-1)
-
-## Setting solver to Problem (o nly operators for computational tasks)
-pb.pre_setUp()
-## Diagnostics related to the problem
-topofft = poisson.discreteFields[poisson.vorticity].topology
-energy = Energy_enstrophy(velo, vorti, isNormalized=True,
-                          topo=topofft,
-                          viscosity=VISCOSITY,
-                          frequency=1,
-                          prefix='./res_NS_planeJet/energy_ref_128-256.dat')
-
-# initialisation
-vorti.setTopoInit(topofft)
-velo.setTopoInit(topofft)
-#scal.setTopoInit(topofft)
-pb.addMonitors([energy])
-pb.setUp()
-
-
-c.discretize()
-c.setUp()
-
-c.apply(simu)
-
-p = Printer(variables=[scal,],
-            topo=scal.discreteFields.keys()[0],
-            frequency=100,
-            prefix='./res_NS_planeJet/scal_ref_128-256',
-            formattype=HDF5)
-pb.addMonitors([p])
-p._printStep()
-
-
-def run():
-    pb.solve()
-
-## Solve problem
-from parmepy.mpi import MPI
-print "Start computation ..."
-time = MPI.Wtime()
-run()
-print 'total time (rank):', MPI.Wtime() - time, '(', topo.rank, ')'
-
-
-pb.finalize()
-## Clean memory buffers
diff --git a/Examples/Plane_jet/NS_planeJet_hybrid_MS.py b/Examples/Plane_jet/NS_planeJet_hybrid_MS.py
index d5d8de7a3623a34095cd64e56b2a9019958a814a..fc8872b855cabcd1bf8cddd6824fc33da4f57504 100644
--- a/Examples/Plane_jet/NS_planeJet_hybrid_MS.py
+++ b/Examples/Plane_jet/NS_planeJet_hybrid_MS.py
@@ -1,43 +1,46 @@
-import parmepy as pp
-from parmepy.constants import ORDER, np, PARMES_REAL, HDF5
-from parmepy.f2py import fftw2py
-from parmepy.fields.continuous import Field
-from parmepy.mpi.topology import Cartesian
-from parmepy.operator.advection import Advection
-from parmepy.operator.stretching import Stretching
-from parmepy.operator.poisson import Poisson
-from parmepy.operator.diffusion import Diffusion
-from parmepy.operator.redistribute import Redistribute
-from parmepy.operator.redistribute_intercomm import RedistributeIntercomm
-from parmepy.operator.monitors import Printer, Energy_enstrophy, DragAndLift,\
-    Reprojection_criterion
-from parmepy.problem.simulation import Simulation
-from parmepy.operator.differential import Curl
-from parmepy.methods_keys import Scales, TimeIntegrator, Interpolation,\
-    Remesh, Support, Splitting, dtCrit, SpaceDiscretisation, GhostUpdate, MultiScale
-from parmepy.numerics.integrators.runge_kutta2 import RK2 as RK2
-from parmepy.numerics.integrators.runge_kutta3 import RK3 as RK3
-from parmepy.numerics.integrators.runge_kutta4 import RK4 as RK4
-from parmepy.numerics.finite_differences import FD_C_4, FD_C_2
-from parmepy.numerics.interpolation import Linear
-from parmepy.numerics.remeshing import L6_4, L4_4
-from parmepy.f2py import fftw2py
-from parmepy.mpi.main_var import main_comm, main_rank, main_size
-from parmepy.problem.problem_tasks import ProblemTasks
-from parmepy.fields.variable_parameter import VariableParameter
-from parmepy.operator.adapt_timestep import AdaptTimeStep
-
-NS_TASK = 0
-PS_TASK = 1
-
-assert main_size > 1
-proc_tasks = [NS_TASK,] * main_size
-proc_tasks[0] = PS_TASK
-if main_size > 5:
-    proc_tasks[5] = PS_TASK
-comm_s = main_comm.Split(color=proc_tasks[main_rank], key=main_rank)
-comm_s_size = comm_s.Get_size()
-
+#!/usr/bin/env python
+# Script arguments:
+# 1. Flow resolution
+# 2. Scalar resolution
+# 3. Dictionary mapping MPI rank to device id: {mpi rank: device id}
+# 4. Whether the initial condition is perturbed
+# 5. Whether data output is enabled
+import sys
+USER_NB_ELEM_UW = eval(sys.argv[1])
+USER_NB_ELEM_S = eval(sys.argv[2])
+USER_RANK_DEVICE_ID = eval(sys.argv[3])
+RANDOM_INIT = eval(sys.argv[4])
+IS_OUTPUT = eval(sys.argv[5])
+import hysop
+# import hysop.gpu
+# hysop.gpu.CL_PROFILE = True
+from hysop.constants import np, HDF5, ASCII, HYSOP_MPI_REAL
+from hysop.mpi.main_var import MPI, main_size, main_rank, main_comm
+from hysop.tools.parameters import MPI_params, Discretization, IO_params
+from hysop.problem.simulation import Simulation
+from hysop.fields.variable_parameter import VariableParameter
+from hysop.methods_keys import TimeIntegrator, Interpolation, Remesh,\
+    Support, Splitting, MultiScale, SpaceDiscretisation, \
+    GhostUpdate, Scales, dtCrit
+from hysop.numerics.integrators.runge_kutta2 import RK2 as RK
+from hysop.numerics.integrators.runge_kutta3 import RK3 as RK3
+from hysop.numerics.finite_differences import FD_C_4
+from hysop.numerics.remeshing import L6_4 as remesh_formula
+from hysop.numerics.remeshing import Linear
+from hysop.operator.advection import Advection
+from hysop.operator.diffusion import Diffusion
+from hysop.operator.stretching import Stretching
+from hysop.operator.poisson import Poisson
+from hysop.operator.differential import Curl
+from hysop.operator.adapt_timestep import AdaptTimeStep
+from hysop.operator.custom import CustomMonitor
+from hysop.operator.redistribute_inter import RedistributeInter
+from hysop.operator.redistribute_intra import RedistributeIntra
+from hysop.gpu.gpu_transfer import DataTransfer
+from hysop.domain.subsets.boxes import SubBox
+from hysop.operator.hdf_io import HDF_Writer
+from hysop.operator.energy_enstrophy import EnergyEnstrophy
+import hysop.tools.numpywrappers as npw
 pi = np.pi
 cos = np.cos
 sin = np.sin
@@ -45,42 +48,57 @@ exp = np.exp
 abs = np.abs
 tanh = np.tanh
 
-## Global resolution
-dim = 3
-nb = 65
-nb_s = 129
-nbElem = [nb] * dim
-nbElem_s = [nb_s] * dim
-if proc_tasks[main_rank] == NS_TASK:
-    try:
-        rand_file = "{0}_{1}_{2}".format(*tuple([nb-1] * 3))
-        rand_file += "_{0}p_{1}".format(comm_s_size, comm_s.Get_rank())
-        randX = np.asarray(np.reshape(np.fromfile('rand/rand_X_' + rand_file),
-                                      [nb - 1, nb - 1,(nb - 1)/comm_s_size]),
-                           dtype=PARMES_REAL, order=ORDER)
-        randY = np.asarray(np.reshape(np.fromfile('rand/rand_Y_' + rand_file),
-                                      [nb - 1, nb - 1,(nb - 1)/comm_s_size]),
-                           dtype=PARMES_REAL, order=ORDER)
-        randZ = np.asarray(np.reshape(np.fromfile('rand/rand_Z_' + rand_file),
-                                      [nb - 1, nb - 1,(nb - 1)/comm_s_size]),
-                           dtype=PARMES_REAL, order=ORDER)
-    except IOError:
-        from create_random_arrays import create_arrays
-        create_arrays([nb-1] * 3)
-else:
-    randX, randY, randZ = None, None, None
 
-##initjet.f
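+# Task split: ranks listed in USER_RANK_DEVICE_ID run the GPU scalar task; all other ranks run the CPU flow (velocity/vorticity) task.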
+TASK_UW = 1
+TASK_SCALAR = 2
+PROC_TASKS = [TASK_UW, ] * main_size
+for p in USER_RANK_DEVICE_ID:
+    PROC_TASKS[p] = TASK_SCALAR
+try:
+    DEVICE_ID = USER_RANK_DEVICE_ID[main_rank]
+except KeyError:
+    DEVICE_ID = None
+out_freq = 200
+# Physical parameters:
+# Flow viscosity
+VISCOSITY = 1e-4
+# Schmidt number
+SC = ((1. * USER_NB_ELEM_S[0] - 1.)/(1. * USER_NB_ELEM_UW[0] - 1.))**2
+# Scalar diffusivity
+DIFF_COEFF_SCAL = VISCOSITY / SC
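+# (i.e. the scalar diffusivity decreases as the square of the scalar-to-flow resolution ratio)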
+
 width = 0.01
 ampl3 = 0.3
 ampl = 0.05
-ampl2=0.05
+ampl2 = 0.05
+
+
+ctime = MPI.Wtime()
+
+# Domain
+box = hysop.Box(length=[1., 1., 1.], origin=[0., 0., 0.],
+                proc_tasks=PROC_TASKS)
+mpi_params = MPI_params(comm=box.comm_task, task_id=PROC_TASKS[main_rank])
+mpi_params_S = MPI_params(comm=box.comm_task, task_id=TASK_SCALAR)
+mpi_params_UW = MPI_params(comm=box.comm_task, task_id=TASK_UW)
+
+
 def computeVel(res, x, y, z, t):
     yy = abs(y - 0.5)
     aux = (0.1 - 2. * yy) / (4. * width)
-    strg1 = exp(-abs(aux ** 2)) * randX
-    strg2 = exp(-abs(aux ** 2)) * randY
-    strg3 = exp(-abs(aux ** 2)) * randZ
+    strg1 = exp(-abs(aux ** 2))
+    strg2 = exp(-abs(aux ** 2))
+    strg3 = exp(-abs(aux ** 2))
+    if RANDOM_INIT:
+        from create_random_arrays import random_init
+        randX, randY, randZ = random_init(res[0].shape, box.comm_task)
+        strg1 = exp(-abs(aux ** 2)) * randX
+        strg2 = exp(-abs(aux ** 2)) * randY
+        strg3 = exp(-abs(aux ** 2)) * randZ
+    else:
+        strg1 = 0.
+        strg2 = 0.
+        strg3 = 0.
     res[0][...] = 0.5 * (1. + tanh(aux))
     res[0][...] *= (1. + ampl3 * sin(8. * pi * x))
     res[0][...] *= (1. + ampl * strg1)
@@ -93,201 +111,364 @@ def initScal(res, x, y, z, t):
     yy = abs(y - 0.5)
     aux = (0.1 - 2. * yy) / (4. * width)
     res[0][...] = 0.5 * (1. + tanh(aux))
+    res[0][...] *= (1. + ampl3 * sin(8. * pi * x))
     return res
 
-# number of ghosts in usual cartesian topo
-ADVECTION_METHOD = {Scales: 'p_64', MultiScale: 'L4_4'}
-ADVECTION_METHOD_S = {TimeIntegrator: RK2,
-                      Interpolation: Linear,
-                      Remesh: L6_4,
-                      Support: 'gpu_1k',
-                      Splitting: 'o2',
-                      MultiScale: L4_4}
-VISCOSITY = 1e-4
 
-## Domain
-box = pp.Box(dim, length=[1., ]*3)
-
-## Fields
-velo = Field(domain=box, formula=computeVel,
-             name='Velocity', is_vector=True)
-vorti = Field(domain=box,
-              name='Vorticity', is_vector=True)
-scal = Field(domain=box, formula=initScal,
-              name='PassiveScalar', is_vector=False)
-
-
-topodims = np.ones((dim, ))
-topodims[-1] = comm_s.Get_size()
-topo_shared = box.getOrCreateTopology( 1, nbElem, topodims, comm=comm_s)
-
-## Navier Stokes Operators
-advec = Advection(velo, vorti,
-                  resolutions={velo: nbElem,
-                               vorti: nbElem},
-                  method=ADVECTION_METHOD,
-                  task_id=NS_TASK,
-                  comm=comm_s,
-                  )
-advecScal = Advection(velo, scal,
-                      resolutions={velo: nbElem,
-                                   scal: nbElem_s},
-                      method=ADVECTION_METHOD_S,
-                      comm=comm_s,
-                      task_id=PS_TASK,
-                      device_id=main_rank//5,
-                      device_type='gpu',
-                      diffusion=True, diffusion_coeff=1e-5,
-                      )
-
-stretch = Stretching(velo, vorti,
-                     resolutions={velo: nbElem,
-                                  vorti: nbElem},
-                     comm=comm_s,
-                     task_id=NS_TASK,
-                     )
-
-diffusion = Diffusion(vorti,
-                      resolution=nbElem,
-                      viscosity=VISCOSITY,
-                      comm=comm_s,
-                      task_id=NS_TASK,
-                      )
-
-poisson = Poisson(velo, vorti,
-                  resolutions={velo: nbElem,
-                               vorti: nbElem},
-                  comm=comm_s,
-                  task_id=NS_TASK,
-                  )
-poisson.activateProjection(
-    Reprojection_criterion(vorti, reproj_cst=0, reprojRate=1,
-                           topo=None))
-
-c = Curl(velo, vorti,
-         resolutions={velo: nbElem,
-                      vorti: nbElem},
-         method={SpaceDiscretisation: 'fftw', GhostUpdate: True},
-         task_id=NS_TASK,
-         comm=comm_s,
-         )
+temp_maxvelo = npw.zeros((3, ))
+maxvelo_values = npw.zeros((3, ))
+
 
-data = {'dt': 0.001 }
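+# Component-wise maximum of |velocity|, reduced over the flow-task ranks (used by the 'maxvelo' monitor below).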
+def calc_maxvelo(simu, v):
+    temp_maxvelo[0] = np.max(np.abs(v[0].data[0]))
+    temp_maxvelo[1] = np.max(np.abs(v[0].data[1]))
+    temp_maxvelo[2] = np.max(np.abs(v[0].data[2]))
+    v[0].topology.comm.Allreduce(sendbuf=[temp_maxvelo, 3, HYSOP_MPI_REAL],
+                                 recvbuf=[maxvelo_values, 3, HYSOP_MPI_REAL],
+                                 op=MPI.MAX)
+    return maxvelo_values
+
+
+# Fields
+velo = hysop.Field(domain=box, formula=computeVel,
+                   name='Velocity', isVector=True)
+vorti = hysop.Field(domain=box,
+                    name='Vorticity', isVector=True)
+scal = hysop.Field(domain=box, formula=initScal,
+                   name='Scalar', isVector=False)
+
+data = {'dt': 0.001}
 dt = VariableParameter(data)
+simu = Simulation(tinit=0.0, tend=5., timeStep=0.001, iterMax=10000)
+
+# Flow discretizations:
+d_uw = Discretization(USER_NB_ELEM_UW)
+d_uw_ghosts = Discretization(USER_NB_ELEM_UW, [2, ] * 3)
+# Scalar discretization
+d_s = Discretization(USER_NB_ELEM_S)
+# Velocity discretization for scalar advection
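+# (a ghost layer is added to the velocity when the scalar grid is finer, for the multi-scale linear interpolation)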
+if USER_NB_ELEM_UW[0] == USER_NB_ELEM_S[0]:
+    d_uw_for_s = Discretization(USER_NB_ELEM_UW)
+else:
+    d_uw_for_s = Discretization(USER_NB_ELEM_UW, [1, ] * 3)
+
+# Topologies
+topo_GPU_scal = None
+topo_GPU_velo = None
+topo_CPU_velo_fft = None
+topo_CPU_velo_ghosts = None
+topo_CPU_velo_scales = None
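+# Each task builds only the topologies it needs; the others remain None on this rank.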
+if box.isOnTask(TASK_UW):
+    topo_CPU_velo_scales = box.create_topology(
+        d_uw, dim=2, mpi_params=mpi_params_UW)
+if box.isOnTask(TASK_SCALAR):
+    topo_GPU_velo = box.create_topology(
+        d_uw_for_s, dim=1, mpi_params=mpi_params_S)
+    topo_GPU_scal = box.create_topology(
+        d_s, dim=1, mpi_params=mpi_params_S)
+
+# Operators
+# GPU operators
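+# With several ranks on the scalar task, the multi-GPU advection needs max_velocity and max_dt to size its inter-GPU communication buffers.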
+if PROC_TASKS.count(TASK_SCALAR) > 1:
+    advec_scal = Advection(velo,
+                           discretization=topo_GPU_velo,
+                           variables={scal: topo_GPU_scal},
+                           mpi_params=mpi_params_S,
+                           method={TimeIntegrator: RK,
+                                   Interpolation: Linear,
+                                   Remesh: remesh_formula,
+                                   Support: 'gpu_1k',
+                                   Splitting: 'o2',
+                                   MultiScale: Linear},
+                           max_velocity=[1.2, 0.7, 0.7],
+                           max_dt=0.012,
+                           device_id=DEVICE_ID,
+                           device_type='gpu')
+else:
+    advec_scal = Advection(velo,
+                           discretization=topo_GPU_velo,
+                           variables={scal: topo_GPU_scal},
+                           mpi_params=mpi_params_S,
+                           method={TimeIntegrator: RK,
+                                   Interpolation: Linear,
+                                   Remesh: remesh_formula,
+                                   Support: 'gpu_1k',
+                                   Splitting: 'o2',
+                                   MultiScale: Linear},
+                           device_id=DEVICE_ID,
+                           device_type='gpu')
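+# Scalar diffusion is solved on the GPU with a finite-difference scheme.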
+diffusion_scal = Diffusion(viscosity=DIFF_COEFF_SCAL,
+                           vorticity=scal,
+                           discretization=topo_GPU_scal,
+                           mpi_params=mpi_params_S,
+                           method={Support: 'gpu',
+                                   SpaceDiscretisation: 'fd'},
+                           device_id=DEVICE_ID,
+                           device_type='gpu')
+diffusion_scal.name += '_(Scalar)'
+
+# CPU operators
+advec = Advection(velo,
+                  discretization=topo_CPU_velo_scales,
+                  variables={vorti: topo_CPU_velo_scales},
+                  mpi_params=mpi_params_UW,
+                  method={Scales: 'p_64', MultiScale: 'L4_4'})
+stretch = Stretching(velo, vorti, discretization=d_uw_ghosts,
+                     mpi_params=mpi_params_UW)
+diffusion = Diffusion(variables={vorti: d_uw},
+                      viscosity=VISCOSITY,
+                      mpi_params=mpi_params_UW)
+poisson = Poisson(velo, vorti, discretization=d_uw,
+                  mpi_params=mpi_params_UW)
+c = Curl(velo, vorti, discretization=d_uw,
+         method={SpaceDiscretisation: 'fftw', GhostUpdate: True},
+         mpi_params=mpi_params_UW)
+#dt_output = None
+#if IS_OUTPUT:
+dt_output = IO_params(frequency=1, filename='dt.dat', fileformat=ASCII)
 dt_adapt = AdaptTimeStep(velo, vorti,
-                         dt_adapt=dt,
-                         resolutions={velo: nbElem,
-                                      vorti: nbElem},
-                         io_params={"filename": "dt_hybrid_64-128.dat"},
-                         task_id=NS_TASK,
-                         comm=comm_s)
-to_ghostsdt = Redistribute([velo], poisson, dt_adapt, name_suffix='toG_V_dt',
-                           task_id=NS_TASK)
-
-toGhost_vorti = Redistribute([vorti], advec, stretch, name_suffix='toG_W',
-                             task_id=NS_TASK)
-toGhost_velo = Redistribute([velo], poisson, stretch, name_suffix='toG_V',
-                            task_id=NS_TASK)
-fromGhost_vorti = Redistribute([vorti], stretch, diffusion, name_suffix='fromG_W',
-                               task_id=NS_TASK)
-
-distrForAdvecX = Redistribute([velo], None, advecScal.advecDir[0],
-                              component=0, name_suffix='toADVEC_X',
-                              task_id=PS_TASK)
-distrForAdvecY = Redistribute([velo], None, advecScal.advecDir[1],
-                              component=1, name_suffix='toADVEC_Y',
-                              task_id=PS_TASK)
-distrForAdvecZ = Redistribute([velo], None, advecScal.advecDir[2],
-                              component=2, name_suffix='toADVEC_Z',
-                              task_id=PS_TASK)
-
-redX = RedistributeIntercomm(velo, poisson, distrForAdvecX,
-                             proc_tasks, main_comm,
-                             topo=topo_shared, name_suffix='INTER_X',
-                             component=0)
-redY = RedistributeIntercomm(velo, poisson, distrForAdvecY,
-                             proc_tasks, main_comm,
-                             topo=topo_shared, name_suffix='INTER_Y',
-                             component=1)
-redZ = RedistributeIntercomm(velo, poisson, distrForAdvecZ,
-                             proc_tasks, main_comm,
-                             topo=topo_shared, name_suffix='INTER_Z',
-                             component=2)
-distrForAdvecX.opFrom = redX
-distrForAdvecY.opFrom = redY
-distrForAdvecZ.opFrom = redZ
-
-## Simulation
-simu = Simulation(tinit=0.0, tend=4.5, timeStep=dt.data['dt'], iterMax=10000,
-                  bcast_comm=main_comm, bcast_rk=1
-                  )
-
-#  Define the problem to solve
-pb = ProblemTasks(operators=[
-        redX, redY, redZ,
-        distrForAdvecX,
-        distrForAdvecY,
-        distrForAdvecZ,
-        toGhost_velo,
-        advecScal,
-        advec,
-        toGhost_vorti,
-        stretch,
-        fromGhost_vorti,
-        diffusion,
-        poisson,
-        to_ghostsdt, dt_adapt,
-        ], simulation=simu, tasks_list=proc_tasks,
-                  dumpFreq=-1, main_comm=main_comm)
-
-pb.pre_setUp()
-
-if proc_tasks[main_rank] == NS_TASK:
-    topofft = poisson.discreteFields[poisson.vorticity].topology
-    topo_scal = None
+                         simulation=simu,
+                         time_range=[0, np.infty],
+                         discretization=d_uw_ghosts,
+                         method={TimeIntegrator: RK3,
+                                 SpaceDiscretisation: FD_C_4,
+                                 dtCrit: ['gradU', 'cfl']},
+                         lcfl=0.15,
+                         cfl=1.5,
+                         io_params=dt_output,
+                         mpi_params=mpi_params_UW)
+
+# Discretize the operators on each task
+if box.isOnTask(TASK_SCALAR):
+    for op in (advec_scal, diffusion_scal):
+        op.discretize()
+if box.isOnTask(TASK_UW):
+    for op in (advec, stretch, diffusion, poisson, c, dt_adapt):
+        op.discretize()
+
+# Get some topologies
+if box.isOnTask(TASK_UW):
+    topo_CPU_velo_fft = poisson.discreteFields[velo].topology
+    topo_CPU_velo_ghosts = stretch.discreteFields[velo].topology
+
+if IS_OUTPUT:
+    # Defining subdomains
+    L_V = 1. - 1. / (1. * USER_NB_ELEM_UW[0])
+    L_S = 1. - 1. / (1. * USER_NB_ELEM_S[0])
+    XY_plane_v = SubBox(origin=[0., 0., 0.5], length=[L_V, L_V, 0.],
+                        parent=box)
+    XZ_plane_v = SubBox(origin=[0., 0.5, 0.], length=[L_V, 0., L_V],
+                        parent=box)
+    XY_plane_s = SubBox(origin=[0., 0., 0.5], length=[L_S, L_S, 0.],
+                        parent=box)
+    XZ_plane_s = SubBox(origin=[0., 0.5, 0.], length=[L_S, 0., L_S],
+                        parent=box)
+    # Defining output operators
+    p_velo = HDF_Writer(variables={velo: topo_CPU_velo_fft},
+                        mpi_params=mpi_params_UW,
+                        io_params=IO_params(frequency=out_freq,
+                                            filename='flow',
+                                            fileformat=HDF5))
+    p_velo_xy = HDF_Writer(variables={velo: topo_CPU_velo_fft,
+                                      vorti: topo_CPU_velo_fft},
+                           var_names={velo: 'Velocity', vorti: 'Vorticity'},
+                           subset=XY_plane_v,
+                           mpi_params=mpi_params_UW,
+                           io_params=IO_params(frequency=out_freq,
+                                               filename='flow_XY',
+                                               fileformat=HDF5))
+    p_velo_xz = HDF_Writer(variables={velo: topo_CPU_velo_fft,
+                                      vorti: topo_CPU_velo_fft},
+                           var_names={velo: 'Velocity', vorti: 'Vorticity'},
+                           subset=XZ_plane_v,
+                           mpi_params=mpi_params_UW,
+                           io_params=IO_params(frequency=out_freq,
+                                               filename='flow_XZ',
+                                               fileformat=HDF5))
+    p_scal_xy = HDF_Writer(variables={scal: topo_GPU_scal},
+                           var_names={scal: 'Scalar'},
+                           subset=XY_plane_s,
+                           mpi_params=mpi_params_S,
+                           io_params=IO_params(frequency=out_freq,
+                                               filename='scal_XY',
+                                               fileformat=HDF5))
+    p_scal_xz = HDF_Writer(variables={scal: topo_GPU_scal},
+                           var_names={scal: 'Scalar'},
+                           subset=XZ_plane_s,
+                           mpi_params=mpi_params_S,
+                           io_params=IO_params(frequency=out_freq,
+                                               filename='scal_XZ',
+                                               fileformat=HDF5))
+    p_scal_xy.name += '_(Scalar)'
+    p_scal_xz.name += '_(Scalar)'
+    energy = EnergyEnstrophy(velocity=velo,
+                             vorticity=vorti,
+                             discretization=topo_CPU_velo_fft,
+                             mpi_params=mpi_params_UW,
+                             io_params=IO_params(frequency=1,
+                                                 filename='energy.dat',
+                                                 fileformat=ASCII))
+    maxvelo = CustomMonitor(variables={velo: topo_CPU_velo_scales},
+                            function=calc_maxvelo,
+                            res_shape=3,
+                            mpi_params=mpi_params_UW,
+                            io_params=IO_params(frequency=1,
+                                                filename='maxvelo.dat',
+                                                fileformat=ASCII))
+    if box.isOnTask(TASK_UW):
+        for op in (p_velo, p_velo_xy, p_velo_xz, energy, maxvelo):
+            op.discretize()
+    if box.isOnTask(TASK_SCALAR):
+        for op in (p_scal_xy, p_scal_xz):
+            op.discretize()
+
+# Redistribute operators
+# CPU redistributes
+toGhost_vorti = RedistributeIntra(variables=[vorti],
+                                  source=advec, target=stretch,
+                                  name_suffix='toG_W',
+                                  mpi_params=mpi_params_UW)
+toGhost_vorti.name += '_ToGhost_Vorti'
+toGhost_velo = RedistributeIntra(variables=[velo],
+                                 source=poisson, target=stretch,
+                                 run_till=[stretch, dt_adapt],
+                                 name_suffix='toG_V',
+                                 mpi_params=mpi_params_UW)
+toGhost_velo.name += '_ToGhost_Velo'
+toScales_velo = RedistributeIntra(variables=[velo],
+                                  source=poisson, target=advec,
+                                  mpi_params=mpi_params_UW)
+toScales_velo.name += '_toScales_V'
+toScales_vorti = RedistributeIntra(variables=[vorti],
+                                   source=poisson, target=advec,
+                                   mpi_params=mpi_params_UW)
+toScales_vorti.name += '_toScales_W'
+fromGhost_vorti = RedistributeIntra(variables=[vorti],
+                                    source=stretch, target=diffusion,
+                                    name_suffix='fromG_W',
+                                    mpi_params=mpi_params_UW)
+fromGhost_vorti.name += '_FromGhost_vorti'
+toDev = DataTransfer(source=topo_GPU_velo, target=advec_scal,
+                     variables={velo: topo_GPU_velo},
+                     mpi_params=mpi_params_S)
+toDev.name += '_ToDev_velo'
+
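+# Inter-task bridge: the velocity computed by the CPU flow task is redistributed onto the GPU task topology, then pushed to the device (run_till=[toDev]).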
+redistrib = RedistributeInter(variables=[velo],
+                              parent=main_comm,
+                              source=topo_CPU_velo_fft, target=topo_GPU_velo,
+                              source_id=TASK_UW, target_id=TASK_SCALAR,
+                              run_till=[toDev])
+if IS_OUTPUT:
+    toHost = DataTransfer(source=topo_GPU_scal, target=p_scal_xy,
+                          variables={scal: topo_GPU_scal},
+                          mpi_params=mpi_params_S,
+                          freq=out_freq,
+                          run_till=[p_scal_xz, ])
+    toHost.name += '_ToHost_Scal'
+
+# Operators setup
+if box.isOnTask(TASK_SCALAR):
+    for op in (advec_scal, diffusion_scal):
+        op.setup()
+    if IS_OUTPUT:
+        for op in (p_scal_xy, p_scal_xz, toHost):
+            op.setup()
+    for op in (toDev, ):
+        op.setup()
+if box.isOnTask(TASK_UW):
+    for op in (advec, stretch, diffusion, poisson, c, dt_adapt):
+        op.setup()
+    if IS_OUTPUT:
+        for op in (p_velo, p_velo_xy, p_velo_xz, energy, maxvelo):
+            op.setup()
+    for op in (toGhost_vorti, fromGhost_vorti, toGhost_velo,
+               toScales_velo, toScales_vorti):
+        op.setup()
+# Wait until all operators are set up before setting up the inter-task redistribute
+main_comm.Barrier()
+redistrib.setup()
+
+# Operators list
+if IS_OUTPUT:
+    operators_list = [redistrib,
+                      toDev, advec_scal, diffusion_scal,
+                      toHost, p_scal_xy, p_scal_xz,
+                      advec, toGhost_vorti, stretch,
+                      fromGhost_vorti, diffusion, poisson,
+                      p_velo, p_velo_xy, p_velo_xz, energy, maxvelo,
+                      toGhost_velo, toScales_velo, toScales_vorti, dt_adapt]
 else:
-    topofft = None
-    topo_scal = advecScal.advecDir[0].discreteFields[scal].topology
-
-energy = Energy_enstrophy(velo, vorti, isNormalized=True,
-                          topo=topofft,
-                          viscosity=VISCOSITY,
-                          io_params={"filename": "energy_hybrid_64-128.dat"},
-                          task_id=NS_TASK)
-p = Printer(variables=[scal,],
-            topo=topo_scal,
-            frequency=200,
-            prefix='scal_hybrid_64-128',
-            formattype=HDF5,
-            task_id=PS_TASK)
-pb.addMonitors([energy, p])
-
-
-# initialisation
-vorti.setTopoInit(topofft)
-velo.setTopoInit(topofft)
-scal.setTopoInit(topo_scal)
-
-pb.setUp()
-
-if proc_tasks[main_rank] == NS_TASK:
-    c.discretize()
-    c.setUp()
+    operators_list = [redistrib,
+                      toDev, advec_scal, diffusion_scal,
+                      advec, toGhost_vorti, stretch,
+                      fromGhost_vorti, diffusion, poisson,
+                      toGhost_velo, toScales_velo, toScales_vorti, dt_adapt]
+
+# Field initialization
+if box.isOnTask(TASK_SCALAR):
+    scal.initialize(topo=topo_GPU_scal)
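+    # The GPU scalar diffusion reuses the particle buffer of the X-direction advection as its temporary field.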
+    advec_dirX = advec_scal.advec_dir[0].discrete_op
+    gpu_scal = advec_dirX.fields_on_grid[0]
+    gpu_pscal = advec_dirX.fields_on_part[gpu_scal][0]
+    diffusion_scal.discrete_op.set_field_tmp(gpu_pscal)
+    if IS_OUTPUT:
+        p_scal_xy.apply(simu)
+        p_scal_xz.apply(simu)
+if box.isOnTask(TASK_UW):
+    velo.initialize(topo=topo_CPU_velo_fft)
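+    # Compute the initial vorticity from the velocity (Curl), recover the velocity with the Poisson solver, then update the ghost and Scales topologies.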
     c.apply(simu)
-
-if proc_tasks[main_rank] == PS_TASK:
-    p.apply(simu)
-
+    poisson.apply(simu)
+    toGhost_velo.apply(simu)
+    toScales_velo.apply(simu)
+    toScales_vorti.apply(simu)
+    if IS_OUTPUT:
+        p_velo.apply(simu)
+        p_velo_xy.apply(simu)
+        p_velo_xz.apply(simu)
+
+simu.initialize()
+setup_time = MPI.Wtime() - ctime
 main_comm.Barrier()
-from parmepy.mpi import MPI
-print "Start computation ..."
-time = MPI.Wtime()
-pb.solve()
 
-if proc_tasks[main_rank] == PS_TASK:
-    p.apply(simu)
+# Solve
+solve_time = 0.
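+# Time loop: each rank applies only the operators belonging to its task; the final barrier keeps both tasks in step before advancing the simulation.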
+while not simu.isOver:
+    ctime = MPI.Wtime()
+    if main_rank == 0:
+        simu.printState()
+    for op in operators_list:
+        if box.isOnTask(op.task_id()):
+            op.apply(simu)
+    if box.isOnTask(TASK_SCALAR):
+        # Wait for GPU operations on the scalar to complete
+        advec_scal.advec_dir[0].discreteFields[scal].wait()
+    solve_time += MPI.Wtime() - ctime
+    dt_adapt.wait()
+    # Synchronize all MPI processes across tasks
+    main_comm.Barrier()
+    simu.advance()
 
-print 'total time (rank):', MPI.Wtime() - time, '(', main_rank, ')'
-
-pb.finalize()
-print pb.timer
+main_comm.Barrier()
+nb_ite = simu.currentIteration
+simu.finalize()
+
+
+if IS_OUTPUT:
+    if box.isOnTask(TASK_SCALAR):
+        p_scal_xy.apply(simu)
+        p_scal_xz.apply(simu)
+    if box.isOnTask(TASK_UW):
+        p_velo.apply(simu)
+        p_velo_xy.apply(simu)
+        p_velo_xz.apply(simu)
+
+for op in operators_list:
+    if box.isOnTask(op.task_id()):
+        op.finalize()
+
+velo.finalize()
+if box.isOnTask(TASK_SCALAR):
+    scal.finalize()
+if box.isOnTask(TASK_UW):
+    vorti.finalize()
+main_comm.Barrier()
diff --git a/Examples/Plane_jet/create_random_arrays.py b/Examples/Plane_jet/create_random_arrays.py
new file mode 100644
index 0000000000000000000000000000000000000000..de7322631e99cba53ce5e92b837ad3074bf21349
--- /dev/null
+++ b/Examples/Plane_jet/create_random_arrays.py
@@ -0,0 +1,39 @@
+import os
+import numpy as np
+from hysop.constants import HYSOP_REAL, ORDER
+
+
+def random_init(shape, mpi_comm):
+    # Create a folder to store all random arrays
+    d = 'rand_init'
+    if mpi_comm.Get_rank() == 0:
+        if not os.path.exists(d):
+            os.makedirs(d)
+    mpi_comm.Barrier()
+    file_name = "{0}_{1}_{2}".format(*shape)
+    file_name += "_{0}p_{1}.dat".format(mpi_comm.Get_size(),
+                                        mpi_comm.Get_rank())
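+    # Reuse cached random arrays when they exist for this shape and process layout; otherwise generate and store new ones.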
+    try:
+        randX = np.asarray(
+            np.reshape(np.fromfile(os.path.join(d, 'randX_' + file_name)),
+                       shape),
+            dtype=HYSOP_REAL, order=ORDER)
+        randY = np.asarray(
+            np.reshape(np.fromfile(os.path.join(d, 'randY_' + file_name)),
+                       shape),
+            dtype=HYSOP_REAL, order=ORDER)
+        randZ = np.asarray(
+            np.reshape(np.fromfile(os.path.join(d, 'randZ_' + file_name)),
+                       shape),
+            dtype=HYSOP_REAL, order=ORDER)
+    except IOError:
+        randX = np.asarray(np.random.random(shape),
+                           dtype=HYSOP_REAL, order=ORDER) - 0.5
+        randY = np.asarray(np.random.random(shape),
+                           dtype=HYSOP_REAL, order=ORDER) - 0.5
+        randZ = np.asarray(np.random.random(shape),
+                           dtype=HYSOP_REAL, order=ORDER) - 0.5
+        randX.tofile(os.path.join(d, 'randX_' + file_name))
+        randY.tofile(os.path.join(d, 'randY_' + file_name))
+        randZ.tofile(os.path.join(d, 'randZ_' + file_name))
+    return randX, randY, randZ
diff --git a/Examples/levelSet2D.py b/Examples/levelSet2D.py
deleted file mode 100755
index ab5bab3feaee6e24445393313c639c035fcdf7d6..0000000000000000000000000000000000000000
--- a/Examples/levelSet2D.py
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/usr/bin/env python
-import parmepy
-#parmepy.__VERBOSE__ = True
-from parmepy.domain.box import Box
-# import parmepy.gpu
-# parmepy.gpu.CL_PROFILE = True
-from parmepy.fields.continuous import Field
-from parmepy.operator.advection import Advection
-from parmepy.problem.transport import TransportProblem
-from parmepy.operator.monitors.printer import Printer
-from parmepy.problem.simulation import Simulation
-from parmepy.operator.analytic import Analytic
-from parmepy.operator.redistribute import Redistribute
-from parmepy.methods_keys import TimeIntegrator, Interpolation, Remesh,\
-    Support, Splitting, MultiScale
-from parmepy.numerics.integrators.runge_kutta2 import RK2 as RK
-#from parmepy.numerics.integrators.runge_kutta4 import RK4 as RK
-from parmepy.numerics.interpolation import Linear
-from parmepy.numerics.remeshing import L2_1 as rmsh
-from parmepy.mpi.main_var import main_size
-import numpy as np
-
-
-# def vitesse(res, x, y, t=0.):
-#     res[0][...] = -np.sin(x * np.pi) ** 2 * np.sin(y * np.pi * 2) * \
-#         np.cos(t * np.pi / 3.)
-#     res[1][...] = np.sin(y * np.pi) ** 2 * np.sin(x * np.pi * 2) * \
-#         np.cos(t * np.pi / 3.)
-#     return res
-
-
-# def scalaire(res, x, y, t=0.):
-#     rr = np.sqrt((x - 0.5) ** 2 + (y - 0.75) ** 2)
-#     res[0][...] = 0.
-#     res[0][rr < 0.15] = 1.
-#     return res
-
-
-dim = 2
-boxLength = [1., 1.]
-boxMin = [0., 0.]
-v_nbElem = [4097+2048, ] * 2  # max 4097+2048 in double precision on marsanne
-nbElem = [4097+2048, ] * 2  # max 4097+2048 in double precision on marsanne
-
-timeStep = 0.025
-finalTime = 3.
-outputFilePrefix = 'res_2D/levelSet_2D_'
-outputModulo = 5
-simu = Simulation(tinit=0.0, tend=finalTime, timeStep=timeStep, iterMax=120)
-
-## Domain
-box = Box(dim, length=boxLength, origin=boxMin)
-
-## Fields
-scal = Field(domain=box, name='Scalar', #formula=scalaire
-             )
-velo = Field(domain=box, name='Velocity', is_vector=True, #formula=vitesse
-             )
-
-## Operators
-advec = Advection(velo, scal,
-                  resolutions={velo: v_nbElem,
-                               scal: nbElem},
-                  method={TimeIntegrator: RK,
-                          Interpolation: Linear,
-                          Remesh: rmsh,
-                          Support: 'gpu_1k',
-                          Splitting: 'o2',
-                          MultiScale: rmsh},
-                  src=['./levelSet2D.cl'],
-#batch_nb=2,
-                  )
-advec.discretize()
-velocity = Analytic(velo,
-                    resolutions={velo: v_nbElem},
-                    method={Support: 'gpu'},
-                    topo=advec.advecDir[0].discreteFields[velo].topology
-                    )
-
-distrForAdvecY = Redistribute([velo], velocity, advec.advecDir[1], component=1)
-
-##Problem
-# Sequential : no need of redistribute
-if main_size == 1:
-    pb = TransportProblem([velocity, advec], simu, dumpFreq=10000)
-else:
-    pb = TransportProblem([velocity, distrForAdvecY, advec],
-                          simu, dumpFreq=10000)
-
-## Setting solver to Problem
-pb.setUp()
-
-
-p = Printer(variables=[scal],
-            topo=scal.topoInit,
-            frequency=outputModulo,
-            prefix=outputFilePrefix,
-            ext='.dat')
-pb.addMonitors([p])
-p._printStep()
-
-## Solve problem
-pb.solve()
-
-pb.finalize()
-print pb.timer
diff --git a/Examples/levelSet2D_OpenCL-CPU.py b/Examples/levelSet2D_OpenCL-CPU.py
deleted file mode 100644
index f6c29c364a941f1d26db2b2bb1a0cd40128d53c7..0000000000000000000000000000000000000000
--- a/Examples/levelSet2D_OpenCL-CPU.py
+++ /dev/null
@@ -1,111 +0,0 @@
-import parmepy
-#parmepy.__VERBOSE__ = True
-from parmepy.domain.box import Box
-# import parmepy.gpu
-# parmepy.gpu.CL_PROFILE = True
-from parmepy.fields.continuous import Field
-from parmepy.operator.advection import Advection
-from parmepy.problem.transport import TransportProblem
-from parmepy.operator.monitors.printer import Printer
-from parmepy.problem.simulation import Simulation
-from parmepy.operator.analytic import Analytic
-from parmepy.operator.redistribute import Redistribute
-from parmepy.methods_keys import TimeIntegrator, Interpolation, Remesh,\
-    Support, Splitting, MultiScale
-#from parmepy.numerics.integrators.runge_kutta2 import RK2 as RK
-from parmepy.numerics.integrators.runge_kutta4 import RK4 as RK
-from parmepy.numerics.interpolation import Linear
-from parmepy.numerics.remeshing import L6_6 as rmsh
-from parmepy.mpi.main_var import main_size
-from parmepy.constants import np, HDF5
-
-
-def vitesse(res, x, y, t=0.):
-    res[0][...] = -np.sin(x * np.pi) ** 2 * np.sin(y * np.pi * 2) * \
-        np.cos(t * np.pi / 3.)
-    res[1][...] = np.sin(y * np.pi) ** 2 * np.sin(x * np.pi * 2) * \
-        np.cos(t * np.pi / 3.)
-    return res
-
-
-def scalaire(res, x, y, t=0.):
-    rr = np.sqrt((x - 0.5) ** 2 + (y - 0.75) ** 2)
-    res[0][...] = 0.
-    res[0][rr < 0.15] = 1.
-    return res
-
-
-dim = 2
-boxLength = [1., 1.]
-boxMin = [0., 0.]
-nbElem = [513, ] * 2
-
-timeStep = 0.025
-finalTime = 3.
-outputFilePrefix = 'levelSet_2D_'
-outputModulo = 10
-simu = Simulation(tinit=0.0, tend=finalTime, timeStep=timeStep, iterMax=120)
-
-## Domain
-box = Box(dim, length=boxLength, origin=boxMin)
-
-## Fields
-scal = Field(domain=box, name='Scalar', formula=scalaire
-             )
-velo = Field(domain=box, name='Velocity', is_vector=True, formula=vitesse
-             )
-
-## Operators
-# By default, with a 'gpu' support, operator is going to use the defaut device
-# given at cmake. To specify an other device, user must set the proper
-# parameters: platform_id and device_id.
-# parameter device_type can be used to get a specific device type
-advec = Advection(velo, scal,
-                  resolutions={velo: nbElem,
-                               scal: nbElem},
-                  method={TimeIntegrator: RK,
-                          Interpolation: Linear,
-                          Remesh: rmsh,
-                          Support: 'gpu_1k',
-                          Splitting: 'o2'},
-                  # platform_id=0,
-                  # device_id=0,
-                  # device_type='cpu'
-                  )
-advec.discretize()
-velocity = Analytic(velo,
-                    resolutions={velo: nbElem},
-                    topo=advec.advecDir[0].discreteFields[velo].topology
-                    )
-
-if main_size > 1:
-    distrForAdvecY = Redistribute([velo], velocity, advec.advecDir[1],
-                                  component=1)
-
-##Problem
-# Sequential : no need of redistribute
-if main_size == 1:
-    pb = TransportProblem([velocity, advec], simu, dumpFreq=-1)
-else:
-    pb = TransportProblem([velocity, distrForAdvecY, advec],
-                          simu, dumpFreq=-1)
-
-## Setting solver to Problem
-pb.setUp()
-
-print scal.topoInit
-p = Printer(variables=[scal],
-            topo=scal.topoInit,
-            frequency=outputModulo,
-            prefix=outputFilePrefix,
-            formattype=HDF5)
-pb.addMonitors([p])
-p.apply(simu)
-
-## Solve problem
-pb.solve()
-
-p.apply(simu)
-
-pb.finalize()
-print pb.timer
diff --git a/Examples/levelSet3D.py b/Examples/levelSet3D.py
deleted file mode 100755
index 5565f8e6d9fbbeb0eae04e2565830088d1a1d133..0000000000000000000000000000000000000000
--- a/Examples/levelSet3D.py
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/usr/bin/env python
-import parmepy
-from parmepy.domain.box import Box
-# import parmepy.gpu
-# parmepy.gpu.CL_PROFILE = True
-from parmepy.fields.continuous import Field
-from parmepy.operator.advection import Advection
-from parmepy.problem.transport import TransportProblem
-from parmepy.operator.monitors.printer import Printer
-from parmepy.operator.analytic import Analytic
-from parmepy.problem.simulation import Simulation
-from parmepy.operator.redistribute import Redistribute
-from parmepy.methods_keys import TimeIntegrator, Interpolation, Remesh,\
-    Support, Splitting, MultiScale, Scales, MultiScale
-from parmepy.numerics.integrators.runge_kutta2 import RK2 as RK
-#from parmepy.numerics.integrators.runge_kutta4 import RK4 as RK
-from parmepy.numerics.interpolation import Linear
-from parmepy.numerics.remeshing import L2_1 as rmsh, L4_2, L2_1, L4_4, L8_4
-from parmepy.constants import np, HDF5
-from parmepy.mpi.main_var import main_size
-sin, cos, pi, sqrt = np.sin, np.cos, np.pi, np.sqrt
-
-
-def scalaire(res, x, y, z, t=0.):
-    r = sqrt((x - 0.35) ** 2 + (y - 0.35) ** 2 + (z - 0.35) ** 2)
-    res[0][...] = 0.
-    res[0][r < 0.15] = 1.
-    return res
-
-
-def vitesse(res, x, y, z, t=0.):
-    res[0][...] = 2. * sin(pi*x)**2*sin(2.*pi*y)*sin(2.*pi*z)*cos(t*pi/3.)
-    res[1][...] = -sin(2.*pi*x)*sin(pi*y)**2*sin(2.*pi*z)*cos(t*pi/3.)
-    res[2][...] = -sin(2.*pi*x)*sin(2.*pi*y)*sin(pi*z)**2*cos(t*pi/3.)
-    return res
-
-dim = 3
-boxLength = [1., 1., 1.]
-boxMin = [0., 0., 0.]
-nbElem_v = [129] * 3
-nbElem_s = [129] * 3
-
-timeStep = 0.025
-finalTime = 3.
-outputFilePrefix = './res3D/levelSet_3D'
-outputModulo = 10
-simu = Simulation(tinit=0.0, tend=finalTime, timeStep=timeStep, iterMax=2000)
-
-## Domain
-box = Box(dim, length=boxLength, origin=boxMin)
-
-## Fields
-scal = Field(domain=box, name='Scalar', formula=scalaire
-             )
-velo = Field(domain=box, name='Velocity', is_vector=True, formula=vitesse
-             )
-
-## Operators
-advec = Advection(velo, scal,
-                  resolutions={velo: nbElem_v,
-                               scal: nbElem_s},
-                  method={TimeIntegrator: RK,
-                          Interpolation: Linear,
-                          Remesh: L6_4,
-                          Support: 'gpu_1k',
-                          Splitting: 'o2',
-                          MultiScale: L2_1},
-                  #src=['./levelSet3D.cl'],
-                  #device_type='cpu',platform_id=1,
-                  #batch_nb=2
-                  )
-advec.discretize()
-velocity = Analytic(velo,
-                    resolutions={velo: nbElem_v},
-                    #method={Support: 'gpu'},
-                    topo=advec.advecDir[0].discreteFields[velo].topology
-                    #topo=advec.discreteFields[velo].topology
-                    )
-if main_size > 1:
-    distrForAdvecZ = Redistribute([velo], velocity, advec.advecDir[2],
-                                  component=2, name_suffix='Zin')
-
-##Problem
-# Sequential : no need of redistribute
-if main_size == 1:
-    pb = TransportProblem([velocity, advec], simu, dumpFreq=10000)
-else:
-    pb = TransportProblem([velocity, distrForAdvecZ, advec],
-                          simu, dumpFreq=10000)
-pb.setUp()
-
-p = Printer(variables=[scal],
-            topo=scal.topoInit,
-            frequency=outputModulo,
-            prefix=outputFilePrefix,
-            formattype=HDF5)
-pb.addMonitors([p])
-p._printStep()
-
-# ## Solve problem
-pb.solve()
-
-pb.finalize()
-print pb.timer
diff --git a/Examples/levelSet3D_CPU.py b/Examples/levelSet3D_CPU.py
deleted file mode 100644
index be87bb18970fa797105fd0e3c45e8fa6da0f05fc..0000000000000000000000000000000000000000
--- a/Examples/levelSet3D_CPU.py
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/bin/env python
-import time
-import parmepy as pp
-from parmepy.methods_keys import Scales, TimeIntegrator, Interpolation,\
-    Remesh, Support, Splitting, dtCrit, SpaceDiscretisation, GhostUpdate
-from parmepy.numerics.interpolation import Linear
-from parmepy.numerics.integrators.runge_kutta2 import RK2 as RK2
-from parmepy.numerics.remeshing import L4_2 as rmsh
-from parmepy.fields.continuous import Field
-from parmepy.mpi.topology import Cartesian
-from parmepy.operator.advection import Advection
-from parmepy.operator.analytic import Analytic
-from parmepy.problem.simulation import Simulation
-from parmepy.problem.transport import TransportProblem
-from parmepy.operator.monitors.printer import Printer
-from parmepy.constants import VTK
-import numpy as np
-import math
-import sys
-
-## constants
-pi = np.pi
-cos = np.cos
-sin = np.sin
-exp = np.exp
-abs = np.abs
-tanh = np.tanh
-
-def vitesse(res, x, y, z, t):
-    res[0] = 2.0 * sin(x * pi) ** 2 * sin(y * pi * 2) * sin(z * pi * 2) * cos(pi * t / 2.)
-    res[1] = - sin(x * pi * 2) * sin(y * pi) ** 2 * sin(z * pi * 2) * cos(pi * t / 2.)
-    res[2] = - sin(x * pi * 2) * sin(y * pi * 2) * sin(z * pi) ** 2 * cos(pi * t / 2.)
-    return res
-
-def scalaire(res, x, y, z, t):
-    rr = np.sqrt((x - 0.35) ** 2 + (y - 0.35) ** 2 + (z - 0.35) ** 2)
-    ind = np.where(rr < 0.15)
-    res[0][...] = 0.
-    res[0][ind] = 1.0
-    return res
-
-def run(nb=65):
-    dim = 3
-    boxLength = [1., 1., 1.]
-    boxMin = [0., 0., 0.]
-    nbElem = [nb, nb, nb]
-
-    timeStep = 0.04
-    #period = 10.
-    finalTime = 2.0 #55 * timeStep
-    outputFilePrefix = './res/levelSet_3D_'
-    outputModulo = 10
-
-    t0 = time.time()
-
-    ## Domain
-    box = pp.Box(dim, length=boxLength, origin=boxMin)
-
-    ## Fields
-    velo = Field(domain=box, name='Velocity', is_vector=True)
-    scal = Field(domain=box, formula=scalaire, name='Scalar')
-
-    ## Usual Cartesian topology definition
-    ghosts = np.ones((box.dimension)) * 0
-    topo = Cartesian(box, box.dimension, nbElem,
-                     ghosts=ghosts)
-
-    ## Operators
-    advec = Advection(velo, scal,
-                      resolutions={velo: nbElem,
-                                   scal: nbElem},
-#                      method={TimeIntegrator: RK2,
-#                              Interpolation: Linear,
-#                              Remesh: rmsh,
-#                              Support: '',
-#                              Splitting: 'o1'}
-                      method={Scales: 'p_M4', Splitting: 'strang'},
-                      topo=topo)
-
-    velocity = Analytic(velo, resolutions={velo: nbElem}, 
-                        formula=vitesse, topo=topo)
-
-    ## Definition of simulation parameters
-    simu = Simulation(tinit=0.0, tend=2.0, timeStep=0.01, iterMax=1000000)
-
-    ##Problem
-    pb = TransportProblem(operators=[velocity, advec],
-                          simulation=simu, dumpFreq=-1)
-
-    ## Setting solver to Problem (only operators for computational tasks)
-    pb.pre_setUp()
-
-    printer = Printer(variables=[velo, scal],
-                      topo=topo,
-                      frequency=outputModulo,
-                      prefix=outputFilePrefix,
-                      formattype=VTK)
-
-    ## Setting solver to Problem
-    velo.setTopoInit(topo)
-    scal.setTopoInit(topo)
-    pb.addMonitors([printer])
-    pb.setUp()
-
-    t1 = time.time()
-
-    ## Solve problem
-    timings = pb.solve()
-
-    tf = time.time()
-
-    print "\n"
-    print "Total time : ", tf - t0, "sec (CPU)"
-    print "Init time : ", t1 - t0, "sec (CPU)"
-    print "Solving time : ", tf - t1, "sec (CPU)"
-    print ""
-    return pb.timings_info[0] + "\"Solving time\" \"Init time\" \"Total time\"", pb.timings_info[1] + str(tf - t1) + " " + str(t1 - t0) + " " + str(tf - t0)
-
-if __name__ == "__main__":
-    run(65)
diff --git a/Examples/levelSet3D_hybrid.py b/Examples/levelSet3D_hybrid.py
deleted file mode 100644
index 2028e644576ca265366c4aab1070d85245c79442..0000000000000000000000000000000000000000
--- a/Examples/levelSet3D_hybrid.py
+++ /dev/null
@@ -1,116 +0,0 @@
-"""
-Task parallelism example.
-
-One process is dedicated to file outputs and other are computing the data.
-"""
-import parmepy as pp
-from parmepy.constants import np, VTK, HDF5
-from parmepy.mpi.main_var import main_size, main_rank, main_comm
-from parmepy.mpi.topology import Cartesian
-from parmepy.fields.continuous import Field
-from parmepy.operator.advection import Advection
-from parmepy.operator.analytic import Analytic
-from parmepy.operator.redistribute_intercomm import RedistributeIntercomm
-from parmepy.operator.redistribute import Redistribute
-from parmepy.operator.monitors.printer import Printer
-from parmepy.problem.problem_tasks import ProblemTasks
-from parmepy.problem.simulation import Simulation
-from parmepy.tools.problem2dot import toDot
-from parmepy.methods_keys import TimeIntegrator, Interpolation, Remesh,\
-    Support, Splitting, MultiScale, Scales, MultiScale
-from parmepy.numerics.integrators.runge_kutta2 import RK2 as RK
-#from parmepy.numerics.integrators.runge_kutta4 import RK4 as RK
-from parmepy.numerics.interpolation import Linear
-from parmepy.numerics.remeshing import L2_1 as rmsh, L4_2, L2_1, L4_4, L8_4
-pi = np.pi
-cos = np.cos
-sin = np.sin
-sqrt = np.sqrt
-
-def scalaire(res, x, y, z, t=0.):
-    r = sqrt((x - 0.35) ** 2 + (y - 0.35) ** 2 + (z - 0.35) ** 2)
-    res[0][...] = 0.
-    res[0][r < 0.15] = 1.
-    return res
-
-
-def vitesse(res, x, y, z, t=0.):
-    res[0][...] = 2. * sin(pi*x)**2*sin(2.*pi*y)*sin(2.*pi*z)*cos(t*pi/3.)
-    res[1][...] = -sin(2.*pi*x)*sin(pi*y)**2*sin(2.*pi*z)*cos(t*pi/3.)
-    res[2][...] = -sin(2.*pi*x)*sin(2.*pi*y)*sin(pi*z)**2*cos(t*pi/3.)
-    return res
-
-dim = 3
-boxLength = [1., 1., 1.]
-boxMin = [0., 0., 0.]
-nbElem = [129] * 3
-
-assert main_size > 1
-proc_tasks = [0,] * main_size
-proc_tasks[0] = 1
-comm_s = main_comm.Split(color=proc_tasks[main_rank], key=main_rank)
-
-
-box = pp.Box(dim, length=boxLength, origin=boxMin)
-
-if proc_tasks[main_rank] == 1:
-    topo = box.getOrCreateTopology(1, gridResolution=nbElem,
-                     comm=comm_s
-                     )
-elif proc_tasks[main_rank] == 0:
-    topo = box.getOrCreateTopology(1, gridResolution=nbElem,
-                     comm=comm_s
-                     )
-## Fields
-scal = Field(domain=box, name='Scalar', formula=scalaire
-             )
-velo = Field(domain=box, name='Velocity', is_vector=True, formula=vitesse
-             )
-
-op_velo = Analytic([velo], {velo: nbElem}, topo=topo, task_id=1)
-advec = Advection(velo, scal,
-                  resolutions={velo: nbElem,
-                               scal: nbElem},
-                  method={TimeIntegrator: RK,
-                          Interpolation: Linear,
-                          Remesh: L2_1,
-                          Support: 'gpu_1k',
-                          Splitting: 'o2',
-                          MultiScale: L2_1},
-                  topo=topo,
-                  task_id=0,
-                  )
-p = Printer(variables=[scal],
-            topo=topo,
-            frequency=5,
-            prefix='./levelset3D_hybrid/',
-            formattype=HDF5,
-            task_id=0)
-
-distrForAdvecZ = Redistribute([velo], None, advec.advecDir[2], component=2,
-                              task_id=0)
-
-redX = RedistributeIntercomm(velo, topo, op_velo, advec.advecDir[0],
-                             proc_tasks, main_comm,
-                            component=0)
-redY = RedistributeIntercomm(velo, topo, op_velo, advec.advecDir[1],
-                             proc_tasks, main_comm,
-                            component=1)
-redZ = RedistributeIntercomm(velo, topo, op_velo, distrForAdvecZ,
-                             proc_tasks, main_comm,
-                            component=2)
-distrForAdvecZ.opFrom = redZ
-
-simu = Simulation(tinit=0.0, tend=3., timeStep=0.025, iterMax=200)
-
-
-pb = ProblemTasks([op_velo, redX, redY, redZ, distrForAdvecZ, advec, p],
-                  simu, proc_tasks, dumpFreq=1000)
-
-from parmepy.tools.problem2dot import toDot
-toDot(pb)
-
-pb.setUp()
-pb.solve()
-pb.finalize()
-
diff --git a/HySoP/hysop/gpu/config_k20m.py b/HySoP/hysop/gpu/config_k20m.py
index 68309d213beec2bbb6b5b49f73d6badb52669db5..633bd60bbd644f76d6b8d8d39357d61104be1750 100644
--- a/HySoP/hysop/gpu/config_k20m.py
+++ b/HySoP/hysop/gpu/config_k20m.py
@@ -194,17 +194,14 @@ kernels_config[3][DOUBLE_GPU]['advec_MS_comm'] = \
     (['common.cl', "remeshing/weights_noVec_builtin.cl",
       'kernels/comm_MS_advection_noVec.cl'],
      False, 1, advection_index_space_3d)
-
 kernels_config[3][DOUBLE_GPU]['remesh_comm'] = \
     (['common.cl', 'remeshing/weights_noVec.cl',
       'kernels/comm_remeshing_noVec.cl'],
      False, 1, remeshing_index_space_3d)
-
 kernels_config[3][DOUBLE_GPU]['advec_and_remesh_comm'] = \
     (['common.cl', 'remeshing/weights_noVec.cl',
       'kernels/comm_advection_and_remeshing_noVec.cl'],
      False, 1, advection_and_remeshing_index_space)
-
 kernels_config[3][DOUBLE_GPU]['advec_MS_and_remesh_comm'] = \
     (['common.cl', 'remeshing/weights_noVec.cl',
       'kernels/comm_advection_MS_and_remeshing_noVec.cl'],
diff --git a/HySoP/hysop/gpu/gpu_particle_advection.py b/HySoP/hysop/gpu/gpu_particle_advection.py
index cd1cc6435c96a7d572a6434c19dcbb8cf4701d75..110ca4d6cc7219398a894d8f6bb335c1493dc26c 100644
--- a/HySoP/hysop/gpu/gpu_particle_advection.py
+++ b/HySoP/hysop/gpu/gpu_particle_advection.py
@@ -28,7 +28,8 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator):
 
     @debug
     def __init__(self, platform_id=None, device_id=None, device_type=None,
-                 user_src=None, max_velocity=None, max_dt=None, **kwds):
+                 user_src=None, max_velocity=None, max_dt=None,
+                 velocity_only_on_device=None, **kwds):
         """
         Create a Advection operator.
         Work on a given field (scalar or vector) at a given velocity to compute
@@ -84,7 +85,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator):
         if MultiScale in self.method:
             if self.method[MultiScale] is not None:
                 self._isMultiScale = True
-        
+
         if self._isMultiScale:
             self._synchronize = True
 
diff --git a/HySoP/hysop/gpu/gpu_transfer.py b/HySoP/hysop/gpu/gpu_transfer.py
index 334d73352f5f5de9f84411b4baf68dc15be253b2..49826eba5883f431de1430d759a2f242cdcab89e 100644
--- a/HySoP/hysop/gpu/gpu_transfer.py
+++ b/HySoP/hysop/gpu/gpu_transfer.py
@@ -91,15 +91,15 @@ class DataTransfer(Computational):
                     "One of source or target must be a GPU operator.")
             if not source_is_gpu and not target_is_gpu:
                 if source_is_topo:
-                    print "Assume this is a toHost transfer"
+                    print self.name, "Assume this is a toHost transfer"
                     self._transfer = self._apply_toHost
                 elif target_is_topo:
-                    print "Assume this is a toDevice transfer"
+                    print self.name, "Assume this is a toDevice transfer"
                     self._transfer = self._apply_toDevice
                 else:
                     raise RuntimeError(
-                        "One of source or target must be a GPU operator.")
-
+                        "One of source or target must be a GPU operator " +
+                        self.name + ".")
         # Function to synchronize ghosts before send data to device.
         self._ghosts_synchro = None
         # This function is needed only in a toDevice transfer and if the target operator needs ghosts
diff --git a/HySoP/hysop/gpu/multi_gpu_particle_advection.py b/HySoP/hysop/gpu/multi_gpu_particle_advection.py
index 44b96d1bd563294abb07a54b3452c96ec343be38..e7e58fbe7430b37d3a4e2f71c990f93e51447123 100644
--- a/HySoP/hysop/gpu/multi_gpu_particle_advection.py
+++ b/HySoP/hysop/gpu/multi_gpu_particle_advection.py
@@ -25,7 +25,8 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
     __metaclass__ = ABCMeta
 
     @debug
-    def __init__(self, max_velocity=None, max_dt=None, **kwds):
+    def __init__(self, max_velocity=None, max_dt=None,
+                 velocity_only_on_device=False, **kwds):
         """
         Create a Advection operator.
         Work on a given field (scalar or vector) at a given velocity to compute
@@ -39,6 +40,8 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
         @remark : Buffer widths are computed from max_velocity, max_dt and the mesh sizes:
           - velocity buffers : |max_velocity * max_dt / v_dx| + 1
           - scalar buffers : |max_velocity * max_dt / s_dx| + 1 + remesh_stencil/2
+        @remark : by default, velocity data are assumed to be on the host. If not, the user
+        should set the attribute velocity_only_on_device to True.
         """
         # fields_topo = kwds['fields_on_grid'][0].topology
         # direction = kwds['direction']
@@ -46,14 +49,20 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
         # self._is_cut_dir = self._cut_dir[direction]
 
         super(MultiGPUParticleAdvection, self).__init__(**kwds)
-
-        msg = "max_dt and _max_velocity must be given to advection "
-        msg += "for computing communication buffer sizes."
-        assert max_dt is not None and max_velocity is not None
+        self._velocity_only_on_device = velocity_only_on_device
+        if self._velocity_only_on_device:
+            self._get_velocity_buffers = self._get_velocity_buffers_from_device
+        else:
+            self._get_velocity_buffers = self._get_velocity_buffers_from_host
 
         msg = "The Multi-GPU works only with the RK2 TimeIntegrator"
         assert self.method[TimeIntegrator] == RK2, msg
         assert self._comm_size > 1, 'Parallel only'
+        assert self.dim == 3, 'A 2D multi-GPU version is not yet available'
+
+        msg = "max_dt and _max_velocity must be given to advection "
+        msg += "for computing communication buffer sizes."
+        assert max_dt is not None and max_velocity is not None
 
         assert self.fields_topo.cutdir[self.direction]
         assert self.fields_topo.shape[self.direction] > 1
@@ -164,7 +173,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
         self._v_pitches_host = (int(_v_l_buff[:, 0, 0].nbytes),
                                 int(_v_l_buff[:, :, 0].nbytes))
         self._v_buffer_region = (
-            int(self._v_buff_width * HYSOP_REAL(0.).nbytes),
+            int(self._v_buff_width * SIZEOF_HYSOP_REAL),
             int(self.v_resol_dir[1]),
             int(self.v_resol_dir[2]))
         self._v_block_size = 1024 * 1024  # 1MByte
@@ -290,16 +299,25 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
             self._pitches_dev = (
                 int(self.fields_on_grid[0].data[0][0, 0, :].nbytes),
                 int(self.fields_on_grid[0].data[0][:, 0, :].nbytes))
+            self._v_pitches_dev = (
+                int(self.velocity.data[0][0, 0, :].nbytes),
+                int(self.velocity.data[0][:, 0, :].nbytes))
         elif self.direction == 1:
             # Device is in YXZ layout
             self._pitches_dev = (
                 int(self.fields_on_grid[0].data[0][0, :, 0].nbytes),
                 int(self.fields_on_grid[0].data[0][:, :, 0].nbytes))
+            self._v_pitches_dev = (
+                int(self.velocity.data[0][0, :, 0].nbytes),
+                int(self.velocity.data[0][:, :, 0].nbytes))
         elif self.direction == 0:
             # Device is in XYZ layout
             self._pitches_dev = (
                 int(self.fields_on_grid[0].data[0][:, 0, 0].nbytes),
                 int(self.fields_on_grid[0].data[0][:, :, 0].nbytes))
+            self._v_pitches_dev = (
+                int(self.velocity.data[0][:, 0, 0].nbytes),
+                int(self.velocity.data[0][:, :, 0].nbytes))
         # Branching the proper _compute function
         if self.fields_on_grid[0].nb_components > 1:
             raise ValueError("Not yet implemented")
@@ -459,7 +477,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
             int(self.resol_dir[2]))
         self._origin_locr = (
             int((self.resol_dir[0] - self._s_buff_width_from_r)
-                * HYSOP_REAL(0).nbytes), 0, 0)
+                * SIZEOF_HYSOP_REAL), 0, 0)
 
         # Recompute blocks number and block size
         self._s_block_size_to_r, self._s_n_blocks_to_r, \
@@ -519,19 +537,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
             slices[b] = slice(b * n_elem, (b + 1) * n_elem)
         return int(block), int(n_b), n_elem, slices
 
-    def _exchange_velocity_buffers(self, dt):
-        ctime = MPI.Wtime()
-        ghosts = self.velocity_topo.ghosts()[self.direction]
-        if self.direction == 0:
-            max_velo_p = np.max(self.velocity.data[0][-ghosts - 1:, :, :])
-            max_velo_m = np.max(self.velocity.data[0][:ghosts + 1, :, :])
-        if self.direction == 1:
-            max_velo_p = np.max(self.velocity.data[1][:, -ghosts - 1:, :])
-            max_velo_m = np.max(self.velocity.data[1][:, :ghosts + 1, :])
-        if self.direction == 2:
-            max_velo_p = np.max(self.velocity.data[2][:, :, -ghosts - 1:])
-            max_velo_m = np.max(self.velocity.data[2][:, :, :ghosts + 1])
-        self._recompute_scal_buffers(max_velo_m, max_velo_p, dt)
+    def _get_velocity_buffers_from_host(self, ghosts):
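+        # Extract the velocity halo slices needed for communication from the
+        # host arrays into the local buffers.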
         if self.direction == 0:
             velo_sl = (slice(self.v_resol_dir[0] - self._v_buff_width - ghosts,
                              self.v_resol_dir[0] - ghosts),
@@ -562,6 +568,46 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
                        slice(0 + ghosts, self._v_buff_width + ghosts))
             self._v_l_buff_loc[...] = \
                 self.velocity.data[2][velo_sl].swapaxes(0, 1).swapaxes(0, 2)
+
+    def _get_velocity_buffers_from_device(self, ghosts):
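+        # Copy the left and right velocity halo slabs directly from the device
+        # velocity buffer into the local host buffers, using two non-blocking
+        # rectangular copies offset by the ghost layer width.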
+        _evt_l_v = cl.enqueue_copy(
+            self._queue_comm_m,
+            self._v_l_buff_loc,
+            self.velocity.gpu_data[self.direction],
+            host_origin=(0, 0, 0),
+            host_pitches=self._v_pitches_host,
+            buffer_origin=(int(SIZEOF_HYSOP_REAL * ghosts), 0, 0),
+            buffer_pitches=self._v_pitches_dev,
+            region=self._v_buffer_region,
+            is_blocking=False)
+        _evt_r_v = cl.enqueue_copy(
+            self._queue_comm_m,
+            self._v_r_buff_loc,
+            self.velocity.gpu_data[self.direction],
+            host_origin=(0, 0, 0),
+            host_pitches=self._v_pitches_host,
+            buffer_origin=(int(SIZEOF_HYSOP_REAL * (
+                self.v_resol_dir[0] - self._v_buff_width - ghosts)), 0, 0),
+            buffer_pitches=self._v_pitches_dev,
+            region=self._v_buffer_region,
+            is_blocking=False)
+        _evt_l_v.wait()
+        _evt_r_v.wait()
+
+    def _exchange_velocity_buffers(self, dt):
+        ctime = MPI.Wtime()
+        ghosts = self.velocity_topo.ghosts()[self.direction]
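+        # The maximum velocity near each subdomain boundary (ghost layers plus
+        # one cell) is used to recompute the scalar communication buffer sizes.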
+        if self.direction == 0:
+            max_velo_p = np.max(self.velocity.data[0][-ghosts - 1:, :, :])
+            max_velo_m = np.max(self.velocity.data[0][:ghosts + 1, :, :])
+        if self.direction == 1:
+            max_velo_p = np.max(self.velocity.data[1][:, -ghosts - 1:, :])
+            max_velo_m = np.max(self.velocity.data[1][:, :ghosts + 1, :])
+        if self.direction == 2:
+            max_velo_p = np.max(self.velocity.data[2][:, :, -ghosts - 1:])
+            max_velo_m = np.max(self.velocity.data[2][:, :, :ghosts + 1])
+        self._recompute_scal_buffers(max_velo_m, max_velo_p, dt)
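+        # Gather the velocity halo buffers, from host or device depending on
+        # the branch selected at initialization.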
+        self._get_velocity_buffers(ghosts)
         self.profiler['comm_cpu_advec_get'] += MPI.Wtime() - ctime
 
         ctime = MPI.Wtime()
diff --git a/HySoP/hysop/operator/advection.py b/HySoP/hysop/operator/advection.py
index ef3e9e20d8ce1377c69e86450787d7a5a0c8fd6e..130785a895cdd956f1bdb8fa9dc182de2f89a1f0 100644
--- a/HySoP/hysop/operator/advection.py
+++ b/HySoP/hysop/operator/advection.py
@@ -305,7 +305,7 @@ class Advection(Computational):
         # a Cartesian object, used for all fields.
         # We use it to initialize scales solver
         comm = self._mpis.comm
-        topodims = [1, 1, comm.Get_size()]
+        #topodims = [1, 1, comm.Get_size()]
         nbcells = toporef.mesh.discretization.resolution - 1
 
         scalesres, global_start = \
@@ -316,8 +316,8 @@ class Advection(Computational):
         for v in self.variables:
             topo = self.variables[v]
             assert isinstance(topo, Cartesian), str(topo)
-            assert (topo.shape == topodims).all(), \
-                str(topo.shape) + ' != ' + str(topodims)
+            #assert (topo.shape == topodims).all(), \
+            #    str(topo.shape) + ' != ' + str(topodims)
             assert not self._single_topo or (topo.mesh.resolution == scalesres).all(), \
                 str(topo.mesh.resolution) + ' != ' + str(scalesres)
             assert not self._single_topo or (topo.mesh.start() == global_start).all(), \