diff --git a/Examples/Multiphase/NS_planeJet_hybrid_MS_MP.py b/Examples/Multiphase/NS_planeJet_hybrid_MS_MP.py
new file mode 100644
index 0000000000000000000000000000000000000000..912aa6cf966b226da2d4dc89f6a9a768b2c1c541
--- /dev/null
+++ b/Examples/Multiphase/NS_planeJet_hybrid_MS_MP.py
@@ -0,0 +1,698 @@
+#!/usr/bin/env python
+# Script arguments:
+# 1. Flow resolution
+# 2. Scalar resolution
+# 3. Dictionary of device ids: {mpi rank: device id}
+# 4. Whether the initial condition is perturbed
+# 5. Whether data is output
+# 6. Flow density
+# 7. Jet density
+# mpirun -np 10 python ./NS_planeJet_hybrid_MS_MP.py "[129,129,129]" "[257,257,257]" "{0:0,5:1}" "True" "True" "1" "3"
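+# In the sample command above, MPI ranks 0 and 5 are mapped to OpenCL devices
+# 0 and 1 and therefore run the scalar (GPU) task; the remaining ranks run the
+# flow (velocity/vorticity) task, see PROC_TASKS below.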
+import sys
+USER_NB_ELEM_UW = eval(sys.argv[1])
+USER_NB_ELEM_S = eval(sys.argv[2])
+USER_RANK_DEVICE_ID = eval(sys.argv[3])
+RANDOM_INIT = eval(sys.argv[4])
+IS_OUTPUT = eval(sys.argv[5])
+FLOW_DENSITY = eval(sys.argv[6])
+JET_DENSITY = eval(sys.argv[7])
+import hysop
+import hysop.gpu
+#hysop.gpu.CL_PROFILE = True
+from hysop.constants import np, HDF5, ASCII, HYSOP_MPI_REAL
+from hysop.mpi.main_var import MPI, main_size, main_rank, main_comm
+from hysop.tools.parameters import MPIParams, Discretization, IOParams
+from hysop.problem.simulation import Simulation
+from hysop.fields.variable_parameter import VariableParameter
+from hysop.methods_keys import TimeIntegrator, Interpolation, Remesh,\
+    Support, Splitting, MultiScale, SpaceDiscretisation, \
+    GhostUpdate, Scales, dtCrit, ExtraArgs
+from hysop.numerics.integrators.runge_kutta2 import RK2 as RK
+from hysop.numerics.integrators.runge_kutta3 import RK3 as RK3
+from hysop.numerics.finite_differences import FD_C_4
+from hysop.numerics.remeshing import L6_4 as remesh_formula
+from hysop.numerics.remeshing import L2_1
+from hysop.numerics.remeshing import Linear
+from hysop.operator.advection import Advection
+from hysop.operator.diffusion import Diffusion
+from hysop.operator.stretching import Stretching
+from hysop.operator.poisson import Poisson
+from hysop.operator.differential import Curl
+from hysop.operator.adapt_timestep import AdaptTimeStep
+from hysop.operator.custom import CustomMonitor
+from hysop.operator.redistribute_inter import RedistributeInter
+from hysop.operator.redistribute_intra import RedistributeIntra
+from hysop.gpu.gpu_transfer import DataTransfer
+from hysop.domain.subsets import SubBox
+from hysop.operator.hdf_io import HDF_Writer
+from hysop.operator.energy_enstrophy import EnergyEnstrophy
+import hysop.tools.numpywrappers as npw
+from hysop.tools.profiler import Profiler, FProfiler
+from hysop.tools.io_utils import IO
+from hysop.operator.baroclinic_from_rhs import BaroclinicFromRHS
+from hysop.operator.multiresolution_filter import MultiresolutionFilter
+from hysop.operator.multiphase_gradp import MultiphaseGradP
+from hysop.operator.multiphase_baroclinic_rhs import MultiphaseBaroclinicRHS
+from hysop.operator.spectrum import Spectrum
+IO.set_default_path('/scratch_p/jmetancelin/PlaneJet_F{0}_J{1}'.format(FLOW_DENSITY,JET_DENSITY))
+
+pi = np.pi
+cos = np.cos
+sin = np.sin
+exp = np.exp
+abs = np.abs
+tanh = np.tanh
+
+
+TASK_UW = 1
+TASK_SCALAR = 2
+PROC_TASKS = [TASK_UW, ] * main_size
+for p in USER_RANK_DEVICE_ID:
+    PROC_TASKS[p] = TASK_SCALAR
+try:
+    DEVICE_ID = USER_RANK_DEVICE_ID[main_rank]
+except KeyError:
+    DEVICE_ID = None
+out_freq = 10
+# Physical parameters:
+# Flow viscosity
+VISCOSITY = 1e-4
+# Schmidt number
+SC = ((1. * USER_NB_ELEM_S[0] - 1.)/(1. * USER_NB_ELEM_UW[0] - 1.))**2
+# Scalar diffusivity
+DIFF_COEFF_SCAL = VISCOSITY / SC
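+# e.g. with the resolutions of the sample command above (129 flow points and
+# 257 scalar points per direction): SC = (256/128)**2 = 4 and
+# DIFF_COEFF_SCAL = 1e-4 / 4 = 2.5e-5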
+
+width = 0.01
+ampl3 = 0.3
+ampl = 0.05
+ampl2 = 0.05
+
+
+ctime = MPI.Wtime()
+setup_time = FProfiler("Setup")
+
+# Domain
+box = hysop.Box(length=[1., 1., 1.], origin=[0., 0., 0.],
+                proc_tasks=PROC_TASKS)
+mpi_params = MPIParams(comm=box.comm_task, task_id=PROC_TASKS[main_rank])
+mpi_params_S = MPIParams(comm=box.comm_task, task_id=TASK_SCALAR)
+mpi_params_UW = MPIParams(comm=box.comm_task, task_id=TASK_UW)
+
+
+def computeVel(res, x, y, z, t):
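+    """Initial condition: plane jet along x.
+
+    Streamwise component 0.5*(1+tanh(aux)), aux = (0.1-2*|y-0.5|)/(4*width):
+    a jet of width 0.1 centred at y=0.5, modulated by (1+ampl3*sin(8*pi*x)).
+    If RANDOM_INIT, a random perturbation of amplitude ampl, localised in the
+    shear layers, is applied to all three components; otherwise the
+    transverse components are zero.
+    """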
+    yy = abs(y - 0.5)
+    aux = (0.1 - 2. * yy) / (4. * width)
+    if RANDOM_INIT:
+        from create_random_arrays import random_init
+        randX, randY, randZ = random_init(res[0].shape, box.comm_task)
+        envelope = exp(-abs(aux ** 2))
+        strg1 = envelope * randX
+        strg2 = envelope * randY
+        strg3 = envelope * randZ
+    else:
+        strg1 = 0.
+        strg2 = 0.
+        strg3 = 0.
+    res[0][...] = 0.5 * (1. + tanh(aux))
+    res[0][...] *= (1. + ampl3 * sin(8. * pi * x))
+    res[0][...] *= (1. + ampl * strg1)
+    res[1][...] = ampl * strg2
+    res[2][...] = ampl * strg3
+    return res
+
+
+def initScal(res, x, y, z, t):
+    yy = abs(y - 0.5)
+    aux = (0.1 - 2. * yy) / (4. * width)
+    res[0][...] = 0.5 * (1. + tanh(aux))
+    res[0][...] *= (1. + ampl3 * sin(8. * pi * x))
+    return res
+
+temp_maxvelo = npw.zeros((3, ))
+maxvelo_values = npw.zeros((3, ))
+
+
+def calc_maxvelo(simu, v):
+    temp_maxvelo[0] = np.max(np.abs(v[0].data[0]))
+    temp_maxvelo[1] = np.max(np.abs(v[0].data[1]))
+    temp_maxvelo[2] = np.max(np.abs(v[0].data[2]))
+    v[0].topology.comm.Allreduce(sendbuf=[temp_maxvelo, 3, HYSOP_MPI_REAL],
+                                 recvbuf=[maxvelo_values, 3, HYSOP_MPI_REAL],
+                                 op=MPI.MAX)
+    return maxvelo_values
+
+
+# Fields
+velo = hysop.Field(domain=box, formula=computeVel,
+                   name='Velocity', is_vector=True)
+vorti = hysop.Field(domain=box,
+                    name='Vorticity', is_vector=True)
+scal = hysop.Field(domain=box, formula=initScal,
+                   name='Scalar', is_vector=False)
+gradp = hysop.Field(domain=box,
+                    name='GradP', is_vector=True)
+baroclinic_rhs = hysop.Field(domain=box,
+                             name='B_rhs', is_vector=True)
+
+
+data = {'dt': 0.01}
+dt = VariableParameter(data)
+simu = Simulation(tinit=0.0, tend=5., timeStep=0.01, iterMax=1000)
+
+# Flow discretizations:
+d_F_0g = Discretization(USER_NB_ELEM_UW)
+d_F_2g = Discretization(USER_NB_ELEM_UW, [2, ] * 3)
+# Scalar discretization
+d_S_0g = Discretization(USER_NB_ELEM_S)
+# Velocity discretization for scalar advection
+if USER_NB_ELEM_UW[0] == USER_NB_ELEM_S[0]:
+    d_F_1g = Discretization(USER_NB_ELEM_UW)
+else:
+    d_F_1g = Discretization(USER_NB_ELEM_UW, [1, ] * 3)
+
+# Topologies
+topo_S_0g_1d = None
+topo_F_1g_1d = None
+topo_F_0g_1d = None
+topo_F_2g_1d = None
+topo_F_0g_2d = None
+topo_F_0g_2d = box.create_topology(
+    d_F_0g, dim=2, mpi_params=mpi_params)
+topo_F_2g_1d = box.create_topology(
+    d_F_2g, dim=1, mpi_params=mpi_params)
+topo_F_0g_1d = box.create_topology(
+    d_F_0g, dim=1, mpi_params=mpi_params)
+topo_F_1g_1d = box.create_topology(
+    d_F_1g, dim=1, mpi_params=mpi_params)
+topo_S_0g_1d = box.create_topology(
+    d_S_0g, dim=1, mpi_params=mpi_params)
+
+
+# Operators
+# GPU operators
+advec_scal_method = {TimeIntegrator: RK,
+                     Interpolation: Linear,
+                     Remesh: remesh_formula,
+                     Support: 'gpu_2k',
+                     Splitting: 'o2',
+                     MultiScale: Linear}
+if PROC_TASKS.count(TASK_SCALAR) == 1:
+    advec_scal_method[ExtraArgs] = {'device_id': DEVICE_ID,
+                                    'device_type': 'gpu'}
+else:
+    advec_scal_method[ExtraArgs] = {'max_velocity': [1.2, 0.7, 0.7],
+                                    'max_dt': 0.012,
+                                    'device_id': DEVICE_ID,
+                                    'device_type': 'gpu',
+                                    'velocity_only_on_device': False}
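+# When the scalar task spans several processes, max_velocity and max_dt give
+# the multi-GPU advection an a priori bound on the per-direction CFL number,
+# max_cfl = int(max_velocity * max_dt / space_step) + 1
+# (cf. the multi_gpu_particle_advection.py hunk at the end of this patch).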
+advec_scal = Advection(velo,
+                       discretization=topo_F_1g_1d,
+                       variables={scal: topo_S_0g_1d},
+                       mpi_params=mpi_params_S,
+                       method=advec_scal_method)
+# diffusion_scal = Diffusion(viscosity=DIFF_COEFF_SCAL,
+#                            vorticity=scal,
+#                            discretization=topo_S_0g_1d,
+#                            mpi_params=mpi_params_S,
+#                            method={Support: 'gpu',
+#                                    SpaceDiscretisation: 'fd',
+#                                    ExtraArgs: {'device_id': DEVICE_ID,
+#                                                'device_type': 'gpu'}})
+# diffusion_scal.name += '_(Scalar)'
+# tanh(x) is in [-1:1] and -1 stands for the flow.
+# We want [FLOW_DENSITY:JET_DENSITY] if FLOW_DENSITY<JET_DENSITY
+# We want [JET_DENSITY:FLOW_DENSITY] if FLOW_DENSITY>JET_DENSITY
+if FLOW_DENSITY < JET_DENSITY:
+    scal_to_rho = '{0:f}*(0.5*tanh(100.0*x-50.0)+0.5)+{1:f}'.format(
+        abs(FLOW_DENSITY - JET_DENSITY),
+        min(FLOW_DENSITY, JET_DENSITY))
+else:
+    scal_to_rho = '{0:f}*(-0.5*tanh(100.0*x-50.0)+0.5)+{1:f}'.format(
+        abs(FLOW_DENSITY - JET_DENSITY),
+        min(FLOW_DENSITY, JET_DENSITY))
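+# e.g. with FLOW_DENSITY=1 and JET_DENSITY=3 (sample command above) this gives
+# 2*(0.5*tanh(100*x-50)+0.5)+1: scalar ~0 maps to rho ~1 (flow) and
+# scalar ~1 maps to rho ~3 (jet).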
+baroclinic_rhs_op = MultiphaseBaroclinicRHS(
+    baroclinic_rhs, scal, gradp,
+    variables={baroclinic_rhs: topo_S_0g_1d,
+               gradp: topo_F_2g_1d,
+               scal: topo_S_0g_1d},
+    method={Support: 'gpu',
+            SpaceDiscretisation: FD_C_4,
+            ExtraArgs: {'density_func': scal_to_rho,
+                        'device_id': DEVICE_ID,
+                        'device_type': 'gpu'}},
+    mpi_params=mpi_params_S)
+filter_scal = MultiresolutionFilter(
+    d_in=topo_S_0g_1d, d_out=topo_F_2g_1d,
+    variables={baroclinic_rhs: topo_F_2g_1d},
+    method={Remesh: L2_1,
+            Support: 'gpu',
+            ExtraArgs: {'device_id': DEVICE_ID,
+                        'device_type': 'gpu'}},
+    mpi_params=mpi_params_S)
+gradp_op = MultiphaseGradP(velocity=velo, gradp=gradp, viscosity=VISCOSITY,
+                           discretization=topo_F_2g_1d,
+                           mpi_params=mpi_params_S,
+                           method={SpaceDiscretisation: FD_C_4,
+                                   ExtraArgs: {'gravity': [0., 0., 0.]}})
+
+# CPU operators
+advec = Advection(velo,
+                  discretization=topo_F_0g_2d,
+                  variables={vorti: topo_F_0g_2d},
+                  mpi_params=mpi_params_UW,
+                  method={Scales: 'p_64', MultiScale: 'L4_4'})
+stretch = Stretching(velo, vorti, discretization=topo_F_2g_1d,
+                     mpi_params=mpi_params_UW)
+baroclinic = BaroclinicFromRHS(vorti, baroclinic_rhs,
+                               discretization=topo_F_0g_1d,
+                               mpi_params=mpi_params_UW)
+diffusion = Diffusion(variables={vorti: topo_F_0g_1d},
+                      viscosity=VISCOSITY,
+                      mpi_params=mpi_params_UW)
+poisson = Poisson(velo, vorti, discretization=topo_F_0g_1d,
+                  mpi_params=mpi_params_UW)
+c = Curl(velo, vorti, discretization=topo_F_0g_1d,
+         method={SpaceDiscretisation: 'fftw', GhostUpdate: True},
+         mpi_params=mpi_params_UW)
+#dt_output = None
+#if IS_OUTPUT:
+dt_output = IOParams(frequency=1, filename='dt.dat', fileformat=ASCII)
+dt_adapt = AdaptTimeStep(velo, vorti,
+                         simulation=simu,
+                         time_range=[0, np.infty],
+                         discretization=topo_F_2g_1d,
+                         method={TimeIntegrator: RK3,
+                                 SpaceDiscretisation: FD_C_4,
+                                 dtCrit: ['gradU', 'cfl', ]},
+                         lcfl=0.15,
+                         cfl=1.5,
+                         io_params=dt_output,
+                         mpi_params=mpi_params_UW)
+
+# Operators discretizations
+if box.is_on_task(TASK_SCALAR):
+    for op in (advec_scal, gradp_op, filter_scal, baroclinic_rhs_op):  # , diffusion_scal):
+        op.discretize()
+if box.is_on_task(TASK_UW):
+    for op in (advec, stretch, diffusion, poisson, c, dt_adapt, baroclinic):
+        op.discretize()
+
+if IS_OUTPUT:
+    # Defining subdomains
+    L_V = 1. - 1. / (1. * USER_NB_ELEM_UW[0])
+    L_S = 1. - 1. / (1. * USER_NB_ELEM_S[0])
+    XY_plane_v = SubBox(origin=[0., 0., 0.5], length=[L_V, L_V, 0.],
+                        parent=box)
+    XZ_plane_v = SubBox(origin=[0., 0.5, 0.], length=[L_V, 0., L_V],
+                        parent=box)
+    XY_plane_s = SubBox(origin=[0., 0., 0.5], length=[L_S, L_S, 0.],
+                        parent=box)
+    XZ_plane_s = SubBox(origin=[0., 0.5, 0.], length=[L_S, 0., L_S],
+                        parent=box)
+    # Defining output operators
+    p_velo = HDF_Writer(variables={velo: topo_F_0g_1d},
+                        mpi_params=mpi_params_UW,
+                        io_params=IOParams(frequency=out_freq,
+                                           filename='flow',
+                                           fileformat=HDF5))
+    p_velo_xy = HDF_Writer(variables={velo: topo_F_0g_1d,
+                                      vorti: topo_F_0g_1d},
+                           var_names={velo: 'Velocity', vorti: 'Vorticity'},
+                           subset=XY_plane_v,
+                           mpi_params=mpi_params_UW,
+                           io_params=IOParams(frequency=out_freq,
+                                              filename='flow_XY',
+                                              fileformat=HDF5))
+    p_velo_xz = HDF_Writer(variables={velo: topo_F_0g_1d,
+                                      vorti: topo_F_0g_1d},
+                           var_names={velo: 'Velocity', vorti: 'Vorticity'},
+                           subset=XZ_plane_v,
+                           mpi_params=mpi_params_UW,
+                           io_params=IOParams(frequency=out_freq,
+                                              filename='flow_XZ',
+                                              fileformat=HDF5))
+    p_gradp_xy = HDF_Writer(variables={gradp: topo_F_2g_1d},
+                           var_names={gradp: 'gradp'},
+                           subset=XY_plane_s,
+                           mpi_params=mpi_params_S,
+                           io_params=IOParams(frequency=out_freq,
+                                              filename='gradp_XY',
+                                              fileformat=HDF5))
+    # p_rhs_xy = HDF_Writer(variables={baroclinic_rhs: topo_S_0g_1d},
+    #                        var_names={baroclinic_rhs: 'RHS'},
+    #                        subset=XY_plane_s,
+    #                        mpi_params=mpi_params_S,
+    #                        io_params=IOParams(frequency=out_freq,
+    #                                           filename='rhs_XY',
+    #                                           fileformat=HDF5))
+    p_scal_xy = HDF_Writer(variables={scal: topo_S_0g_1d},
+                           var_names={scal: 'Scalar'},
+                           subset=XY_plane_s,
+                           mpi_params=mpi_params_S,
+                           io_params=IOParams(frequency=out_freq,
+                                              filename='scal_XY',
+                                              fileformat=HDF5))
+    p_scal_xz = HDF_Writer(variables={scal: topo_S_0g_1d},
+                           var_names={scal: 'Scalar'},
+                           subset=XZ_plane_s,
+                           mpi_params=mpi_params_S,
+                           io_params=IOParams(frequency=out_freq,
+                                              filename='scal_XZ',
+                                              fileformat=HDF5))
+    p_scal_xy.name += '_(Scalar)'
+    p_scal_xz.name += '_(Scalar)'
+    energy = EnergyEnstrophy(velocity=velo,
+                             vorticity=vorti,
+                             discretization=topo_F_0g_1d,
+                             mpi_params=mpi_params_UW,
+                             io_params=IOParams(frequency=1,
+                                                filename='energy.dat',
+                                                fileformat=ASCII))
+    maxvelo = CustomMonitor(variables={velo: topo_F_0g_1d},
+                            function=calc_maxvelo,
+                            res_shape=3,
+                            mpi_params=mpi_params_UW,
+                            io_params=IOParams(frequency=1,
+                                               filename='maxvelo.dat',
+                                               fileformat=ASCII))
+    if box.is_on_task(TASK_UW):
+        for op in (p_velo, p_velo_xy, p_velo_xz, energy, maxvelo):
+            op.discretize()
+    if box.is_on_task(TASK_SCALAR):
+        for op in (p_scal_xy, p_scal_xz, p_gradp_xy):
+            op.discretize()
+
+# Redistribute operators
+# CPU redistributes
+RF_vorti_0g2d_2g1d = RedistributeIntra(variables=[vorti],  # W_C_2d_to_2G
+                                  source=advec, target=stretch,
+                                  mpi_params=mpi_params_UW)
+RF_vorti_0g2d_2g1d.name += '_toG_W'
+RF_velo_0g1d_2g1d = RedistributeIntra(variables=[velo],  # U_C_1d_to_C_2d_2G
+                                 source=poisson, target=stretch,
+                                 run_till=[stretch, dt_adapt],
+                                 mpi_params=mpi_params_UW)
+RF_velo_0g1d_2g1d.name += '_toG_V'
+toDev_velo_1g1d = DataTransfer(source=topo_F_1g_1d, target=advec_scal,
+                     variables={velo: topo_F_1g_1d},
+                     run_till=[advec_scal, gradp_op],
+                     mpi_params=mpi_params_S)
+toDev_velo_1g1d.name += '_ToDev_V'
+RS_velo_0g1d_1g1d = RedistributeIntra(variables=[velo],  # U_C_1d_to_C_2d_2G
+                                    source=topo_F_0g_1d, target=topo_F_1g_1d,
+                                    run_till=[toDev_velo_1g1d],
+                                    mpi_params=mpi_params_S)
+RS_velo_0g1d_1g1d.name += '_GPUtoG_V'
+RF_vorti_0g1d_2g1d = RedistributeIntra(variables=[vorti],  # W_C_1d_to_C_2d_2G
+                                 source=diffusion, target=dt_adapt,
+                                 mpi_params=mpi_params_UW)
+RF_vorti_0g1d_2g1d.name += '_toG_W'
+RF_velo_0g1d_0g2d = RedistributeIntra(variables=[velo], # U_C_1d_to_C_2d
+                                  source=poisson, target=advec,
+                                  mpi_params=mpi_params_UW)
+RF_velo_0g1d_0g2d.name += '_toScales_V'
+RF_vorti_0g1d_0g2d = RedistributeIntra(variables=[vorti],  # W_C_1d_to_C_2d
+                                   source=poisson, target=advec,
+                                   mpi_params=mpi_params_UW)
+RF_vorti_0g1d_0g2d.name += '_toScales_W'
+RF_vorti_2g1d_0g1d = RedistributeIntra(variables=[vorti],   # W_C_2d_2G_to_1d
+                                    source=baroclinic,  #stretch,
+                                    target=diffusion,
+                                    mpi_params=mpi_params_UW)
+RF_vorti_2g1d_0g1d.name += '_FromG_W'
+
+F0g1d_to_S0g1d_velo = RedistributeInter(variables=[velo],
+                              parent=main_comm,
+                              source=topo_F_0g_1d, target=topo_F_0g_1d,
+                              source_id=TASK_UW, target_id=TASK_SCALAR,
+                              run_till=[gradp_op, RS_velo_0g1d_1g1d])
+S2g1d_to_F2g1d_rhs = RedistributeInter(variables=[baroclinic_rhs],
+                                   parent=main_comm,
+                                   source=topo_F_2g_1d, target=topo_F_0g_1d,
+                                   source_id=TASK_SCALAR, target_id=TASK_UW,
+                                   run_till=[baroclinic])
+S2g1d_to_F2g1d_rhs.name += '_rhs_GPU_to_CPU'
+toHost_rhs_2g1d = DataTransfer(source=topo_F_2g_1d,
+                         target=S2g1d_to_F2g1d_rhs,
+                         variables={baroclinic_rhs: topo_F_2g_1d},
+                         mpi_params=mpi_params_S)
+toHost_rhs_2g1d.name += '_ToHost_RHS'
+toDev_gradp_2g1d = DataTransfer(source=topo_F_2g_1d, target=baroclinic_rhs_op,
+                          variables={gradp: topo_F_2g_1d},
+                          mpi_params=mpi_params_S)
+toDev_gradp_2g1d.name += '_ToDev_GradP'
+if IS_OUTPUT:
+    toHost_scal_0g1d = DataTransfer(source=topo_S_0g_1d, target=p_scal_xy,
+                          variables={scal: topo_S_0g_1d},
+                          mpi_params=mpi_params_S,
+                          freq=out_freq,
+                          run_till=[p_scal_xz, ])
+    toHost_scal_0g1d.name += '_ToHost_S'
+
+# Operators setup
+if box.is_on_task(TASK_SCALAR):
+    for op in (advec_scal, gradp_op, baroclinic_rhs_op, filter_scal):  # , diffusion_scal):
+        op.setup()
+    if IS_OUTPUT:
+        for op in (p_gradp_xy, p_scal_xy, p_scal_xz, toHost_scal_0g1d):
+            op.setup()
+    for op in (toDev_velo_1g1d, toHost_rhs_2g1d, toDev_gradp_2g1d, RS_velo_0g1d_1g1d):
+        op.setup()
+if box.is_on_task(TASK_UW):
+    for op in (advec, stretch, diffusion, poisson, c, dt_adapt, baroclinic):
+        op.setup()
+    if IS_OUTPUT:
+        for op in (p_velo, p_velo_xy, p_velo_xz, energy, maxvelo):
+            op.setup()
+    for op in (RF_vorti_0g2d_2g1d, RF_vorti_2g1d_0g1d, RF_velo_0g1d_2g1d, RF_vorti_0g1d_2g1d,
+               RF_velo_0g1d_0g2d, RF_vorti_0g1d_0g2d):
+        op.setup()
+# Wait for all operators to be set up before setting up the inter-task redistributes
+main_comm.Barrier()
+F0g1d_to_S0g1d_velo.setup()
+S2g1d_to_F2g1d_rhs.setup()
+
+# Operators list
+if IS_OUTPUT:
+    operators_list = [RS_velo_0g1d_1g1d, toDev_velo_1g1d,  ## Scal
+                      gradp_op, p_gradp_xy, toDev_gradp_2g1d,  ## Scal
+                      advec, RF_vorti_0g2d_2g1d, stretch,  ## Flow
+                      advec_scal,# diffusion_scal,  ## Scal
+                      toHost_scal_0g1d, p_scal_xy, p_scal_xz,  ## Scal
+                      baroclinic_rhs_op, filter_scal, toHost_rhs_2g1d,  ## Scal
+                      S2g1d_to_F2g1d_rhs,  ## Scal->Flow
+                      baroclinic,  ## Flow
+                      diffusion, poisson,  ## Flow
+                      F0g1d_to_S0g1d_velo,  ## Flow->Scal
+                      p_velo, p_velo_xy, p_velo_xz, energy, maxvelo,  ## Flow
+                      RF_velo_0g1d_2g1d, RF_vorti_0g1d_2g1d,  ## Flow
+                      RF_velo_0g1d_0g2d, RF_vorti_0g1d_0g2d,  ## Flow
+                      dt_adapt,  ## Flow
+    ]
+else:
+    raise RuntimeError("IS_OUTPUT=False is not supported (operators list commented out below)")
+    # operators_list = [F0g1d_to_S2g1d_velo,
+    #                   toDev, advec_scal, diffusion_scal,
+    #                   advec, RF_vorti_0g2d_2g1d, stretch,
+    #                   RF_vorti_2g1d_0g1d, diffusion, poisson,
+    #                   RF_velo_0g1d_2g1d, RF_velo_0g1d_0g2d, RF_vorti_0g1d_0g2d, dt_adapt]
+
+# Fields initializations
+if box.is_on_task(TASK_SCALAR):
+    scal.initialize(topo=topo_S_0g_1d)
+    advec_dirX = advec_scal.advec_dir[0].discrete_op
+    gpu_scal = advec_dirX.fields_on_grid[0]
+    gpu_pscal = advec_dirX.fields_on_part[gpu_scal][0]
+    #diffusion_scal.discrete_op.set_field_tmp(gpu_pscal)
+    if IS_OUTPUT:
+        p_scal_xy.apply(simu)
+        p_scal_xz.apply(simu)
+if box.is_on_task(TASK_UW):
+    velo.initialize(topo=topo_F_0g_1d)
+    c.apply(simu)
+    poisson.apply(simu)
+    RF_velo_0g1d_2g1d.apply(simu)
+    RF_velo_0g1d_0g2d.apply(simu)
+    RF_vorti_0g1d_0g2d.apply(simu)
+    RF_vorti_0g1d_2g1d.apply(simu)
+    if IS_OUTPUT:
+        p_velo.apply(simu)
+        p_velo_xy.apply(simu)
+        p_velo_xz.apply(simu)
+F0g1d_to_S0g1d_velo.apply(simu)
+F0g1d_to_S0g1d_velo.wait()
+if box.is_on_task(TASK_SCALAR):
+    RS_velo_0g1d_1g1d.apply(simu)
+    RS_velo_0g1d_1g1d.wait()
+    toDev_velo_1g1d.apply(simu)
+    toDev_velo_1g1d.wait()
+    gradp_op.initialize_velocity()
+simu.initialize()
+setup_time += MPI.Wtime() - ctime
+main_comm.Barrier()
+
+# Solve
+total_time = FProfiler("Total")
+solve_time = FProfiler("Solve")
+cttime = MPI.Wtime()
+while not simu.isOver:
+    ctime = MPI.Wtime()
+    if main_rank == 0:
+        simu.printState()
+    for op in operators_list:
+        if box.is_on_task(op.task_id()):
+            op.apply(simu)
+    if box.is_on_task(TASK_SCALAR):
+        # Wait for GPU operations on the scalar
+        advec_scal.advec_dir[0].discreteFields[scal].wait()
+    solve_time += MPI.Wtime() - ctime
+    dt_adapt.wait()
+    # Synchronize all MPI processes
+    main_comm.Barrier()
+    simu.advance()
+
+main_comm.Barrier()
+nb_ite = simu.currentIteration
+total_time += MPI.Wtime() - cttime
+simu.finalize()
+
+
+# if IS_OUTPUT:
+#     if box.is_on_task(TASK_SCALAR):
+#         p_scal_xy.apply(simu)
+#         p_scal_xz.apply(simu)
+#     if box.is_on_task(TASK_UW):
+#         p_velo.apply(simu)
+#         p_velo_xy.apply(simu)
+#         p_velo_xz.apply(simu)
+
+
+# prof = Profiler(None, box.comm_task)
+# prof += setup_time
+# prof += solve_time
+# for op in operators_list:
+#     if box.is_on_task(op.task_id()):
+#         op.finalize()
+#         prof += op.profiler
+# for v in (velo, vorti, scal):
+#     prof += v.profiler
+# prof.summarize()
+
+# if box.is_on_task(TASK_SCALAR):
+#     prof.write(prefix=' '.join([str(s-1) for s in USER_NB_ELEM_UW]) +
+#                ' ' + str(nb_ite) +
+#                ' ' + str(PROC_TASKS[main_rank]) +
+#                ' ' + str(PROC_TASKS.count(PROC_TASKS[main_rank])),
+#                hprefix='Nx Ny Nz Nite Task Np')
+# main_comm.Barrier()
+# if box.is_on_task(TASK_UW):
+#     prof.write(prefix=' '.join([str(s-1) for s in USER_NB_ELEM_UW]) +
+#                ' ' + str(nb_ite) +
+#                ' ' + str(PROC_TASKS[main_rank]) +
+#                ' ' + str(PROC_TASKS.count(PROC_TASKS[main_rank])),
+#                hprefix='Nx Ny Nz Nite Task Np')
+
+
+# if main_rank < 2:
+#     print prof
+# for i in xrange(main_size):
+#     if i == main_rank:
+#         print prof
+#     main_comm.Barrier()
+# main_comm.Barrier()
+
+velo.finalize()
+if box.is_on_task(TASK_SCALAR):
+    scal.finalize()
+if box.is_on_task(TASK_UW):
+    vorti.finalize()
+main_comm.Barrier()
+
+
+# def extract_time(prof, key):
+#     if key is None:
+#         return prof.t
+#     if prof is None:
+#         return 0.0
+#     return prof.profiler[key].t
+
+# times = npw.asrealarray([np.sum([extract_time(pf, k) for pf, k in l]) for l in (
+#             ((total_time, None), ),
+#             ((solve_time, None), ),
+#             ((dt_adapt, 'apply'), ),
+#             ((advec, 'apply'), ),
+#             ((stretch, 'apply'), ),
+#             ((diffusion, 'apply'), ),
+#             ((poisson, 'apply'), ),
+#             ((advec_scal.advec_dir[0], 'apply'), ),
+#             ((advec_scal.advec_dir[0].discrete_op, 'OpenCL_copy'),
+#              (advec_scal.advec_dir[0].discrete_op, 'OpenCL_transpose_xy'),
+#              (advec_scal.advec_dir[0].discrete_op, 'OpenCL_advection_and_remeshing'), ),
+#             ((advec_scal.advec_dir[1], 'apply'), ),
+#             ((advec_scal.advec_dir[1].discrete_op, 'OpenCL_transpose_xy'),
+#              (advec_scal.advec_dir[1].discrete_op, 'OpenCL_transpose_xz'),
+#              (advec_scal.advec_dir[1].discrete_op, 'OpenCL_advection_and_remeshing'),
+#              (advec_scal.advec_dir[1].discrete_op, 'OpenCL_buff_advec_and_remesh_l'),
+#              (advec_scal.advec_dir[1].discrete_op, 'OpenCL_buff_advec_and_remesh'),
+#              (advec_scal.advec_dir[1].discrete_op, 'OpenCL_buff_advec_and_remesh_r'), ),
+#             ((advec_scal.advec_dir[1].discrete_op, 'comm_gpu_advec_set'),
+#              (advec_scal.advec_dir[1].discrete_op, 'comm_cpu_advec'),
+#              (advec_scal.advec_dir[1].discrete_op, 'comm_cpu_advec_get'),
+#              (advec_scal.advec_dir[1].discrete_op, 'comm_gpu_remesh_get'),
+#              (advec_scal.advec_dir[1].discrete_op, 'comm_gpu_remesh_set_loc'),
+#              (advec_scal.advec_dir[1].discrete_op, 'comm_gpu_remesh_get_loc'),
+#              (advec_scal.advec_dir[1].discrete_op, 'comm_cpu_remesh'), ),
+#             ((advec_scal.advec_dir[2], 'apply'), ),
+#             ((advec_scal.advec_dir[2].discrete_op, 'OpenCL_transpose_xz'),
+#              (advec_scal.advec_dir[2].discrete_op, 'OpenCL_advection_and_remeshing'),
+#              (advec_scal.advec_dir[2].discrete_op, 'OpenCL_buff_advec_and_remesh_l'),
+#              (advec_scal.advec_dir[2].discrete_op, 'OpenCL_buff_advec_and_remesh'),
+#              (advec_scal.advec_dir[2].discrete_op, 'OpenCL_buff_advec_and_remesh_r'), ),
+#             ((advec_scal.advec_dir[2].discrete_op, 'comm_gpu_advec_set'),
+#              (advec_scal.advec_dir[2].discrete_op, 'comm_cpu_advec'),
+#              (advec_scal.advec_dir[2].discrete_op, 'comm_cpu_advec_get'),
+#              (advec_scal.advec_dir[2].discrete_op, 'comm_gpu_remesh_get'),
+#              (advec_scal.advec_dir[2].discrete_op, 'comm_gpu_remesh_set_loc'),
+#              (advec_scal.advec_dir[2].discrete_op, 'comm_gpu_remesh_get_loc'),
+#              (advec_scal.advec_dir[2].discrete_op, 'comm_cpu_remesh'), ),
+#             # ((diffusion_scal, 'apply'), ),
+#             # ((diffusion_scal.discrete_op, 'OpenCL_enqueue_copy'),
+#             #  (diffusion_scal.discrete_op, 'OpenCL_diffusion'), ),
+#             # ((diffusion_scal.discrete_op, 'comm_diffusion'), ),
+#             ((RF_vorti_0g2d_2g1d, 'apply'),
+#              (RF_vorti_2g1d_0g1d, 'apply'),
+#              (RF_velo_0g1d_0g2d, 'apply'),
+#              (RF_vorti_0g1d_0g2d, 'apply'),
+#              (F0g1d_to_S0g1d_velo, 'apply'),
+#              (toDev_velo_1g1d, 'apply'), ),
+#             )])
+# tl = "Total Solve Dt Advection(w) Stretching Diffusion(w) Poisson "
+# tl += "Advection(s)_X Advection(s)_CL_X "
+# tl += "Advection(s)_Y Advection(s)_CL_Y Advection_Comm_Y "
+# tl += "Advection(s)_Z Advection(s)_CL_Z Advection_Comm_Z "
+# tl += "Diffusion(s) Diffusion(s)_CL Diffusion(s)_Comm Comm"
+
+
+# times_task = npw.zeros_like(times)
+# box.comm_task.Reduce([times, times.size, HYSOP_MPI_REAL],
+#                      [times_task, times.size, HYSOP_MPI_REAL])
+# times_task /= (1.0 * nb_ite * PROC_TASKS.count(PROC_TASKS[main_rank]))
+# if PROC_TASKS[main_rank] == TASK_UW:
+#     p_l = ["FLOW", ]
+# if PROC_TASKS[main_rank] == TASK_SCALAR:
+#     p_l = ["SCALAR", ]
+# p_l += [PROC_TASKS[main_rank], nb_ite] + USER_NB_ELEM_UW + USER_NB_ELEM_S
+# p_l += [PROC_TASKS.count(PROC_TASKS[main_rank]), ]
+# pp_l = "Label Task Ite UW_X UW_Y UW_Z S_X S_Y S_Z Np"
+# for i in xrange(main_size):
+#     if main_rank == i:
+#         if i == 0:
+#             print pp_l + ' ' + tl
+#         if box.comm_task.Get_rank() == 0:
+#             print ' '.join([str(e) for e in p_l]) + \
+#                 ' ' + ' '.join([str(e) for e in times_task])
+#     main_comm.Barrier()
diff --git a/Examples/Multiphase/RTI.py b/Examples/Multiphase/RTI.py
new file mode 100644
index 0000000000000000000000000000000000000000..9df0bb972d09f480fb68ae2599705b5d56cd570c
--- /dev/null
+++ b/Examples/Multiphase/RTI.py
@@ -0,0 +1,541 @@
+#!/usr/bin/env python
+# Rayleigh-Taylor Instability
+# Script arguments:
+# 1. Flow resolution
+# 2. Scalar resolution
+# 3. Dictionary of device ids: {mpi rank: device id}
+# 4. Whether data is output
+# 5. Low density
+# 6. High density
+# mpirun -np 10 python ./RTI_new.py "[129,129,129]" "[257,257,257]" "{0:0,5:1}" "True" "1" "3"
+import sys
+USER_NB_ELEM_UW = eval(sys.argv[1])
+USER_NB_ELEM_S = eval(sys.argv[2])
+USER_RANK_DEVICE_ID = eval(sys.argv[3])
+IS_OUTPUT = eval(sys.argv[4])
+LOW_DENSITY = eval(sys.argv[5])
+HIGH_DENSITY = eval(sys.argv[6])
+import hysop
+# import hysop.gpu
+# hysop.gpu.CL_PROFILE = True
+from hysop.constants import np, HDF5, ASCII, HYSOP_MPI_REAL
+from hysop.mpi.main_var import MPI, main_size, main_rank, main_comm
+from hysop.tools.parameters import MPIParams, Discretization, IOParams
+from hysop.problem.simulation import Simulation
+from hysop.fields.variable_parameter import VariableParameter
+from hysop.methods_keys import TimeIntegrator, Interpolation, Remesh,\
+    Support, Splitting, MultiScale, SpaceDiscretisation, \
+    GhostUpdate, Scales, dtCrit, ExtraArgs
+from hysop.numerics.integrators.runge_kutta2 import RK2 as RK
+from hysop.numerics.integrators.runge_kutta3 import RK3 as RK3
+from hysop.numerics.finite_differences import FD_C_4
+from hysop.numerics.remeshing import L6_4 as remesh_formula
+from hysop.numerics.remeshing import L2_1
+from hysop.numerics.remeshing import Linear
+from hysop.operator.advection import Advection
+from hysop.operator.diffusion import Diffusion
+from hysop.operator.stretching import Stretching
+from hysop.operator.poisson import Poisson
+from hysop.operator.baroclinic_from_rhs import BaroclinicFromRHS
+from hysop.operator.differential import Curl
+from hysop.operator.multiresolution_filter import MultiresolutionFilter
+from hysop.operator.multiphase_gradp import MultiphaseGradP
+from hysop.operator.multiphase_baroclinic_rhs import MultiphaseBaroclinicRHS
+from hysop.operator.adapt_timestep import AdaptTimeStep
+from hysop.operator.custom import CustomMonitor
+from hysop.operator.redistribute_inter import RedistributeInter
+from hysop.operator.redistribute_intra import RedistributeIntra
+from hysop.gpu.gpu_transfer import DataTransfer
+from hysop.domain.subsets import SubBox
+from hysop.operator.hdf_io import HDF_Writer
+from hysop.operator.energy_enstrophy import EnergyEnstrophy
+import hysop.tools.numpywrappers as npw
+from hysop.tools.io_utils import IO
+from hysop.operator.spectrum import Spectrum
+pi = np.pi
+cos = np.cos
+sin = np.sin
+exp = np.exp
+abs = np.abs
+tanh = np.tanh
+
+TASK_UW = 1
+TASK_SCALAR = 2
+PROC_TASKS = [TASK_UW, ] * main_size
+for p in USER_RANK_DEVICE_ID:
+    PROC_TASKS[p] = TASK_SCALAR
+try:
+    DEVICE_ID = USER_RANK_DEVICE_ID[main_rank]
+except KeyError:
+    DEVICE_ID = None
+print USER_RANK_DEVICE_ID
+out_freq = 10
+# Physical parameters:
+# Flow viscosity
+VISCOSITY = 5e-5
+# Schmidt number
+SC = ((1. * USER_NB_ELEM_S[0] - 1.)/(1. * USER_NB_ELEM_UW[0] - 1.))**2
+# Scalar diffusivity
+DIFF_COEFF_SCAL = VISCOSITY / SC
+MAX_DT = 0.05  # ok with 0.05
+MAX_CFL = 1.5  # ok with 1.5 (lcfl bound)
+MAX_LCFL = 0.15  # ok with 0.2
+#MAX_CFL_VEC = [1.5, 0.6, 0.5]
+
+
+width = 0.01
+ampl3 = 0.3
+ampl = 0.05
+ampl2 = 0.05
+
+# NS
+# CPU ::  Velo(C_2d), Vorti(C_2d) > Advec > Vorti(C_2d)
+# CPU ::  Velo(C_2d,2G), Vorti(C_2d,2G) > Stretching > Vorti(C_2d,2G)
+# CPU ::  Vorti(C_2d,2G) > Penalisation > Vorti(C_2d,2G)
+# CPU ::  Vorti(C_1d) > Diffusion > Vorti(C_1d)
+# CPU ::  Vorti(C_1d) > Poisson > Velo(C_1d)
+# CPU ::  Velo(C_2d,2G), Vorti(C_2d,2G) > Dt
+# Scal
+# GPU ::  Velo(C_2d,1G), Scal(F_2d) > Advec > Scal(F_2d)
+# Baroclinic
+# GPU ::  Velo(C_2d,2G) > GradP > gradp(C_2d,2G)
+# GPU ::  Rho(F_2d), gradp(C_2d,2G) > BaroclinicRHS > rhs(F_2d)
+# GPU ::  rhs(F_2d) > Filter > rhs(C_2d,2G)
+# CPU ::  rhs(C_2d,2G), Vorti(C_2d,2G) > Baroclinic > Vorti(C_2d,2G)
+
+ctime = MPI.Wtime()
+
+# Domain
+box = hysop.Box(length=[1., 1., 1.], origin=[0., 0., 0.],
+                proc_tasks=PROC_TASKS)
+mpi_params = MPIParams(comm=box.comm_task, task_id=PROC_TASKS[main_rank])
+mpi_params_S = MPIParams(comm=box.comm_task, task_id=TASK_SCALAR)
+mpi_params_UW = MPIParams(comm=box.comm_task, task_id=TASK_UW)
+
+SIGMA=1e-3
+NOISE=1e-4
+
+def computeVelo(res, x, y, z, t):
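+    """Initial condition: random velocity perturbation of amplitude NOISE,
+    localised around the interface z=0.5 by the Gaussian envelope
+    exp(-(z-0.5)**2/SIGMA).
+    """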
+    zz = exp(-(z-0.5) * (z-0.5) / SIGMA) * NOISE
+    from create_random_arrays import random_init
+    randX, randY, randZ = random_init(res[0].shape, box.comm_task)
+    res[0][...] = zz * randX
+    res[1][...] = zz * randY
+    res[2][...] = zz * randZ
+    return res
+
+def initScal(res, x, y, z, t):
+    res[0][...] = z
+    return res
+
+
+temp_maxvelo = npw.zeros((3, ))
+maxvelo_values = npw.zeros((3, ))
+
+
+def calc_maxvelo(simu, v):
+    temp_maxvelo[0] = np.max(np.abs(v[0].data[0]))
+    temp_maxvelo[1] = np.max(np.abs(v[0].data[1]))
+    temp_maxvelo[2] = np.max(np.abs(v[0].data[2]))
+    v[0].topology.comm.Allreduce(sendbuf=[temp_maxvelo, 3, HYSOP_MPI_REAL],
+                                 recvbuf=[maxvelo_values, 3, HYSOP_MPI_REAL],
+                                 op=MPI.MAX)
+    return maxvelo_values
+
+
+# Fields
+velo = hysop.Field(domain=box, formula=computeVelo,
+                   name='Velocity', is_vector=True)
+vorti = hysop.Field(domain=box,
+                    name='Vorticity', is_vector=True)
+scal = hysop.Field(domain=box, formula=initScal,
+                   name='Scalar', is_vector=False)
+gradp = hysop.Field(domain=box,
+                    name='GradP', is_vector=True)
+baroclinic_rhs = hysop.Field(domain=box,
+                             name='B_rhs', is_vector=True)
+
+data = {'dt': 0.001}
+dt = VariableParameter(data)
+simu = Simulation(tinit=0.0, tend=2.5, timeStep=0.001, iterMax=10000)
+
+# Flow discretizations:
+d_C = Discretization(USER_NB_ELEM_UW)
+d_C_2G = Discretization(USER_NB_ELEM_UW, [2, ] * 3)
+# Scalar discretization
+d_F = Discretization(USER_NB_ELEM_S)
+
+# Topologies
+topo_C_2d = None
+topo_C_2d_2G = None
+topo_C_1d = None
+topo_F_2d = None
+topo_C_2d_2G = box.create_topology(
+    d_C_2G, dim=1, mpi_params=mpi_params)
+topo_C_2d = box.create_topology(
+    d_C, dim=2, mpi_params=mpi_params)
+topo_C_1d = box.create_topology(
+    d_C, dim=1, mpi_params=mpi_params)
+topo_F_2d = box.create_topology(
+    d_F, dim=1, mpi_params=mpi_params)
+###### WARNING: this topology is built 1D (despite the *_2d name) to be FFTW-compliant.
+
+
+# Operators
+# GPU operators
+advec_scal_method = {TimeIntegrator: RK,
+                     Interpolation: Linear,
+                     Remesh: remesh_formula,
+                     Support: 'gpu_1k',
+                     Splitting: 'o2',
+                     MultiScale: Linear}
+if PROC_TASKS.count(TASK_SCALAR) == 1:
+    advec_scal_method[ExtraArgs] = {'device_id': DEVICE_ID,
+                                    'device_type': 'gpu'}
+else:
+    advec_scal_method[ExtraArgs] = {'max_cfl': MAX_CFL,
+                                    'max_velocity': [2.5, 2.5, 3.5], # for rho=1<->3
+                                    'max_dt': MAX_DT,
+                                    'device_id': DEVICE_ID,
+                                    'device_type': 'gpu'}
+advec_scal = Advection(velo,
+                       discretization=topo_C_2d_2G,
+                       variables={scal: topo_F_2d},
+                       mpi_params=mpi_params_S,
+                       method=advec_scal_method)
+diffusion_scal = Diffusion(viscosity=DIFF_COEFF_SCAL,
+                           vorticity=scal,
+                           discretization=topo_F_2d,
+                           mpi_params=mpi_params_S,
+                           method={Support: 'gpu',
+                                   SpaceDiscretisation: 'fd',
+                                   ExtraArgs: {'device_id': DEVICE_ID,
+                                               'device_type': 'gpu'}})
+diffusion_scal.name += '_(Scalar)'
+# tanh(x) is in [-1:1]; -1 corresponds to the low-density fluid.
+# We want the density to span [LOW_DENSITY:HIGH_DENSITY]
+scal_to_rho = '{0:f}*({1}*tanh(100.0*x-50.0)+0.5)+{2:f}'.format(
+    abs(LOW_DENSITY - HIGH_DENSITY),
+    0.5 if LOW_DENSITY < HIGH_DENSITY else -0.5,
+    min(LOW_DENSITY, HIGH_DENSITY))
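+# e.g. with LOW_DENSITY=1 and HIGH_DENSITY=3 (sample command above) this gives
+# 2*(0.5*tanh(100*x-50)+0.5)+1: the scalar (initialised to z) yields rho ~1 at
+# the bottom and rho ~3 at the top, i.e. the heavy fluid sits above the light
+# one, the unstable configuration under gravity [0, 0, -1].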
+baroclinic_rhs_op = MultiphaseBaroclinicRHS(
+    baroclinic_rhs, scal, gradp,
+    variables={baroclinic_rhs: topo_F_2d,
+               gradp: topo_C_2d_2G,
+               scal: topo_F_2d},
+    method={Support: 'gpu',
+            SpaceDiscretisation: FD_C_4,
+            ExtraArgs: {'density_func': scal_to_rho,
+                        'device_id': DEVICE_ID,
+                        'device_type': 'gpu'}},
+    mpi_params=mpi_params_S)
+filter_scal = MultiresolutionFilter(
+    d_in=topo_F_2d, d_out=topo_C_2d_2G,
+    variables={baroclinic_rhs: topo_C_2d_2G},
+    method={Remesh: L2_1,
+            Support: 'gpu',
+            ExtraArgs: {'device_id': DEVICE_ID,
+                        'device_type': 'gpu'}},
+    mpi_params=mpi_params_S)
+gradp_op = MultiphaseGradP(velocity=velo, gradp=gradp, viscosity=VISCOSITY,
+                           discretization=topo_C_2d_2G,
+                           mpi_params=mpi_params_S,
+                           method={SpaceDiscretisation: FD_C_4,
+                                   ExtraArgs: {'gravity': [0., 0., -1.]}})
+
+# CPU operators
+advec = Advection(velo,
+                  discretization=topo_C_2d,
+                  variables={vorti: topo_C_2d},
+                  mpi_params=mpi_params_UW,
+                  method={Scales: 'p_64', MultiScale: 'L4_4'})
+stretch = Stretching(velo, vorti, discretization=topo_C_2d_2G,
+                     mpi_params=mpi_params_UW)
+baroclinic = BaroclinicFromRHS(vorti, baroclinic_rhs,
+                               discretization=topo_C_2d_2G,
+                               mpi_params=mpi_params_UW)
+diffusion = Diffusion(variables={vorti: topo_C_1d},
+                      viscosity=VISCOSITY,
+                      mpi_params=mpi_params_UW)
+poisson = Poisson(velo, vorti, discretization=topo_C_1d,
+                  mpi_params=mpi_params_UW)
+c = Curl(velo, vorti, discretization=topo_C_1d,
+         method={SpaceDiscretisation: 'fftw', GhostUpdate: True},
+         mpi_params=mpi_params_UW)
+#dt_output = None
+#if IS_OUTPUT:
+dt_output = IOParams(frequency=1, filename='dt.dat', fileformat=ASCII)
+dt_adapt = AdaptTimeStep(velo, vorti,
+                         simulation=simu,
+                         time_range=[10, np.infty],
+                         discretization=topo_C_2d_2G,
+                         method={TimeIntegrator: RK3,
+                                 SpaceDiscretisation: FD_C_4,
+                                 dtCrit: ['gradU', 'cfl']},
+                         lcfl=MAX_LCFL, maxdt=MAX_DT,
+                         cfl=MAX_CFL,
+                         io_params=dt_output,
+                         mpi_params=mpi_params_UW)
+
+# Operators discretizations
+if box.is_on_task(TASK_SCALAR):
+    for op in (advec_scal, gradp_op, filter_scal,
+               baroclinic_rhs_op, diffusion_scal):
+        op.discretize()
+if box.is_on_task(TASK_UW):
+    for op in (advec, stretch, diffusion, poisson, c, dt_adapt, baroclinic):
+        op.discretize()
+
+if IS_OUTPUT:
+    # Defining subdomains
+    L_Vx = 1. - 1. / (1. * USER_NB_ELEM_UW[0])
+    L_Sx = 1. - 1. / (1. * USER_NB_ELEM_S[0])
+    L_Vy = 1. - 1. / (1. * USER_NB_ELEM_UW[1])
+    L_Sy = 1. - 1. / (1. * USER_NB_ELEM_S[1])
+    L_Vz = 1. - 1. / (1. * USER_NB_ELEM_UW[2])
+    L_Sz = 1. - 1. / (1. * USER_NB_ELEM_S[2])
+    XY_plane_v = SubBox(origin=[0., 0., 1.], length=[L_Vx, L_Vy, 0.],
+                        parent=box)
+    XZ_plane_v = SubBox(origin=[0., 0.5, 0.], length=[L_Vx, 0., L_Vz],
+                        parent=box)
+    YZ_plane_s = SubBox(origin=[0.5, 0., 0.], length=[0., L_Sy, L_Sz],
+                        parent=box)
+    XZ_plane_s = SubBox(origin=[0., 0.5, 0.], length=[L_Sx, 0., L_Sz],
+                        parent=box)
+    # Defining output operators
+    p_velo = HDF_Writer(variables={velo: topo_C_1d, vorti: topo_C_1d},
+                        mpi_params=mpi_params_UW,
+                        io_params=IOParams(frequency=out_freq,
+                                           filename='flow',
+                                           fileformat=HDF5))
+    p_scal_yz = HDF_Writer(variables={scal: topo_F_2d},
+                           var_names={scal: 'Scalar'},
+                           subset=YZ_plane_s,
+                           mpi_params=mpi_params_S,
+                           io_params=IOParams(frequency=out_freq,
+                                              filename='scal_YZ',
+                                              fileformat=HDF5))
+    p_scal_xz = HDF_Writer(variables={scal: topo_F_2d},
+                           var_names={scal: 'Scalar'},
+                           subset=XZ_plane_s,
+                           mpi_params=mpi_params_S,
+                           io_params=IOParams(frequency=out_freq,
+                                              filename='scal_XZ',
+                                              fileformat=HDF5))
+    p_scal = HDF_Writer(variables={scal: topo_F_2d},
+                        var_names={scal: 'Scalar'},
+                        mpi_params=mpi_params_S,
+                        io_params=IOParams(frequency=out_freq,
+                                           filename='scal',
+                                           fileformat=HDF5))
+    p_scal_yz.name += '_(Scalar)'
+    p_scal_xz.name += '_(Scalar)'
+    p_scal.name += '_(Scalar)'
+    energy = EnergyEnstrophy(velocity=velo,
+                             vorticity=vorti,
+                             discretization=topo_C_1d,
+                             mpi_params=mpi_params_UW,
+                             io_params=IOParams(frequency=1,
+                                                filename='energy.dat',
+                                                fileformat=ASCII))
+    maxvelo = CustomMonitor(variables={velo: topo_C_1d},
+                            function=calc_maxvelo,
+                            res_shape=3,
+                            mpi_params=mpi_params_UW,
+                            io_params=IOParams(frequency=1,
+                                               filename='maxvelo.dat',
+                                               fileformat=ASCII))
+    if box.is_on_task(TASK_UW):
+        for op in (p_velo, energy, maxvelo):
+            op.discretize()
+    if box.is_on_task(TASK_SCALAR):
+        for op in (p_scal_yz, p_scal_xz, p_scal):
+            op.discretize()
+
+# Redistribute operators
+# CPU redistributes
+W_C_2d_to_2G = RedistributeIntra(variables=[vorti],
+                                 source=advec, target=stretch,
+                                 mpi_params=mpi_params_UW)
+W_C_2d_to_2G.name += '_W_C_2d_to_2G'
+W_C_2d_2G_to_1d = RedistributeIntra(variables=[vorti],
+                                    source=baroclinic,  # stretch,
+                                    target=diffusion,
+                                    mpi_params=mpi_params_UW)
+W_C_2d_2G_to_1d.name += '_W_C_2d_2G_to_1d'
+W_C_1d_to_C_2d_2G = RedistributeIntra(variables=[vorti],
+                                      source=diffusion, target=dt_adapt,
+                                      mpi_params=mpi_params_UW)
+W_C_1d_to_C_2d_2G.name += '_W_C_1d_to_C_2d_2G'
+W_C_1d_to_C_2d = RedistributeIntra(variables=[vorti],
+                                   source=diffusion, target=advec,
+                                   mpi_params=mpi_params_UW)
+W_C_1d_to_C_2d.name += "_W_C_1d_to_C_2d"
+U_C_1d_to_C_2d_2G = RedistributeIntra(variables=[velo],
+                                      source=poisson, target=stretch,
+                                      run_till=[stretch, dt_adapt],
+                                      mpi_params=mpi_params_UW)
+U_C_1d_to_C_2d_2G.name += '_U_C_1d_to_C_2d_2G'
+U_C_1d_to_C_2d = RedistributeIntra(variables=[velo],
+                                   source=poisson, target=advec,
+                                   mpi_params=mpi_params_UW)
+U_C_1d_to_C_2d.name += '_U_C_1d_to_C_2d'
+toDevU = DataTransfer(source=topo_C_2d_2G, target=advec_scal,
+                      variables={velo: topo_C_2d_2G},
+                      run_till=[advec_scal, gradp_op],
+                      mpi_params=mpi_params_S)
+toDevU.name += '_ToDev_U'
+U_CPU_to_GPU = RedistributeInter(variables=[velo],
+                                 parent=main_comm,
+                                 source=topo_C_1d, target=topo_C_2d_2G,
+                                 source_id=TASK_UW, target_id=TASK_SCALAR,
+                                 run_till=[toDevU])
+rhs_GPU_to_CPU = RedistributeInter(variables=[baroclinic_rhs],
+                                   parent=main_comm,
+                                   source=topo_C_2d_2G, target=topo_C_2d_2G,
+                                   source_id=TASK_SCALAR, target_id=TASK_UW,
+                                   run_till=[baroclinic])
+rhs_GPU_to_CPU.name += '_rhs_GPU_to_CPU'  # ok
+toHostRHS = DataTransfer(source=topo_C_2d_2G,
+                         target=rhs_GPU_to_CPU,
+                         variables={baroclinic_rhs: topo_C_2d_2G},
+                         mpi_params=mpi_params_S,)
+toHostRHS.name += '_ToHost_RHS'
+toDevGradP = DataTransfer(source=topo_C_2d_2G, target=baroclinic_rhs_op,
+                          variables={gradp: topo_C_2d_2G},
+                          mpi_params=mpi_params_S)
+toDevGradP.name += '_ToDev_GradP'
+
+if IS_OUTPUT:
+    toHostS = DataTransfer(source=topo_F_2d, target=p_scal_yz,
+                           variables={scal: topo_F_2d},
+                           mpi_params=mpi_params_S,
+                           freq=out_freq,
+                           run_till=[p_scal_xz, p_scal_yz, p_scal])
+    toHostS.name += '_ToHost_S'
+
+# Operators setup
+if box.is_on_task(TASK_SCALAR):
+    for op in (advec_scal, gradp_op,
+               baroclinic_rhs_op, filter_scal, diffusion_scal):
+        op.setup()
+    if IS_OUTPUT:
+        for op in (p_scal_yz, p_scal_xz, p_scal, toHostS):
+            op.setup()
+    for op in (toDevU, toHostRHS, toDevGradP):
+        op.setup()
+if box.is_on_task(TASK_UW):
+    for op in (advec, stretch, diffusion, poisson, c, dt_adapt, baroclinic):
+        op.setup()
+    if IS_OUTPUT:
+        for op in (p_velo, energy, maxvelo):
+            op.setup()
+    for op in (W_C_2d_to_2G, W_C_2d_2G_to_1d,
+               W_C_1d_to_C_2d, W_C_1d_to_C_2d_2G,
+               U_C_1d_to_C_2d, U_C_1d_to_C_2d_2G):
+        op.setup()
+# Wait for all operators to be set up before setting up the inter-task redistributes
+main_comm.Barrier()
+U_CPU_to_GPU.setup()
+rhs_GPU_to_CPU.setup()
+
+# Operators list
+if IS_OUTPUT:
+    operators_list = [toDevU, gradp_op, toDevGradP,
+                      advec_scal, diffusion_scal,
+                      toHostS, p_scal_yz, p_scal_xz, p_scal,
+                      baroclinic_rhs_op, filter_scal,
+                      toHostRHS, rhs_GPU_to_CPU,
+                      advec, W_C_2d_to_2G, stretch, baroclinic,
+                      W_C_2d_2G_to_1d, diffusion, poisson,
+                      p_velo, energy, maxvelo,
+                      U_CPU_to_GPU,
+                      U_C_1d_to_C_2d, U_C_1d_to_C_2d_2G,
+                      W_C_1d_to_C_2d, W_C_1d_to_C_2d_2G, dt_adapt]
+else:
+    operators_list = [toDevU, advec_scal, diffusion_scal,
+                      advec, W_C_2d_to_2G, stretch,
+                      W_C_2d_2G_to_1d, diffusion, poisson,
+                      energy, maxvelo,
+                      U_CPU_to_GPU,
+                      U_C_1d_to_C_2d, U_C_1d_to_C_2d_2G,
+                      W_C_1d_to_C_2d, W_C_1d_to_C_2d_2G, dt_adapt]
+
+# Fields initializations
+if box.is_on_task(TASK_SCALAR):
+    scal.initialize(topo=topo_F_2d)
+    advec_dirX = advec_scal.advec_dir[0].discrete_op
+    gpu_scal = advec_dirX.fields_on_grid[0]
+    gpu_pscal = advec_dirX.fields_on_part[gpu_scal][0]
+    diffusion_scal.discrete_op.set_field_tmp(gpu_pscal)
+    if IS_OUTPUT:
+        p_scal_yz.apply(simu)
+        p_scal_xz.apply(simu)
+        p_scal.apply(simu)
+if box.is_on_task(TASK_UW):
+    velo.initialize(topo=topo_C_1d)
+    c.apply(simu)
+    poisson.apply(simu)
+    U_C_1d_to_C_2d.apply(simu)
+    U_C_1d_to_C_2d_2G.apply(simu)
+    W_C_1d_to_C_2d.apply(simu)
+    W_C_1d_to_C_2d_2G.apply(simu)
+    if IS_OUTPUT:
+        p_velo.apply(simu)
+        # p_velo_xy.apply(simu)
+        # p_velo_xz.apply(simu)
+U_CPU_to_GPU.apply(simu)
+U_CPU_to_GPU.wait()
+if box.is_on_task(TASK_SCALAR):
+    toDevU.apply(simu)
+    toDevU.wait()
+    gradp_op.initialize_velocity()
+
+simu.initialize()
+setup_time = MPI.Wtime() - ctime
+main_comm.Barrier()
+
+# Solve
+solve_time = 0.
+while not simu.isOver:
+    ctime = MPI.Wtime()
+    if main_rank == 0:
+        simu.printState()
+    for op in operators_list:
+        if box.is_on_task(op.task_id()):
+            op.apply(simu)
+    if box.is_on_task(TASK_SCALAR):
+        # Wait for GPU operations on the scalar
+        advec_scal.advec_dir[0].discreteFields[scal].wait()
+    solve_time += MPI.Wtime() - ctime
+    dt_adapt.wait()
+    # Synchronize all MPI processes
+    main_comm.Barrier()
+    simu.advance()
+
+U_CPU_to_GPU.wait()
+main_comm.Barrier()
+nb_ite = simu.currentIteration
+simu.finalize()
+
+
+if IS_OUTPUT:
+    if box.is_on_task(TASK_SCALAR):
+        p_scal_yz.apply(simu)
+        p_scal_xz.apply(simu)
+        p_scal.apply(simu)
+    if box.is_on_task(TASK_UW):
+        p_velo.apply(simu)
+        # p_velo_xy.apply(simu)
+        # p_velo_xz.apply(simu)
+
+for op in operators_list:
+    if box.is_on_task(op.task_id()):
+        op.finalize()
+
+velo.finalize()
+if box.is_on_task(TASK_SCALAR):
+    scal.finalize()
+if box.is_on_task(TASK_UW):
+    vorti.finalize()
+main_comm.Barrier()
diff --git a/Examples/Multiphase/create_random_arrays.py b/Examples/Multiphase/create_random_arrays.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb68fc081cc057b807251de4de8231bb6ecdf601
--- /dev/null
+++ b/Examples/Multiphase/create_random_arrays.py
@@ -0,0 +1,39 @@
+import os
+import numpy as np
+from hysop.constants import HYSOP_REAL, ORDER
+
+
+def random_init(shape, mpi_comm):
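+    """Return three random arrays of the given shape, with values in [-0.5, 0.5).
+
+    Arrays are cached on disk in 'rand_init/', one file per shape,
+    communicator size and rank, so successive runs reuse the same
+    perturbation fields.
+    """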
+    # Create a folder to store all random arrays
+    d = 'rand_init'
+    if mpi_comm.Get_rank() == 0:
+        if not os.path.exists(d):
+            os.makedirs(d)
+    mpi_comm.Barrier()
+    file_name = "{0}_{1}_{2}".format(*shape)
+    file_name += "_{0}p_{1}.dat".format(mpi_comm.Get_size(),
+                                        mpi_comm.Get_rank())
+    try:
+        randX = np.asarray(
+            np.reshape(np.fromfile(os.path.join(d, 'randX_' + file_name),
+                                   dtype=HYSOP_REAL), shape),
+            dtype=HYSOP_REAL, order=ORDER)
+        randY = np.asarray(
+            np.reshape(np.fromfile(os.path.join(d, 'randY_' + file_name),
+                                   dtype=HYSOP_REAL), shape),
+            dtype=HYSOP_REAL, order=ORDER)
+        randZ = np.asarray(
+            np.reshape(np.fromfile(os.path.join(d, 'randZ_' + file_name),
+                                   dtype=HYSOP_REAL), shape),
+            dtype=HYSOP_REAL, order=ORDER)
+    except IOError:
+        randX = np.asarray(np.random.random(shape),
+                           dtype=HYSOP_REAL, order=ORDER) - 0.5
+        randY = np.asarray(np.random.random(shape),
+                           dtype=HYSOP_REAL, order=ORDER) - 0.5
+        randZ = np.asarray(np.random.random(shape),
+                           dtype=HYSOP_REAL, order=ORDER) - 0.5
+        randX.tofile(os.path.join(d, 'randX_' + file_name))
+        randY.tofile(os.path.join(d, 'randY_' + file_name))
+        randZ.tofile(os.path.join(d, 'randZ_' + file_name))
+    return randX, randY, randZ
diff --git a/HySoP/hysop/gpu/gpu_kernel.py b/HySoP/hysop/gpu/gpu_kernel.py
index 411e141ce0e176fc0c52a10a1fce4f77a3de454d..3508f316f69b905aaf2d7f918544b9c6b10deee8 100644
--- a/HySoP/hysop/gpu/gpu_kernel.py
+++ b/HySoP/hysop/gpu/gpu_kernel.py
@@ -2,10 +2,9 @@
 @file gpu_kernel.py
 """
 from hysop.constants import debug, S_DIR
-#from hysop import __VERBOSE__
+from hysop import __VERBOSE__
 from hysop.gpu import cl, CL_PROFILE
 from hysop.tools.profiler import FProfiler
-__VERBOSE__ = True
 
 class KernelListLauncher(object):
     """
diff --git a/HySoP/hysop/gpu/multi_gpu_particle_advection.py b/HySoP/hysop/gpu/multi_gpu_particle_advection.py
index 53f9803ab65d25ae6fc7f2ca293c3a151553a0c1..3a7c68c77c02ffbfa97fb3f117489a8b3523728f 100644
--- a/HySoP/hysop/gpu/multi_gpu_particle_advection.py
+++ b/HySoP/hysop/gpu/multi_gpu_particle_advection.py
@@ -126,16 +126,16 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
                 self.max_cfl_s = int(max_cfl * scale_factor) + 1
                 self.max_cfl_v = int(max_cfl) + 1
         else:
-        try:
-            self.max_cfl_s = int(max_velocity[self.direction] * max_dt /
-                                 self._space_step[self.direction]) + 1
-            self.max_cfl_v = int(max_velocity[self.direction] * max_dt /
-                                 self._v_space_step[self.direction]) + 1
-        except TypeError:
-            self.max_cfl_s = int(max_velocity * max_dt /
-                                 self._space_step[self.direction]) + 1
-            self.max_cfl_v = int(max_velocity * max_dt /
-                                 self._v_space_step[self.direction]) + 1
+            try:
+                self.max_cfl_s = int(max_velocity[self.direction] * max_dt /
+                                     self._space_step[self.direction]) + 1
+                self.max_cfl_v = int(max_velocity[self.direction] * max_dt /
+                                     self._v_space_step[self.direction]) + 1
+            except TypeError:
+                self.max_cfl_s = int(max_velocity * max_dt /
+                                     self._space_step[self.direction]) + 1
+                self.max_cfl_v = int(max_velocity * max_dt /
+                                     self._v_space_step[self.direction]) + 1
 
         # Slice
         self._sl_dim = slice(self.dim, 2 * self.dim)