diff --git a/examples/analytic/analytic.py b/examples/analytic/analytic.py
index 56958347975f4e323f9a7b5addb1cd0365519962..631e14b44f8f8e36b9752f6786f7be6440dab4dc 100755
--- a/examples/analytic/analytic.py
+++ b/examples/analytic/analytic.py
@@ -36,9 +36,12 @@ def compute(args):
         # and configure how the code is generated and compiled at runtime.
         
         # Create an explicit OpenCL context from user parameters
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params, platform_id=args.cl_platform_id, 
-                                                                 device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
+
         # Configure OpenCL kernel generation and tuning (already done by HysopArgParser)
         from hysop.methods import OpenClKernelConfig
         method = {OpenClKernelConfig: args.opencl_kernel_config}
diff --git a/examples/bubble/periodic_bubble.py b/examples/bubble/periodic_bubble.py
index cc8f7a138c35d66e8ff29fcdb3b932a18c432eb0..af738c800b67e56f6653af9c5a652238dd4a9d76 100644
--- a/examples/bubble/periodic_bubble.py
+++ b/examples/bubble/periodic_bubble.py
@@ -94,10 +94,11 @@ def compute(args):
         # and configure how the code is generated and compiled at runtime.
                 
         # Create an explicit OpenCL context from user parameters
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params, 
-                                          platform_id=args.cl_platform_id, 
-                                          device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
         
         # Configure OpenCL kernel generation and tuning (already done by HysopArgParser)
         from hysop.methods import OpenClKernelConfig
diff --git a/examples/bubble/periodic_bubble_levelset.py b/examples/bubble/periodic_bubble_levelset.py
index ced517f22adad4f3e8e7af7fb61f3c01be2100ab..810a926e963a76e365ee9bd4a85ef3e90951b4f4 100644
--- a/examples/bubble/periodic_bubble_levelset.py
+++ b/examples/bubble/periodic_bubble_levelset.py
@@ -80,10 +80,11 @@ def compute(args):
         # and configure how the code is generated and compiled at runtime.
                 
         # Create an explicit OpenCL context from user parameters
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params, 
-                                          platform_id=args.cl_platform_id, 
-                                          device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
         
         # Configure OpenCL kernel generation and tuning (already done by HysopArgParser)
         from hysop.methods import OpenClKernelConfig
diff --git a/examples/bubble/periodic_bubble_levelset_penalization.py b/examples/bubble/periodic_bubble_levelset_penalization.py
index 74c3ba1366828bc71ffedf45b4ee8af59e5ecb37..05a3ef5d5763c25bfc0396b2a224d31b1faa3f8f 100644
--- a/examples/bubble/periodic_bubble_levelset_penalization.py
+++ b/examples/bubble/periodic_bubble_levelset_penalization.py
@@ -89,10 +89,11 @@ def compute(args):
         # and configure how the code is generated and compiled at runtime.
                 
         # Create an explicit OpenCL context from user parameters
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params, 
-                                          platform_id=args.cl_platform_id, 
-                                          device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
         
         # Configure OpenCL kernel generation and tuning (already done by HysopArgParser)
         from hysop.methods import OpenClKernelConfig
diff --git a/examples/bubble/periodic_jet_levelset.py b/examples/bubble/periodic_jet_levelset.py
index 9a3c95be85fae69acfad6f04a483f38f625e9ab0..ac99a865cfba414847367fbdf70957f0faf134e1 100644
--- a/examples/bubble/periodic_jet_levelset.py
+++ b/examples/bubble/periodic_jet_levelset.py
@@ -73,10 +73,11 @@ def compute(args):
         # and configure how the code is generated and compiled at runtime.
                 
         # Create an explicit OpenCL context from user parameters
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params, 
-                                          platform_id=args.cl_platform_id, 
-                                          device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
         
         # Configure OpenCL kernel generation and tuning (already done by HysopArgParser)
         from hysop.methods import OpenClKernelConfig
diff --git a/examples/cylinder/oscillating_cylinder.py b/examples/cylinder/oscillating_cylinder.py
index d23498e1d855c474596a0e6bf8cb34a6c0c5845b..73f3e41df6bbdac3ff9ea2bdeaeacf9872797974 100644
--- a/examples/cylinder/oscillating_cylinder.py
+++ b/examples/cylinder/oscillating_cylinder.py
@@ -79,10 +79,11 @@ def compute(args):
         # and configure how the code is generated and compiled at runtime.
                 
         # Create an explicit OpenCL context from user parameters
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params, 
-                                          platform_id=args.cl_platform_id, 
-                                          device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
         
         # Configure OpenCL kernel generation and tuning (already done by HysopArgParser)
         from hysop.methods import OpenClKernelConfig
diff --git a/examples/fixed_point/heat_equation.py b/examples/fixed_point/heat_equation.py
index b11af4ab32a0c4f787a3366e532f4ef832a233a6..518a773813524c64f33b4f973e1ddfeca2a0cecd 100644
--- a/examples/fixed_point/heat_equation.py
+++ b/examples/fixed_point/heat_equation.py
@@ -8,12 +8,14 @@ iterative method is used to compute  steady state solution.
 import numpy as np
 import sympy as sp
 
+pos = [0.5, 0.5, 0.5]
+RADIUS = 0.2
+chi  = lambda x,y,z: np.sqrt((x-pos[0])*(x-pos[0])+(y-pos[1])*(y-pos[1])+0.*z)<=RADIUS
+schi = lambda x,y,z: sp.sqrt((x-pos[0])*(x-pos[0])+(y-pos[1])*(y-pos[1])+0.*z)<=RADIUS
+
 # Analytic operator for computing source term
 def CS( data, coords, t, component):
     (x, y, z) = coords
-    pos = [0.5, 0.5, 0.5]
-    RADIUS = 0.2
-    chi = lambda x,y,z: np.sqrt((x-pos[0])*(x-pos[0])+(y-pos[1])*(y-pos[1])+0.*z)<=RADIUS
     data[...] = 0.
     data[chi(x,y,z)] = np.cos(t())
 def init_u(data, coords, component):
@@ -56,9 +58,10 @@ def compute(args):
 
         # Create an explicit OpenCL context from user parameters
         from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params,
-                                          platform_id=args.cl_platform_id,
-                                          device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
 
         # Configure OpenCL kernel generation and tuning (already done by HysopArgParser)
         from hysop.methods import OpenClKernelConfig
@@ -89,8 +92,7 @@ def compute(args):
             name="BCAndSourceTerm",
             exprs=(
                 # Source term
-                Assignment(utemp, Select(u.s(), uSources.s(),
-                                         sp.sqrt((x0-0.5)*(x0-0.5)+(x1-0.5)*(x1-0.5)+0.*x2)<=0.2)),
+                Assignment(utemp, Select(u.s(), uSources.s(), schi(x0, x1, x2))),
                 # BC enforcement
                 Assignment(utemp, Select(utemp, 0., x0<0.1)),
                 Assignment(utemp, Select(utemp, 0., x0>0.9)),
@@ -101,8 +103,14 @@ def compute(args):
             **extra_op_kwds)
     else:
         def computeSRC(src_in, u_in, u_out):
-            print src_in.shape, u_in.shape, u_out.shape
-            assert False
+            (x, y, z) =  src_in.mesh_coords
+            # Source term
+            u_out.data[0][chi(x,y,z)] = src_in.data[0][chi(x,y,z)]
+            # BC enforcment
+            u_out.data[0][:,:,x[0,0,:]<0.1] = 0.
+            u_out.data[0][:,:,x[0,0,:]>0.9] = 0.
+            u_out.data[0][:,y[0,:,0]<0.1,:] = 0.
+            u_out.data[0][:,y[0,:,0]>0.9,:] = 0.
         source = CustomOperator(
             implementation=Implementation.PYTHON,
             func=computeSRC,
@@ -110,16 +118,26 @@ def compute(args):
             invars=(uSources, u), outvars=(u,),
             **extra_op_kwds)
     # Diffusion operator
-    diffuse_op = DirectionalDiffusion(
-        implementation=impl,
-        name='diffuse',
-        fields=(u),
-        coeffs=(.1,),
-        variables={u: npts},
-        dt=pseudo_dt,
-        **extra_op_kwds)
-    diffuse = StrangSplitting(splitting_dim=dim, order=args.strang_order)
-    diffuse.push_operators(diffuse_op)
+    if impl is Implementation.FORTRAN or impl is Implementation.PYTHON:
+        diffuse = Diffusion(
+            implementation=impl,
+            name='diffuse',
+            Fin=u, Fout=u,
+            nu=.1,
+            variables={u: npts},
+            dt=pseudo_dt,
+            **extra_op_kwds)
+    else:
+        diffuse_op = DirectionalDiffusion(
+            implementation=impl,
+            name='diffuse',
+            fields=(u),
+            coeffs=(.1,),
+            variables={u: npts},
+            dt=pseudo_dt,
+            **extra_op_kwds)
+        diffuse = StrangSplitting(splitting_dim=dim, order=args.strang_order)
+        diffuse.push_operators(diffuse_op)
     # Convergence operator with absolute error
     conv = Convergence(convergence=convergence,
                        method={ResidualError: ResidualError.ABSOLUTE},
diff --git a/examples/flow_around_sphere/flow_around_sphere.py b/examples/flow_around_sphere/flow_around_sphere.py
index 66bb40b6dbf3eed5654262c6458a3ed7f2892da4..807d0d26c20d2b8278d0fbced56498d3d18fcb2c 100644
--- a/examples/flow_around_sphere/flow_around_sphere.py
+++ b/examples/flow_around_sphere/flow_around_sphere.py
@@ -19,7 +19,8 @@ def compute(args):
                                 PoissonCurl, AdaptiveTimeStep, HDF_Writer,          \
                                 Enstrophy, MinMaxFieldStatistics, StrangSplitting,    \
                                 ParameterPlotter, PenalizeVorticity, FlowRateCorrection, \
-                                VorticityAbsorption, CustomOperator
+                                VorticityAbsorption, CustomOperator, DirectionalAdvection, \
+                                DirectionalStretching
     from hysop.numerics.odesolvers.runge_kutta import RK2
     from hysop.methods import SpaceDiscretization, Remesh, TimeIntegrator, \
                               ComputeGranularity, Interpolation, StrangOrder
@@ -42,9 +43,32 @@ def compute(args):
                            task_id=box.current_task())
 
     # Setup usual implementation specific variables
-    impl = None
+    impl = args.impl
     extra_op_kwds = {'mpi_params': mpi_params}
+    implIsFortran = impl is Implementation.FORTRAN
     backend = Backend.HOST
+    if (impl is Implementation.PYTHON or implIsFortran):
+        method = {}
+    elif (impl is Implementation.OPENCL):
+        # For the OpenCL implementation we need to setup the compute device
+        # and configure how the code is generated and compiled at runtime.
+                
+        # Create an explicit OpenCL context from user parameters
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
+        
+        # Configure OpenCL kernel generation and tuning (already done by HysopArgParser)
+        from hysop.methods import OpenClKernelConfig
+        method = { OpenClKernelConfig: args.opencl_kernel_config }
+        
+        # Setup opencl specific extra operator keyword arguments
+        extra_op_kwds['cl_env'] = cl_env
+    else:
+        msg='Unknown implementation \'{}\'.'.format(impl)
+        raise ValueError(msg)
 
 
     # ====== Sphere inside the domain ======
@@ -135,16 +159,30 @@ def compute(args):
 
     ### Build the directional operators
     #> Directional advection
-    advec = Advection(
-        implementation=Implementation.FORTRAN,
-        name='advec',
-        velocity=velo,
-        advected_fields=(vorti,),
-        variables={velo: npts, vorti: npts},
-        dt=dt, **extra_op_kwds)
+    if implIsFortran:
+        advec = Advection(
+            implementation=impl,
+            name='advec',
+            velocity=velo,
+            advected_fields=(vorti,),
+            variables={velo: npts, vorti: npts},
+            dt=dt, **extra_op_kwds)
+    else:
+        advec_dir = DirectionalAdvection(
+            implementation=impl,
+            name='advec',
+            velocity = velo,       
+            advected_fields = (vorti, ),
+            velocity_cfl = args.cfl,
+            variables = {velo: npts, vorti: npts},
+            dt=dt, **extra_op_kwds)
     #> Directional stretching + diffusion
-    stretch = StaticDirectionalStretching(
-        implementation=Implementation.PYTHON,
+    if impl is Implementation.OPENCL:
+        StretchOp = DirectionalStretching
+    else:
+        StretchOp = StaticDirectionalStretching
+    stretch = StretchOp(
+        implementation=Implementation.PYTHON if implIsFortran else impl, 
         name='stretch',
         formulation=StretchingFormulation.CONSERVATIVE,
         velocity=velo,
@@ -154,6 +192,8 @@ def compute(args):
     #> Directional splitting operator subgraph
     splitting = StrangSplitting(splitting_dim=dim,
                                 order=StrangOrder.STRANG_FIRST_ORDER)
+    if not implIsFortran:
+        splitting.push_operators(advec_dir)
     splitting.push_operators(stretch)
     #> Penalization
     penal = PenalizeVorticity(
@@ -165,7 +205,7 @@ def compute(args):
         dt=dt, **extra_op_kwds)
     #> Diffusion operator
     diffuse = Diffusion(
-        implementation=Implementation.FORTRAN,
+        implementation=impl,
         name='diffuse',
         nu=viscosity,
         Fin=vorti,
@@ -182,7 +222,7 @@ def compute(args):
         dt=dt, **extra_op_kwds)
     #> Poisson operator to recover the velocity from the vorticity
     poisson = PoissonCurl(
-        implementation=Implementation.FORTRAN,
+        implementation=impl,
         name='poisson',
         velocity=velo,
         vorticity=vorti,
@@ -210,18 +250,18 @@ def compute(args):
 
     #> Operator to compute the infinite norm of the velocity
     min_max_U = MinMaxFieldStatistics(name='min_max_U', field=velo,
-            Finf=True, implementation=Implementation.PYTHON, variables={velo:npts},
+            Finf=True, implementation=Implementation.PYTHON if implIsFortran else impl, variables={velo:npts},
             **extra_op_kwds)
     #> Operator to compute the infinite norm of the vorticity
     min_max_W = MinMaxFieldStatistics(name='min_max_W', field=vorti,
-            Finf=True, implementation=Implementation.PYTHON, variables={vorti:npts},
+            Finf=True, implementation=Implementation.PYTHON if implIsFortran else impl, variables={vorti:npts},
             **extra_op_kwds)
     #> Operator to compute the enstrophy
     enstrophy_op = Enstrophy(
         name='enstrophy',
         vorticity=vorti, enstrophy=enstrophy, WdotW=wdotw,
         variables={vorti:npts, wdotw: npts},
-        implementation=Implementation.PYTHON, **extra_op_kwds)
+        implementation=Implementation.PYTHON if implIsFortran else impl, **extra_op_kwds)
 
     ### Adaptive timestep operator
     #TODO: move advection to GRAD_U
@@ -252,14 +292,16 @@ def compute(args):
         computeFlowrate,
         penal,
         splitting,
-        diffuse,
-        advec,
-        absorption,
-        poisson,
-        correctFlowrate,
-        enstrophy_op, min_max_U, min_max_W, dump_fields,
-        adapt_dt
-    )
+        diffuse)
+    if implIsFortran:
+        problem.insert(advec)
+    problem.insert(
+            absorption,
+            poisson,
+            correctFlowrate,
+            enstrophy_op, min_max_U, min_max_W, dump_fields,
+            adapt_dt
+        )
     problem.build()
 
     ## Create a simulation
diff --git a/examples/multiresolution/scalar_advection.py b/examples/multiresolution/scalar_advection.py
index 6b68a8a7e73117f967c4b375f748db381eb846bd..82e805e2b91c43f20e59fa2ae97522377c65aa29 100644
--- a/examples/multiresolution/scalar_advection.py
+++ b/examples/multiresolution/scalar_advection.py
@@ -61,9 +61,11 @@ def compute(args):
         # and configure how the code is generated and compiled at runtime.
                 
         # Create an explicit OpenCL context from user parameters
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params, platform_id=args.cl_platform_id, 
-                                                                 device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
         
         # Configure OpenCL kernel generation and tuning method
         # (already done by HysopArgParser for simplicity)
diff --git a/examples/particles_above_salt/particles_above_salt_bc.py b/examples/particles_above_salt/particles_above_salt_bc.py
index 5bc28c2415ee722e5bdb189a6d8ee57274b66e45..ce823f8c30da311706dc48e207ad71466d90c092 100644
--- a/examples/particles_above_salt/particles_above_salt_bc.py
+++ b/examples/particles_above_salt/particles_above_salt_bc.py
@@ -109,10 +109,11 @@ def compute(args):
         # and configure how the code is generated and compiled at runtime.
                 
         # Create an explicit OpenCL context from user parameters
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params, 
-                                          platform_id=args.cl_platform_id, 
-                                          device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
         
         # Configure OpenCL kernel generation and tuning (already done by HysopArgParser)
         from hysop.methods import OpenClKernelConfig
diff --git a/examples/scalar_advection/levelset.py b/examples/scalar_advection/levelset.py
index 49565999eeb8dfb48a85fe24442962e7508c14a0..77cf72567e42c1663afd7c9394d18e355db12839 100644
--- a/examples/scalar_advection/levelset.py
+++ b/examples/scalar_advection/levelset.py
@@ -52,10 +52,12 @@ def compute(args):
     method = {}
     extra_op_kwds = { 'mpi_params': mpi_params }
     if (impl is Implementation.OPENCL):
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
         from hysop.methods import OpenClKernelConfig
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params, platform_id=args.cl_platform_id,
-                                                                 device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
         extra_op_kwds['cl_env'] = cl_env
         method[OpenClKernelConfig] = args.opencl_kernel_config
         backend = Backend.OPENCL
diff --git a/examples/scalar_advection/scalar_advection.py b/examples/scalar_advection/scalar_advection.py
index ab4f58743099ba20fa49c74560ce777fc5c12aab..7b133555eaf2fd9759bded5a4b3c4cfd5aed4a43 100644
--- a/examples/scalar_advection/scalar_advection.py
+++ b/examples/scalar_advection/scalar_advection.py
@@ -51,9 +51,11 @@ def compute(args):
         # and configure how the code is generated and compiled at runtime.
                 
         # Create an explicit OpenCL context from user parameters
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params, platform_id=args.cl_platform_id, 
-                                                                 device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
         
         # Configure OpenCL kernel generation and tuning method
         # (already done by HysopArgParser for simplicity)
diff --git a/examples/scalar_advection/turbulent_scalar_advection.py b/examples/scalar_advection/turbulent_scalar_advection.py
index a74186eafbef9517ad64407fa7db78e3e91aba52..22f4ffb7c5640d71a358f8c5b53a302908a65a5c 100644
--- a/examples/scalar_advection/turbulent_scalar_advection.py
+++ b/examples/scalar_advection/turbulent_scalar_advection.py
@@ -65,9 +65,9 @@ from hysop.tools.parameters import CartesianDiscretization
 
 # Define the domain
 dim  = 3
-npts_uw = (64, )*dim
-npts_s = (128, )*dim
-box  = Box(origin=(0., )*dim, length=(1.,)*dim, dim=dim)
+npts_uw = (64, 64, 128)
+npts_s = (128, 128, 256)
+box  = Box(origin=(0., )*dim, length=(1.,1., 2.), dim=dim)
 # Physical parameters:
 # Flow viscosity
 VISCOSITY = 1e-4
@@ -84,10 +84,16 @@ dump_freq = 100
 mpi_params = MPIParams(comm=box.task_comm,
                        task_id=box.current_task())
 
+# Create an explicit OpenCL context from user parameters
+from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+cl_env = get_or_create_opencl_env(
+    mpi_params=mpi_params,
+    platform_id=0,
+    device_id=box.machine_rank%get_device_number())
 # Setup usual implementation specific variables
 impl = None
-extra_op_kwds = {'mpi_params': mpi_params}
-
+extra_op_kwds = {'mpi_params': mpi_params,
+                 'cl_env': cl_env}
 method = {}
 
 # Define parameters and field (time, timestep, velocity, vorticity, enstrophy)
@@ -103,7 +109,7 @@ topo_nogh = CartesianTopology(domain=box,
                               discretization=CartesianDiscretization(npts_uw,
                                   default_boundaries=True),
                               mpi_params=mpi_params,
-                              cutdirs=[False, False, True])
+                              cutdirs=[True, False, False])
 
 ### Build the directional operators
 #> Directional advection
diff --git a/examples/scalar_diffusion/scalar_diffusion.py b/examples/scalar_diffusion/scalar_diffusion.py
index e1db7dabc16712c598cf87c4fa9c36d8c34a110a..170359e4181a670c6c324018f24bc6f0e6f583ae 100755
--- a/examples/scalar_diffusion/scalar_diffusion.py
+++ b/examples/scalar_diffusion/scalar_diffusion.py
@@ -63,9 +63,11 @@ def compute(args):
         # and configure how the code is generated and compiled at runtime.
 
         # Create an explicit OpenCL context from user parameters
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params, platform_id=args.cl_platform_id,
-                                                                 device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
 
         # Configure OpenCL kernel generation and tuning (already done by HysopArgParser)
         from hysop.methods import OpenClKernelConfig
diff --git a/examples/sediment_deposit/sediment_deposit.py b/examples/sediment_deposit/sediment_deposit.py
index 239ac4221c8de2448290239cbae314dd3a62686e..3041c495b42494cd0d85ac6d65d2b07e00708a80 100644
--- a/examples/sediment_deposit/sediment_deposit.py
+++ b/examples/sediment_deposit/sediment_deposit.py
@@ -156,10 +156,11 @@ def compute(args):
         # and configure how the code is generated and compiled at runtime.
                 
         # Create an explicit OpenCL context from user parameters
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params, 
-                                          platform_id=args.cl_platform_id, 
-                                          device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
         
         # Configure OpenCL kernel generation and tuning (already done by HysopArgParser)
         from hysop.methods import OpenClKernelConfig
diff --git a/examples/sediment_deposit/sediment_deposit_levelset.py b/examples/sediment_deposit/sediment_deposit_levelset.py
index 3ff078a5da6721326b1015293dcd2b75d0b3ac07..88db48550a170c19c4f0ce6e610436e458464fa8 100644
--- a/examples/sediment_deposit/sediment_deposit_levelset.py
+++ b/examples/sediment_deposit/sediment_deposit_levelset.py
@@ -175,10 +175,11 @@ def compute(args):
         # and configure how the code is generated and compiled at runtime.
                 
         # Create an explicit OpenCL context from user parameters
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params, 
-                                          platform_id=args.cl_platform_id, 
-                                          device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
         
         # Configure OpenCL kernel generation and tuning (already done by HysopArgParser)
         from hysop.methods import OpenClKernelConfig
diff --git a/examples/shear_layer/shear_layer.py b/examples/shear_layer/shear_layer.py
index 03fd4e7d42acdd0b31eabda1dbd3e5795c6adfed..75d894648d74abb582af8ee22a6565e03f2adc26 100644
--- a/examples/shear_layer/shear_layer.py
+++ b/examples/shear_layer/shear_layer.py
@@ -41,10 +41,11 @@ def compute(args):
         # and configure how the code is generated and compiled at runtime.
                 
         # Create an explicit OpenCL context from user parameters
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params, 
-                                          platform_id=args.cl_platform_id, 
-                                          device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
         
         # Configure OpenCL kernel generation and tuning (already done by HysopArgParser)
         from hysop.methods import OpenClKernelConfig
diff --git a/examples/taylor_green/taylor_green.py b/examples/taylor_green/taylor_green.py
index c00e88b984d138c00b2109326d01f0ef9ff6f2c3..2221b66c92b20898ccd3b5495b3b5a6efc9be18c 100644
--- a/examples/taylor_green/taylor_green.py
+++ b/examples/taylor_green/taylor_green.py
@@ -66,10 +66,11 @@ def compute(args):
         # and configure how the code is generated and compiled at runtime.
 
         # Create an explicit OpenCL context from user parameters
-        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env
-        cl_env = get_or_create_opencl_env(mpi_params=mpi_params,
-                                          platform_id=args.cl_platform_id,
-                                          device_id=args.cl_device_id)
+        from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number
+        cl_env = get_or_create_opencl_env(
+            mpi_params=mpi_params,
+            platform_id=args.cl_platform_id, 
+            device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None)
 
         # Configure OpenCL kernel generation and tuning (already done by HysopArgParser)
         from hysop.methods import OpenClKernelConfig