diff --git a/hysop/backend/device/opencl/opencl_copy_kernel_launchers.py b/hysop/backend/device/opencl/opencl_copy_kernel_launchers.py
index b75dd13efb15aa28ce0c8b8e305d924ddf8d884f..c472495cf2f8b42f9b00c5a4c38a999d9bdeff59 100644
--- a/hysop/backend/device/opencl/opencl_copy_kernel_launchers.py
+++ b/hysop/backend/device/opencl/opencl_copy_kernel_launchers.py
@@ -3,6 +3,7 @@ from hysop import vprint, dprint
 from hysop.deps import np
 from hysop.tools.decorators import debug
 from hysop.tools.types import check_instance, first_not_None
+from hysop.tools.misc import prod
 from hysop.tools.numpywrappers import npw
 from hysop.backend.device.opencl import cl, clArray
 from hysop.backend.device.opencl.opencl_kernel_launcher import OpenClKernelLauncher
@@ -27,7 +28,7 @@ class OpenClCopyKernelLauncher(OpenClKernelLauncher):
         assert 'is_blocking' not in kwds
 
         enqueue_copy_kwds['dest'] = dst
-        enqueue_copy_kwds['src'] = src
+        enqueue_copy_kwds['src']  = src
         if isinstance(src, np.ndarray) or isinstance(dst, np.ndarray):
             enqueue_copy_kwds['is_blocking'] = False
 
@@ -200,3 +201,171 @@ class OpenClCopyDevice2DeviceLauncher(OpenClCopyBufferLauncher):
         super(OpenClCopyDevice2DeviceLauncher, self).__init__(varname=varname, src=src, dst=dst, 
                 src_device_offset=src_device_offset, dst_device_offset=dst_device_offset,
                 byte_count=byte_count)
+
+
+
+
+
+
+
+
+
+
+
+class OpenClCopyBufferRectLauncher(OpenClCopyKernelLauncher):
+    """
+    Non-blocking OpenCL copy kernel between host buffers and/or opencl device 
+    rectangle subregions of buffers (OpenCL 1.1 and newer).
+    """
+    def __init__(self, varname, src, dst,
+            src_device_offset=None, 
+            dst_device_offset=None,
+            byte_count=None,
+            **kwds):
+        """
+        Initialize a (HOST <-> DEVICE) or a (DEVICE <-> DEVICE) rectangle
+        subregions copy kernel.
+
+        Parameters
+        ----------
+        varname: str
+            Name of the variable copied for loggin purposes.
+        src: cl.MemoryObjectHolder or np.ndarray
+            The source buffer.
+        dst: cl.MemoryObjectHolder or np.ndarray
+            The destination buffer.
+        region: tuple of ints
+            The 3D region to copy in terms of bytes for the 
+            first dimension and of elements for the two last dimensions.
+        src_origin: tuple of ints
+            The 3D offset in number of elements of the region associated with src buffer. 
+            The final src offset in bytes is computed from src_origin and src_pitch.
+        dst_origin: tuple of ints
+            The 3D offset in number of elements of the region associated with dst buffer. 
+            The final dst offset in bytes is computed from dst_origin and dst_pitch.
+        src_pitches: tuple of ints
+            The 2D pictches used to compute src offsets in bytes for 
+            the second and the third dimension.
+        dst_pitches: tuple of ints
+            The 2D pitches used to compute dst offsets in bytes for 
+            the second and the third dimension.
+        """
+        check_instance(src, (cl.MemoryObjectHolder, np.ndarray))
+        check_instance(dst, (cl.MemoryObjectHolder, np.ndarray))
+        check_instance(src_offset,  tuple, values=(int, np.integer), size=3)
+        check_instance(dst_offset,  tuple, values=(int, np.integer), size=3)
+        check_instance(src_pitches, tuple, values=(int, np.integer), size=2)
+        check_instance(dst_pitches, tuple, values=(int, np.integer), size=2)
+        
+        enqueue_copy_kwds = {}
+        enqueue_copy_kwds['region'] = region
+
+        if isinstance(src, np.ndarray) and \
+           isinstance(dst, np.ndarray):
+            msg='Host to host copy is not supported.'
+            raise RuntimeError(msg)
+        elif isinstance(src, cl.MemoryObjectHolder) and \
+             isinstance(dst, cl.MemoryObjectHolder):
+            enqueue_copy_kwds['src_origin']  = src_origin
+            enqueue_copy_kwds['src_pitches'] = src_pitches
+            enqueue_copy_kwds['dst_origin']  = dst_origin
+            enqueue_copy_kwds['dst_pitches'] = dst_pitches
+        elif isinstance(src, cl.MemoryObjectHolder) and \
+             isinstance(dst, np.ndarray):
+            enqueue_copy_kwds['host_origin']  = dst_origin
+            enqueue_copy_kwds['host_pitches'] = dst_pitches
+            enqueue_copy_kwds['buffer_origin']  = src_origin
+            enqueue_copy_kwds['buffer_pitches'] = src_pitches
+        elif isinstance(src, np.ndarray) and \
+             isinstance(dst, cl.MemoryObjectHolder):
+            enqueue_copy_kwds['host_origin']  = src_origin
+            enqueue_copy_kwds['host_pitches'] = src_pitches
+            enqueue_copy_kwds['buffer_origin']  = dst_origin
+            enqueue_copy_kwds['buffer_pitches'] = dst_pitches
+        else:
+            msg='The impossible happened.\n *src={}\n *dst={}'
+            msg=msg.format(type(src), type(dst))
+            raise ValueError(msg)
+
+        assert 'name' not in kwds
+        name = 'enqueue_copy_rect_{}__{}_to_{}'.format(varname,
+                'host' if isinstance(src, np.ndarray) else 'device',
+                'host' if isinstance(dst, np.ndarray) else 'device')
+        apply_msg='{}<<<{}>>>'.format(name, region)
+
+        super(OpenClCopyBufferLauncher, self).__init__(dst=dst, src=src, 
+                enqueue_copy_kwds=enqueue_copy_kwds, 
+                name=name, apply_msg=apply_msg, **kwds)
+
+    @classmethod
+    def _format_slices(cls, a, slices):
+        check_instance(a, (np.ndarray, Array))
+
+        shape = a.shape 
+        dtype = a.dtype
+        ndim  = a.ndim
+
+        if (not slices) or (slices is Ellipsis):
+            slices = (Ellipsis,)
+        check_instance(slices, tuple)
+
+        # expand ellipsis
+        if (Ellipsis in slices):
+            nellipsis = slices.count(Ellipsis):
+            msg='Only one Ellipsis can be passed.'
+            assert nellipsis==1, msg
+            eid = slices.find(Ellipsis)
+            missing_count = len(slices)-1 
+            missing_slices = tuple(slice(s) for s in xrange(eid, eid+missing_count))
+            full_slices = slices[:eid]+missing_slices+slices[eid+1:]
+            slices = full_slices
+        check_instance(slices, tuple, values=(int,slice), size=ndim)
+
+        # compute indices
+        indices = ()
+        for slc, si in zip(slices, shape):
+            if isinstance(slc, slice):
+                indices += slc.indices(si)
+            else: 
+                indices += (slc, slc+1, 1)
+        
+        # compute nelems
+        nelems = tuple( (idx[1]-idx[0]+idx[2]-1)//idx[2] for idx in indices )
+        estart = tuple( idx[0] for idx in indices )
+        offset = 0
+        region, origin, pitches = ()
+        for ne, es, si in zip(nelems, estart, shape):
+            offset *= si
+            pitches = tuple(p*si for p in pitches)
+            if (ne<=0) or (ne>=si):
+                msg='ne={}, si={}'.format(ne, si)
+                raise ValueError(msg)
+            elif (not region) and (ne==1):
+                offset += es
+            elif (not region) or (ne < si): 
+                origin  += (es,)
+                region  += (ne,)
+                pitches += (1,) 
+            else: 
+                assert ne == si, 'ne={}, si={}'.format(ne, si)
+                region[-1]  *= ne
+                pitches[-1] //= si
+
+         
+        nelems = prod(nelems)
+        nbytes = nelems * dtype.itemsize
+        return sdim, slices, dtype, nbytes
+
+        
+    @classmethod
+    def from_slices(src, dst, src_slices=None, dst_slices=None):
+        src_sdim, src_slices, src_dtype, src_bytes = cls._format_slices(src, src_slices)
+        dst_sdim, dst_slices, dst_dtype, dst_bytes = cls._format_slices(dst, dst_slices)
+        if (src_ndim != dst_ndim):
+            msg='Dimension mismatch between source and destination slices:'
+            msg+='\n src_slices: {}'
+            msg+='\n dst_slices: {}'
+            msg=msg.format(src_slices, dst_slices)
+
+
+