diff --git a/hysop/__init__.py b/hysop/__init__.py index d877538ec4dceb20559c6473c599f7edcb6c6f27..279e86abeb02be2bfb61b0e3665f0412e9c2564c 100644 --- a/hysop/__init__.py +++ b/hysop/__init__.py @@ -20,7 +20,7 @@ __VERBOSE__ = False __DEBUG__ = False __TRACE__ = False __TRACE_WARNINGS__ = False -__KERNEL_DEBUG__ = False +__KERNEL_DEBUG__ = True __PROFILE__ = True __ENABLE_LONG_TESTS__ = "OFF" is "ON" diff --git a/hysop/backend/device/autotunable_kernel.py b/hysop/backend/device/autotunable_kernel.py index b253355bb735da350ff5a10326ad21028b5226c9..f41023994586f0d74eb3c6b68b537599441bd860 100644 --- a/hysop/backend/device/autotunable_kernel.py +++ b/hysop/backend/device/autotunable_kernel.py @@ -643,4 +643,13 @@ class AutotunerWorkConfiguration(object): return False oldval = val return True - + + @abstractmethod + def make_array_offset(self, dim): + pass + @abstractmethod + def make_array_strides(self, dim): + pass + @abstractmethod + def make_array_args(self, **arrays): + pass diff --git a/hysop/backend/device/codegen/kernels/directional_remesh.py b/hysop/backend/device/codegen/kernels/directional_remesh.py index f8f93aa9292cf4524a381f12ed95e94bb39cce47..10a4141e96f17b1925c67e0182e6aa8b1b010903 100644 --- a/hysop/backend/device/codegen/kernels/directional_remesh.py +++ b/hysop/backend/device/codegen/kernels/directional_remesh.py @@ -2,44 +2,37 @@ import contextlib from contextlib import contextmanager -from hysop.deps import math, operator, hashlib - from hysop import __VERBOSE__, __KERNEL_DEBUG__ +from hysop.deps import np, math, operator, hashlib from hysop.tools.misc import Utils, upper_pow2_or_3 from hysop.tools.types import check_instance from hysop.tools.numpywrappers import npw - -from hysop.deps import np from hysop.constants import DirectionLabels, BoundaryCondition, Backend, Precision + from hysop.core.arrays.all import OpenClArray +from hysop.numerics.remesh.remesh import RemeshKernel +from hysop.fields.continuous_field import Field +from hysop.fields.discrete_field import DiscreteFieldView -from hysop.backend.device.opencl import cl +from hysop.backend.device.opencl import cl, clTools, clCharacterize +from hysop.backend.device.opencl.opencl_env import OpenClEnvironment +from hysop.backend.device.opencl.opencl_types import OpenClTypeGen +from hysop.backend.device.opencl.opencl_array_backend import OpenClArrayBackend -from hysop.backend.device.codegen import CodeGeneratorWarning +from hysop.backend.device.codegen import CodeGeneratorWarning +from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict +from hysop.backend.device.codegen.base.statistics import WorkStatistics +from hysop.backend.device.codegen.base.variables import CodegenStruct +from hysop.backend.device.codegen.structs.mesh_info import MeshBaseStruct, MeshInfoStruct from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.device.codegen.base.kernel_codegen import KernelCodeGenerator from hysop.backend.device.codegen.base.variables import CodegenVariable, \ CodegenVectorClBuiltin, CodegenArray -from hysop.backend.device.opencl import cl, clTools -from hysop.backend.device.opencl.opencl_types import OpenClTypeGen -from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict -from hysop.backend.device.codegen.base.statistics import WorkStatistics - -from hysop.backend.device.codegen.base.variables import CodegenStruct -from hysop.backend.device.codegen.structs.mesh_info import MeshBaseStruct, MeshInfoStruct - -from hysop.backend.device.opencl 
import cl, clCharacterize -from hysop.backend.device.opencl.opencl_env import OpenClEnvironment - -from hysop.fields.continuous_field import Field -from hysop.fields.discrete_field import DiscreteFieldView -from hysop.core.arrays.all import OpenClArrayBackend -from hysop.numerics.remesh.remesh import RemeshKernel -from hysop.constants import DirectionLabels from hysop.backend.device.codegen.functions.directional_remesh import DirectionalRemeshFunction + class DirectionalRemeshKernelGenerator(KernelCodeGenerator): @staticmethod diff --git a/hysop/backend/device/codegen/kernels/transpose.py b/hysop/backend/device/codegen/kernels/transpose.py index 83049193b69f52bf40dbbb9a12a46c83c4749327..e193d89edb51e0a5090c2ec0ee00b3da65493884 100644 --- a/hysop/backend/device/codegen/kernels/transpose.py +++ b/hysop/backend/device/codegen/kernels/transpose.py @@ -1,13 +1,12 @@ -import operator -import numpy as np from contextlib import contextmanager, nested - +from hysop.deps import np, operator from hysop.tools.misc import upper_pow2_or_3, prod from hysop.tools.decorators import static_vars from hysop.tools.numpywrappers import npw from hysop.tools.types import check_instance from hysop.tools.misc import upper_pow2 from hysop.tools.units import bytes2str +from hysop.backend.device.opencl.opencl_array_backend import OpenClArrayBackend from hysop.backend.device.opencl import clCharacterize from hysop.backend.device.opencl.opencl_types import OpenClTypeGen from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator @@ -152,7 +151,7 @@ class TransposeKernelGenerator(KernelCodeGenerator): return (sc,dc,tc) def __init__(self, typegen, ctype, vectorization, - axes, tile_size, tile_padding, + axes, tile_size, tile_padding, symbolic_mode, use_diagonal_coordinates = True, is_inplace = False, known_vars = None, @@ -225,7 +224,8 @@ class TransposeKernelGenerator(KernelCodeGenerator): name = TransposeKernelGenerator.codegen_name(is_inplace, axes, ctype, tile_size, tile_padding, vectorization, use_diagonal_coordinates) - kernel_args = self.gen_kernel_arguments(typegen, ctype, Pdim, debug_mode, is_inplace) + kernel_args = self.gen_kernel_arguments(typegen, ctype, Pdim, debug_mode, is_inplace, + known_vars, symbolic_mode) super(self.__class__,self).__init__( name=name, @@ -233,6 +233,7 @@ class TransposeKernelGenerator(KernelCodeGenerator): work_dim=work_dim, known_vars = known_vars, kernel_args = kernel_args, + symbolic_mode=symbolic_mode, **kargs) if debug_mode: @@ -288,22 +289,33 @@ class TransposeKernelGenerator(KernelCodeGenerator): return reqs - def gen_kernel_arguments(self, typegen, ctype, Pdim, debug_mode, is_inplace): + def gen_kernel_arguments(self, typegen, ctype, Pdim, debug_mode, is_inplace, + known_vars, symbolic_mode): _global = OpenClCodeGenerator.default_keywords['global'] tg = typegen + mesh_dim = Pdim kargs = ArgDict() if is_inplace: - kargs['inout'] = CodegenVariable(ctype=ctype, name='inout', - typegen=tg, storage=_global, ptr=True, - ptr_const=True, ptr_restrict=True, nl=True) + data, strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='inout', + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ctype, typegen=typegen, + mesh_dim=mesh_dim, const=False, ptr_restrict=True) + self.inout_strides = strides + self.inout_data = data else: - kargs['in'] = CodegenVariable(ctype=ctype, name='in', - typegen=tg, storage=_global, const=True, ptr=True, - add_impl_const=True, ptr_restrict=True, nl=True) - kargs['out'] = 
CodegenVariable(ctype=ctype, name='out', - typegen=tg, storage=_global, ptr=True, - ptr_const=True, ptr_restrict=True, nl=True) + in_data, in_strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='in', + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ctype, typegen=typegen, + mesh_dim=mesh_dim, const=True, ptr_restrict=True) + out_data, out_strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='out', + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ctype, typegen=typegen, + mesh_dim=mesh_dim, const=False, ptr_restrict=True) + self.in_data = in_data + self.out_data = out_data + self.in_strides = in_strides + self.out_strides = out_strides if debug_mode: n_dbg_arrays = self.n_dbg_arrays @@ -372,12 +384,15 @@ class TransposeKernelGenerator(KernelCodeGenerator): S = s.vars['shape'] if is_inplace: - _inout = s.vars['inout'] - _in = _inout - _out = _inout + _in = self.inout_data + _out = self.inout_data + _in_strides = self.inout_strides + _out_strides = self.inout_strides else: - _in = s.vars['in'] - _out = s.vars['out'] + _in = self.in_data + _out = self.out_data + _in_strides = self.in_strides + _out_strides = self.out_strides if debug_mode: dbg = [ s.vars['dbg{}'.format(i)] for i in xrange(n_dbg_arrays) ] @@ -420,29 +435,25 @@ class TransposeKernelGenerator(KernelCodeGenerator): i = pdim-1-k j = pdim-1-axes[k] if i==pdim-1: - tile_offset_in = '{}'.format(idx[i]) - tile_offset_out = '{}'.format(idx[j]) + tile_offset_in = '{}*{}'.format(_in_strides[i], idx[i]) + tile_offset_out = '{}*{}'.format(_out_strides[i], idx[j]) else: - tile_offset_in = '({}*{}+{})'.format(tile_offset_in, S[i], idx[i]) - tile_offset_out = '({}*{}+{})'.format(tile_offset_out, S[j], idx[j]) + tile_offset_in += ' $+ {}*{}'.format(_in_strides[i], idx[i]) + tile_offset_out += ' $+ {}*{}'.format(_out_strides[i], idx[j]) if i in tile_indexes: if ki==tdim-1: - local_offset_in = '{}'.format(lidx[ki]) + local_offset_in = '{}*{}'.format(_in_strides[i], lidx[ki]) else: - local_offset_in = '({}*{}+{})'.format(local_offset_in, S[i], lidx[ki]) + local_offset_in += ' $+ {}*{}'.format(_in_strides[i], lidx[ki]) ki-=1 - elif local_offset_in != '': - local_offset_in = '{}*{}'.format(local_offset_in, S[i]) if j in tile_indexes: if kj==tdim-1: - local_offset_out = '{}'.format(lidx[kj]) + local_offset_out = '{}*{}'.format(_out_strides[i], lidx[kj]) else: - local_offset_out = '({}*{}+{})'.format(local_offset_out, S[j], lidx[kj]) + local_offset_out += ' $+ {}*{}'.format(_out_strides[i], lidx[kj]) kj -= 1 - elif local_offset_out != '': - local_offset_out = '{}*{}'.format(local_offset_out, S[j]) assert ki==-1 assert kj==-1 @@ -464,13 +475,13 @@ class TransposeKernelGenerator(KernelCodeGenerator): else: loc_id = '({}*{}+{})'.format(loc_id, local_size[i], local_id[i]) - tile_offset_in = CodegenVariable('tile_offset_in', 'int', tg, + tile_offset_in = CodegenVariable('tile_offset_in', 'ulong', tg, init=tile_offset_in, const=True) - tile_offset_out = CodegenVariable('tile_offset_out', 'int', tg, + tile_offset_out = CodegenVariable('tile_offset_out', 'ulong', tg, init=tile_offset_out, const=True) - local_offset_in = CodegenVariable('local_offset_in', 'int', tg, + local_offset_in = CodegenVariable('local_offset_in', 'ulong', tg, init=local_offset_in, const=True) - local_offset_out = CodegenVariable('local_offset_out', 'int', tg, + local_offset_out = CodegenVariable('local_offset_out', 'ulong', tg, init=local_offset_out, const=True) TID = CodegenVariable('TID', 
'int', tg, const=True, @@ -568,6 +579,10 @@ class TransposeKernelGenerator(KernelCodeGenerator): s.decl_aligned_vars(global_id, local_id, group_id, global_size, local_size, group_size, const=True) + ptrs = (_in,) + if not is_inplace: + ptrs+=(_out,) + s.decl_aligned_vars(*ptrs) s.jumpline() nwork.declare(s) ntiles.declare(s) @@ -696,7 +711,7 @@ to prevent memory camping that may occur during global input read or output writ if __name__ == '__main__': from hysop.backend.device.codegen.base.test import _test_typegen tg = _test_typegen('float') - ek = TransposeKernelGenerator(typegen=tg, ctype='char', vectorization=2, + ek = TransposeKernelGenerator(typegen=tg, ctype='short', vectorization=2, axes=(2,1,0,4,3), tile_size=8, tile_padding=1, is_inplace=False, diff --git a/hysop/backend/device/kernel_autotuner.py b/hysop/backend/device/kernel_autotuner.py index 1417617e49e9012b6fcd46a257d3cd94e7806183..a5e0fb0efae54999a66ffff340eda6b979cc6495 100644 --- a/hysop/backend/device/kernel_autotuner.py +++ b/hysop/backend/device/kernel_autotuner.py @@ -346,6 +346,13 @@ class KernelAutotuner(object): raise ValueError(msg) args_list = [None,]*len(args_mapping) + arg_indices = set(ka[0] for ka in args_mapping.values()) + if arg_indices != set(xrange(len(arg_indices))): + msg='Illformed argument position mapping:\n' + msg+='\n'.join(' >argument {}: {}'.format(argpos, argname) for (argname, argpos) + in zip(args_mapping.keys(), arg_indices)) + msg+='\nExpected contiguous integer argument positions.' + raise ValueError(msg) for (arg_name, arg_value) in kernel_args.iteritems(): if (arg_name not in args_mapping): msg='Unknown argument {}, valid ones are {}.' diff --git a/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py b/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py index 5a4404e34265227e1d33efa6c71de4879f9eefa8..8cabda7e2f9359fa92a5dbadb3982cd26cbb0b04 100644 --- a/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py +++ b/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py @@ -122,28 +122,17 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): msg=msg.format(dsin.name, dsin.dtype, dsout.name, dsout.dtype, precision.__name__) raise NotImplementedError(msg) - typegen = self.cl_env.typegen - ndim = upper_pow2_or_3(position.dim) - ivecn = typegen.uintn(ndim) - def make_ivecn(bstrides, dtype): - msg='Invalid strides {} for dtype {} (itemsize={}).'.format(bstrides, - dtype.__class__.__name__, dtype.itemsize) - assert (npw.mod(bstrides, dtype.itemsize) == 0).all(), msg - data = typegen.make_uintn( - vals=tuple(x//dtype.itemsize for x in bstrides[::-1]), - n=ndim, dval=0) - if hardcode_arrays: - return data.tolist()[:ndim] - else: - return data + make_offset, offset_dtype = self.make_array_offset() + make_strides, strides_dtype = self.make_array_strides(position.dim, + hardcode_arrays=hardcode_arrays) kernel_args = {} known_args = {} target_args = known_args if hardcode_arrays else kernel_args kernel_args['position_base'] = position[0].base_data - target_args['position_strides'] = make_ivecn(position[0].strides, position.dtype) - target_args['position_offset'] = npw.uint64(position[0].offset) + target_args['position_strides'] = make_strides(position[0].strides, position.dtype) + target_args['position_offset'] = make_offset(position[0].offset, position.dtype) mesh_info_vars = { 'position_mesh_info': self.mesh_info('position_mesh_info', position.mesh) } @@ -155,8 +144,8 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): for 
j in xrange(dsinout.nb_components): prefix = 'S{}_{}_inout'.format(i,j) kernel_args[prefix+'_base'] = dsinout[j].base_data - target_args[prefix+'_strides'] = make_ivecn(dsinout[j].strides, dsinout.dtype) - target_args[prefix+'_offset'] = npw.uint64(dsinout[j].offset) + target_args[prefix+'_strides'] = make_strides(dsinout[j].strides, dsinout.dtype) + target_args[prefix+'_offset'] = make_offset(dsinout[j].offset, dsinout.dtype) arg_index += 1 + 2*(1-hardcode_arrays) assert i == nfields-1 else: @@ -166,8 +155,8 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): for j in xrange(dsin.nb_components): prefix = 'S{}_{}_in'.format(i,j) kernel_args[prefix+'_base'] = dsin[j].base_data - target_args[prefix+'_strides'] = make_ivecn(dsin[j].strides, dsin.dtype) - target_args[prefix+'_offset'] = npw.uint64(dsin[j].offset) + target_args[prefix+'_strides'] = make_strides(dsin[j].strides, dsin.dtype) + target_args[prefix+'_offset'] = make_offset(dsin[j].offset, dsin.dtype) arg_index += 1 + 2*(1-hardcode_arrays) assert i == nfields-1 for (i,dsout) in enumerate(scalars_out): @@ -176,8 +165,8 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): for j in xrange(dsout.nb_components): prefix = 'S{}_{}_out'.format(i,j) kernel_args[prefix+'_base'] = dsout[j].base_data - target_args[prefix+'_strides'] = make_ivecn(dsout[j].strides, dsout.dtype) - target_args[prefix+'_offset'] = npw.uint64(dsout[j].offset) + target_args[prefix+'_strides'] = make_strides(dsout[j].strides, dsout.dtype) + target_args[prefix+'_offset'] = make_offset(dsout[j].offset, dsout.dtype) arg_index += 1 + 2*(1-hardcode_arrays) assert i == nfields-1 assert len(kernel_args) == arg_index @@ -192,7 +181,8 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): force_atomics=force_atomics, min_nparticles=min_nparticles, ftype=ftype, scalar_cfl=scalar_cfl, kernel_args=kernel_args, mesh_info_vars=mesh_info_vars, work_dim=work_dim, work_size=work_size, min_wg_size=min_wg_size, - known_args=known_args, hardcode_arrays=hardcode_arrays, ivecn=ivecn, **kwds) + known_args=known_args, hardcode_arrays=hardcode_arrays, + offset_dtype=offset_dtype, strides_dtype=strides_dtype, **kwds) def compute_args_mapping(self, extra_kwds, extra_parameters): @@ -209,7 +199,8 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): scalars_in = extra_kwds['scalars_in'] scalars_out = extra_kwds['scalars_out'] nscalars = extra_kwds['nscalars'] - ivecn = extra_kwds['ivecn'] + strides_dtype = extra_kwds['strides_dtype'] + offset_dtype = extra_kwds['offset_dtype'] hardcode_arrays = extra_kwds['hardcode_arrays'] args_mapping = {} @@ -218,8 +209,8 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): args_mapping['position_base'] = (0, cl.MemoryObjectHolder) arg_index += 1 if not hardcode_arrays: - args_mapping['position_strides'] = (1, ivecn) - args_mapping['position_offset'] = (2, npw.uint64) + args_mapping['position_strides'] = (1, strides_dtype) + args_mapping['position_offset'] = (2, offset_dtype) arg_index += 2 if is_inplace: @@ -229,8 +220,8 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): args_mapping[prefix+'_base'] = (arg_index, cl.MemoryObjectHolder) arg_index+=1 if not hardcode_arrays: - args_mapping[prefix+'_strides'] = (arg_index+0, ivecn) - args_mapping[prefix+'_offset'] = (arg_index+1, npw.uint64) + args_mapping[prefix+'_strides'] = (arg_index+0, strides_dtype) + args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) arg_index 
+= 2 else: for (i,dsin) in enumerate(scalars_in): @@ -239,8 +230,8 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): args_mapping[prefix+'_base'] = (arg_index, cl.MemoryObjectHolder) arg_index += 1 if not hardcode_arrays: - args_mapping[prefix+'_strides'] = (arg_index+0, ivecn) - args_mapping[prefix+'_offset'] = (arg_index+1, npw.uint64) + args_mapping[prefix+'_strides'] = (arg_index+0, strides_dtype) + args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) arg_index += 2 for (i,dsout) in enumerate(scalars_out): for j in xrange(dsout.nb_components): @@ -248,8 +239,8 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): args_mapping[prefix+'_base'] = (arg_index, cl.MemoryObjectHolder) arg_index += 1 if not hardcode_arrays: - args_mapping[prefix+'_strides'] = (arg_index+0, ivecn) - args_mapping[prefix+'_offset'] = (arg_index+1, npw.uint64) + args_mapping[prefix+'_strides'] = (arg_index+0, strides_dtype) + args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) arg_index += 2 assert len(args_mapping)==arg_index assert arg_index == (1+2*(1-hardcode_arrays))*(1+(2-is_inplace)*nscalars) diff --git a/hysop/backend/device/opencl/autotunable_kernels/transpose.py b/hysop/backend/device/opencl/autotunable_kernels/transpose.py index 78d282253f3b14bb3a8dda1b03f6a6e7dd233b07..d03e4232757c6fa79f4d682771ef56382a67638f 100644 --- a/hysop/backend/device/opencl/autotunable_kernels/transpose.py +++ b/hysop/backend/device/opencl/autotunable_kernels/transpose.py @@ -1,7 +1,7 @@ from hysop.tools.numpywrappers import npw from hysop.tools.types import check_instance -from hysop.tools.misc import upper_pow2, previous_pow2 +from hysop.tools.misc import upper_pow2, previous_pow2, upper_pow2_or_3 from hysop.tools.units import bytes2str from hysop.constants import AutotunerFlags from hysop.backend.device.opencl import cl, clTools @@ -31,7 +31,8 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): def autotune(self, is_inplace, input_field, output_field, - axes, name=None, **kwds): + axes, hardcode_arrays, + name=None, **kwds): """Autotune this kernel with specified axes, inputs and outputs.""" check_instance(axes, tuple, values=int) @@ -51,18 +52,46 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): assert axes != tuple(range(dim)) # check if is_inplace is allowed - assert (is_inplace == (input_field.dfield == output_field.dfield)) if is_inplace: - #Only 2D square matrix inplace transposition is supported - compute_inplace = (dim == 2) - compute_inplace &= all(shape[0]==shape) - else: - compute_inplace = False + can_compute_inplace = (dim == 2) + can_compute_inplace &= all(shape[0]==shape) + msg='Inplace was specified but this is only possible for 2D square arrays.' + if not can_compute_inplace: + raise ValueError(msg) + assert input_field.dfield == output_field.dfield + + # get vector size for strides + make_offset, offset_dtype = self.make_array_offset() + make_strides, strides_dtype = self.make_array_strides(input_field.dim, + hardcode_arrays=hardcode_arrays) + + # check that all component share strides and offsets + if hardcode_arrays: + assert all(npw.array_equal(input_field[i].offset, input_field[0].offset) + for i in xrange(input_field.nb_components)), 'Cannot hardcode mismatching array offsets.' + assert all(npw.array_equal(input_field[i].strides, input_field[0].strides) + for i in xrange(input_field.nb_components)), 'Cannot hardcode mismatching array strides.' 
+ assert all(npw.array_equal(output_field[i].offset, output_field[0].offset) + for i in xrange(output_field.nb_components)), 'Cannot hardcode mismatching array offsets.' + assert all(npw.array_equal(output_field[i].strides, output_field[0].strides) + for i in xrange(output_field.nb_components)), 'Cannot hardcode mismatching array strides.' + + kernel_args = {} + known_args = {} + target_args = known_args if hardcode_arrays else kernel_args - if compute_inplace: - kernel_args = dict(inout=input_field(0).data) + if is_inplace: + kernel_args['inout_base'] = input_field(0).data + target_args['inout_strides'] = make_strides(input_field(0).strides, input_field.dtype) + target_args['inout_offset'] = make_offset(input_field(0).offset, input_field.dtype) else: - kernel_args = dict(input=input_field(0).data, output=output_field(0).data) + kernel_args['in_base'] = input_field(0).data + target_args['in_strides'] = make_strides(input_field(0).strides, input_field.dtype) + target_args['in_offset'] = make_offset(input_field(0).offset, input_field.dtype) + + kernel_args['out_base'] = output_field(0).data + target_args['out_strides'] = make_strides(output_field(0).strides, output_field.dtype) + target_args['out_offset'] = make_offset(output_field(0).offset, output_field.dtype) if (name is None): name = 'transpose_{}_[{}]_{}'.format(ctype, @@ -75,11 +104,11 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): TransposeKernelGenerator.characterize_permutation(shape, axes, self.max_device_work_dim()) - # keyword arguments will be agregated into extra_kwds dictionnary return super(OpenClAutotunableTransposeKernel, self).autotune(name=name, kernel_args=kernel_args, - compute_inplace=compute_inplace, + known_args=known_args, hardcode_arrays=hardcode_arrays, + offset_dtype=offset_dtype, strides_dtype=strides_dtype, axes=axes, dtype=dtype, ctype=ctype, @@ -89,6 +118,7 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): tile_indices=tile_indices, work_dim=work_dim, work_size=work_shape, + is_inplace=is_inplace, last_axe_permuted=last_axe_permuted, **kwds) def compute_parameters(self, extra_kwds): @@ -188,7 +218,8 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): ## Extract usefull variables axes = extra_kwds['axes'] ctype = extra_kwds['ctype'] - is_inplace = extra_kwds['compute_inplace'] + is_inplace = extra_kwds['is_inplace'] + known_args = extra_kwds['known_args'] ## Get compile time OpenCL known variables known_vars = super(OpenClAutotunableTransposeKernel, self).generate_kernel_src( @@ -196,7 +227,7 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): local_work_size=local_work_size, extra_parameters=extra_parameters, extra_kwds=extra_kwds) - + known_vars.update(known_args) known_vars['shape'] = self.to_vecn(extra_kwds['shape'], 0) ## Generate OpenCL source code @@ -214,30 +245,31 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): return (kernel_name, kernel_src) def compute_args_mapping(self, extra_kwds, extra_parameters): - if extra_kwds['compute_inplace']: - args_mapping = { 'inout': (0, cl.MemoryObjectHolder) } + args_mapping = {} + offset_dtype = extra_kwds['offset_dtype'] + strides_dtype = extra_kwds['strides_dtype'] + hardcode_arrays = extra_kwds['hardcode_arrays'] + if extra_kwds['is_inplace']: + args_mapping['inout_base'] = (0, cl.MemoryObjectHolder) + if not hardcode_arrays: + args_mapping['inout_strides'] = (1, strides_dtype) + args_mapping['inout_offset'] = (2, offset_dtype) else: - args_mapping = { 'input' : (0, 
cl.MemoryObjectHolder), - 'output': (1, cl.MemoryObjectHolder) } - return args_mapping - - def format_best_candidate(self, **kwds): - """ - Post treatment callback for autotuner results. - Transform autotuner results in user friendly kernel wrappers. + args_mapping['in_base'] = (0, cl.MemoryObjectHolder) + if not hardcode_arrays: + args_mapping['in_strides'] = (1, strides_dtype) + args_mapping['in_offset'] = (2, offset_dtype) - Return a OpenClKernel with default_queue and default_args set to None. - Only default_global_size, default_local_size, and args_mapping are set. + args_mapping['out_base'] = (1 + 2*(not hardcode_arrays), cl.MemoryObjectHolder) + if not hardcode_arrays: + args_mapping['out_strides'] = (4, strides_dtype) + args_mapping['out_offset'] = (5, offset_dtype) + return args_mapping - Use the build_launcher method to build OpenClKernelLauncher from this OpenClKernel. - """ - res = super(OpenClAutotunableTransposeKernel, self).format_best_candidate(**kwds) - compute_inplace = kwds['extra_kwds']['compute_inplace'] - return res + (compute_inplace,) - def hash_extra_kwds(self, extra_kwds): """Hash extra_kwds dictionnary for caching purposes.""" - return hash((extra_kwds['ctype'], + return self.custom_hash(extra_kwds['ctype'], extra_kwds['axes'], - tuple(extra_kwds['shape'].tolist()), - extra_kwds['compute_inplace'])) + extra_kwds['shape'], + extra_kwds['is_inplace'], + extra_kwds['known_args']) diff --git a/hysop/backend/device/opencl/opencl_array_backend.py b/hysop/backend/device/opencl/opencl_array_backend.py index 61dea8d155234c29129162f2f18371f9e34911e5..29016dd56b484c2da549dad6f36aaecf8705a4e1 100644 --- a/hysop/backend/device/opencl/opencl_array_backend.py +++ b/hysop/backend/device/opencl/opencl_array_backend.py @@ -3175,7 +3175,8 @@ class OpenClArrayBackend(ArrayBackend): offset_str = typegen.dump(np.uint64(known_vars[offset])) else: offset_str = offset - init = '({})(({})({})+{})'.format(ctype_alias, char_alias, base, offset_str) + # init = '({})(({})({})+{})'.format(ctype_alias, char_alias, base, offset_str) + init = '{} $+ {}'.format(base, offset_str) array = CodegenVariable(name=name, typegen=typegen, ctype=ctype, ptr=ptr, const=const, diff --git a/hysop/backend/device/opencl/opencl_autotunable_kernel.py b/hysop/backend/device/opencl/opencl_autotunable_kernel.py index a261662ca08295bb7c2d4f1d0f4e141a76127ac5..5d84e363b0748c7270045ebea65f1309f92d3ee8 100644 --- a/hysop/backend/device/opencl/opencl_autotunable_kernel.py +++ b/hysop/backend/device/opencl/opencl_autotunable_kernel.py @@ -10,10 +10,11 @@ from hysop.tools.units import bytes2str from hysop.backend.device.kernel_autotuner import KernelGenerationError from hysop.backend.device.autotunable_kernel import AutotunableKernel, AutotunerWorkConfiguration -from hysop.backend.device.opencl import cl, clTools, clCharacterize +from hysop.backend.device.opencl import cl, clTools, clCharacterize, clArray from hysop.backend.device.opencl.opencl_env import OpenClEnvironment from hysop.backend.device.opencl.opencl_types import OpenClTypeGen from hysop.backend.device.opencl.opencl_kernel import OpenClKernel +from hysop.backend.device.opencl.opencl_array import OpenClArray from hysop.backend.device.opencl.opencl_kernel_statistics import OpenClKernelStatistics class OpenClAutotunableKernel(AutotunableKernel): @@ -211,4 +212,52 @@ class OpenClAutotunableKernel(AutotunableKernel): res = npw.full(shape=(vsize,), dtype=vec.dtype, fill_value=extend) res[:vec.size] = vec return res + + def make_array_offset(self): + offset_dtype = 
npw.uint64 + def make_offset(offset, dtype): + """Build an offset in number of elements instead of bytes.""" + msg='Unaligned offset {} for dtype {} (itemsize={}).'.format(offset, + dtype, dtype.itemsize) + assert (offset % dtype.itemsize) == 0 + return offset_dtype(offset // dtype.itemsize) + return make_offset, offset_dtype + + def make_array_strides(self, dim, hardcode_arrays): + """Build array strides in number of elements instead of bytes.""" + typegen = self.cl_env.typegen + ndim = upper_pow2_or_3(dim) + strides_dtype = typegen.uintn(ndim) + def make_strides(bstrides, dtype): + msg='Invalid strides {} for dtype {} (itemsize={}).'.format(bstrides, + dtype.__class__.__name__, dtype.itemsize) + assert (npw.mod(bstrides, dtype.itemsize) == 0).all(), msg + data = typegen.make_uintn( + vals=tuple(x//dtype.itemsize for x in bstrides[::-1]), + n=ndim, dval=0) + if hardcode_arrays: + return data.tolist()[:ndim] + else: + return data + return make_strides, strides_dtype + + def build_array_args(self, hardcode_arrays=False, **arrays): + kernel_args = {} + for name, data in arrays.iteritems(): + check_instance(data, (OpenClArray, clArray.Array)) + base = '{}_base'.format(name) + kernel_args[base] = data.base_data + if not hardcode_arrays: + dim = data.ndim + make_offset, _ = self.make_array_offset() + make_strides, _ = self.make_array_strides(dim=dim, + hardcode_arrays=hardcode_arrays) + offset = '{}_offset'.format(name) + strides = '{}_strides'.format(name) + kernel_args[offset] = make_offset(data.offset, data.dtype) + kernel_args[strides] = make_strides(data.strides, data.dtype) + return kernel_args + + + diff --git a/hysop/backend/device/opencl/opencl_kernel.py b/hysop/backend/device/opencl/opencl_kernel.py index 3154d6398a33c8f2b6b24c88b9993e8e40fe88f7..360ebe3c060c34c8c57c81706f24ef4ebbe46d8f 100644 --- a/hysop/backend/device/opencl/opencl_kernel.py +++ b/hysop/backend/device/opencl/opencl_kernel.py @@ -100,7 +100,7 @@ class OpenClKernel(object): raise ValueError(msg) for argname, (argpos, argtype) in args_mapping.iteritems(): assert isinstance(argpos, int) - if not isinstance(argtype, type): + if not isinstance(argtype, (type, npw.dtype)): check_instance(argtype, tuple, values=type) if argname in default_args: argval = default_args[argname] @@ -188,7 +188,20 @@ class OpenClKernel(object): msg=msg.format(arg_name, ', '.join(args_mapping.keys())) raise ValueError(msg) (arg_index, arg_types) = args_mapping[arg_name] - if not isinstance(arg_value, arg_types): + if isinstance(arg_types, npw.dtype): + msg=None + if not isinstance(arg_value, npw.ndarray): + msg='Argument {} at position {} should be a np.ndarray, got a {}.' + msg=msg.format(arg_name, arg_index, type(arg_value)) + elif not arg_value.dtype == arg_types: + msg='Argument {} at position {} is a np.ndarray of wrong dtype, got a {}, expected a {}.' + msg=msg.format(arg_name, arg_index, type(arg_value), arg_types) + elif not arg_value.size == 1: + msg='Argument {} at position {} is not a scalar np.ndarray, shape={}, size={}.' + msg=msg.format(arg_name, arg_index, arg_value.shape, arg_value.size) + if (msg is not None): + raise ValueError(msg) + elif not isinstance(arg_value, arg_types): msg='Argument {} at position {} should be of type {} but got a {}.' 
msg=msg.format(arg_name, arg_index, arg_types, type(arg_value)) raise TypeError(msg) diff --git a/hysop/backend/device/opencl/operator/directional/advection_dir.py b/hysop/backend/device/opencl/operator/directional/advection_dir.py index fd7acfe208973b1ad40b803c1c5aff5510f7034e..cd5c9b28a2fb53d6b81aeb23e35a3c9259cbd13d 100644 --- a/hysop/backend/device/opencl/operator/directional/advection_dir.py +++ b/hysop/backend/device/opencl/operator/directional/advection_dir.py @@ -66,8 +66,8 @@ class OpenClDirectionalAdvection(DirectionalAdvectionBase, OpenClDirectionalOper self.relax_min_particles = relax_min_particles self.remesh_criteria_eps = remesh_criteria_eps - self._force_autotuner_verbose = False - self._force_autotuner_debug = False + self._force_autotuner_verbose = None + self._force_autotuner_debug = None @debug def get_work_properties(self): diff --git a/hysop/backend/device/opencl/operator/transpose.py b/hysop/backend/device/opencl/operator/transpose.py index ba9a415a6551713196d2a078d53289bb393a6d4c..6c20bda7d6a45d0810f0a5b9b9a1d395c0417b2d 100644 --- a/hysop/backend/device/opencl/operator/transpose.py +++ b/hysop/backend/device/opencl/operator/transpose.py @@ -11,10 +11,6 @@ class OpenClTranspose(TransposeOperatorBase, OpenClOperator): def __init__(self, **kwds): super(OpenClTranspose, self).__init__(**kwds) - @debug - def discretize(self): - super(OpenClTranspose,self).discretize() - @debug def setup(self, work): super(OpenClTranspose, self).setup(work) @@ -36,21 +32,33 @@ class OpenClTranspose(TransposeOperatorBase, OpenClOperator): kernel = OpenClAutotunableTransposeKernel(cl_env, build_opts, autotuner_config) - (transpose, _, compute_inplace) = kernel.autotune(axes=axes, - is_inplace=is_inplace, input_field=input_field, output_field=output_field) + if is_inplace: + #Only 2D square matrix inplace transposition is supported + compute_inplace = (input_field.dim == 2) + compute_inplace &= all(input_field.shape[0]==input_field.shape) + else: + compute_inplace = False + + hardcode_arrays = (compute_inplace or not is_inplace) + transpose, _ = kernel.autotune(axes=axes, force_debug=True, + hardcode_arrays=hardcode_arrays, + is_inplace=compute_inplace, input_field=input_field, output_field=output_field) launcher = OpenClKernelListLauncher(name=transpose.name) for i in xrange(self.nb_components): if compute_inplace: - launcher += transpose.build_launcher(inout=input_field[i].data) + assert hardcode_arrays + launcher += transpose.build_launcher(inout_base=input_field[i].base_data) elif is_inplace: - launcher += transpose.build_launcher(input=input_field[i].data, - output=self.dtmp.data) + assert not hardcode_arrays + kernel_kargs = kernel.build_array_args(**{'in':input_field[i], 'out':self.dtmp}) + launcher += transpose.build_launcher(**kernel_kargs) launcher.push_copy_device_to_device(varname='tmp', src=self.dtmp, dst=input_field[i]) else: - launcher += transpose.build_launcher(input=input_field[i].data, - output=output_field[i].data) + assert hardcode_arrays + launcher += transpose.build_launcher(in_base=input_field[i].base_data, + out_base=output_field[i].base_data) self._kernel_launcher = launcher def enqueue_copy_kernel(self, _dst, _src, queue):
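
A minimal sketch of the byte-to-element conversion performed by the new make_array_offset()/make_array_strides() helpers introduced above in opencl_autotunable_kernel.py, using plain numpy in place of the HySoP typegen. The local upper_pow2_or_3 reimplementation, the names element_offset/element_strides and the uint32 vector dtype are illustrative assumptions, not the HySoP API; the patch itself builds the strides vector with typegen.make_uintn and a typegen-provided uintN dtype.

    # Sketch only: mirrors how the patch turns byte offsets/strides into
    # element offsets/strides before passing them as kernel arguments.
    import numpy as np

    def upper_pow2_or_3(n):
        # Same intent as hysop.tools.misc.upper_pow2_or_3 (assumed here):
        # 3 stays 3, everything else rounds up to the next power of two.
        if n == 3:
            return 3
        p = 1
        while p < n:
            p *= 2
        return p

    def element_offset(byte_offset, dtype):
        # Offsets are passed to the kernel in elements, not bytes.
        assert byte_offset % dtype.itemsize == 0, 'unaligned offset'
        return np.uint64(byte_offset // dtype.itemsize)

    def element_strides(byte_strides, dtype, dim):
        # Strides are reversed (contiguous axis first) and zero-padded to a
        # power-of-two (or 3) vector length, matching an OpenCL uintN value.
        assert all(s % dtype.itemsize == 0 for s in byte_strides), 'unaligned strides'
        n = upper_pow2_or_3(dim)
        strides = [s // dtype.itemsize for s in byte_strides[::-1]]
        strides += [0] * (n - len(strides))
        return np.asarray(strides, dtype=np.uint32)

    if __name__ == '__main__':
        a = np.zeros((17, 32), dtype=np.float32)
        view = a[1:, 2:]  # non-contiguous view with a non-zero byte offset
        byte_offset = view.__array_interface__['data'][0] - a.__array_interface__['data'][0]
        print(element_offset(byte_offset, view.dtype))            # 34 elements
        print(element_strides(view.strides, view.dtype, a.ndim))  # [ 1 32]

In the patched kernels these per-array values are either hardcoded as compile-time known_vars (hardcode_arrays=True) or bound at launch time through args_mapping entries such as ('name_base', 'name_strides', 'name_offset'), which is why the autotuner and OpenClKernel argument checks above now also accept numpy dtypes as argument types.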