diff --git a/hysop/backend/device/codegen/functions/directional_remesh.py b/hysop/backend/device/codegen/functions/directional_remesh.py index ff439bb0dc387302ef7cd89c6ab7c87148e0b0d4..68defa89f3af24678e34b3f65f4853c580d8cf5f 100644 --- a/hysop/backend/device/codegen/functions/directional_remesh.py +++ b/hysop/backend/device/codegen/functions/directional_remesh.py @@ -1,61 +1,63 @@ from hysop.deps import sm, np, contextlib from hysop.tools.types import check_instance, first_not_None -from hysop.backend.device.opencl.opencl_types import OpenClTypeGen +from hysop.backend.device.opencl.opencl_types import OpenClTypeGen -from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator +from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.device.codegen.base.function_codegen import OpenClFunctionCodeGenerator -from hysop.backend.device.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin -from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict -from hysop.backend.device.codegen.base.statistics import WorkStatistics +from hysop.backend.device.codegen.base.variables import CodegenVariable, CodegenVectorClBuiltin +from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict +from hysop.backend.device.codegen.base.statistics import WorkStatistics from hysop.backend.device.codegen.functions.polynomial import PolynomialFunction from hysop.backend.device.codegen.functions.custom_atomics import CustomAtomicFunction from hysop.constants import BoundaryCondition from hysop.numerics.remesh.remesh import RemeshKernel +from hysop.numerics.remesh.kernel_generator import Kernel from hysop.numerics.stencil.stencil_generator import StencilGenerator + class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): def __init__(self, typegen, work_dim, itype, ftype, - nparticles, nscalars, - sboundary, remesh_kernel, - use_atomics, remesh_criteria_eps, - use_short_circuit=None, - known_args=None, - debug_mode=False): - - check_instance(sboundary,tuple,values=BoundaryCondition) - check_instance(remesh_kernel, RemeshKernel) + nparticles, nscalars, + sboundary, remesh_kernel, + use_atomics, remesh_criteria_eps, + use_short_circuit=None, + known_args=None, + debug_mode=False): + + check_instance(sboundary, tuple, values=BoundaryCondition) + check_instance(remesh_kernel, (RemeshKernel, Kernel)) assert remesh_kernel.n % 2 == 0 or remesh_kernel.n == 1 assert remesh_kernel.n > 0 use_short_circuit = first_not_None(use_short_circuit, typegen.use_short_circuit_ops) - is_periodic = (sboundary[0]==BoundaryCondition.PERIODIC) and \ - (sboundary[1]==BoundaryCondition.PERIODIC) + is_periodic = (sboundary[0] == BoundaryCondition.PERIODIC) and \ + (sboundary[1] == BoundaryCondition.PERIODIC) ivtype = typegen.vtype(itype, nparticles) fvtype = typegen.vtype(ftype, nparticles) reqs = self.build_requirements(typegen, work_dim, itype, ftype, - ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, - use_atomics, remesh_criteria_eps) + ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, + use_atomics, remesh_criteria_eps) - (args,basename) = self.build_prototype(reqs, typegen, work_dim, itype, ftype, - ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, - use_atomics, remesh_criteria_eps, is_periodic) + (args, basename) = self.build_prototype(reqs, typegen, work_dim, itype, ftype, + ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, + use_atomics, remesh_criteria_eps, is_periodic) - 
super(DirectionalRemeshFunction,self).__init__(basename=basename, - output=None, typegen=typegen, inline=True, - args=args, known_args=known_args) + super(DirectionalRemeshFunction, self).__init__(basename=basename, + output=None, typegen=typegen, inline=True, + args=args, known_args=known_args) self.update_requirements(reqs) self.work_dim = work_dim - self.itype = itype - self.ftype = ftype + self.itype = itype + self.ftype = ftype self.ivtype = ivtype self.fvtype = fvtype @@ -74,72 +76,70 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): @staticmethod def _printv(ncomponents): - assert ncomponents in [1,2,4,8,16] - if ncomponents==1: + assert ncomponents in [1, 2, 4, 8, 16] + if ncomponents == 1: return '' else: return 'v{}'.format(ncomponents) - def build_prototype(self, reqs, typegen, work_dim, itype, ftype, - ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, - use_atomics, remesh_criteria_eps, is_periodic): + ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, + use_atomics, remesh_criteria_eps, is_periodic): args = ArgDict() - atomic = 'atomic_' if use_atomics else '' - criteria = '{}eps__'.format(remesh_criteria_eps) if (remesh_criteria_eps is not None) else 'full' + atomic = 'atomic_' if use_atomics else '' + criteria = '{}eps__'.format(remesh_criteria_eps) if ( + remesh_criteria_eps is not None) else 'full' - basename = '__{}remesh_{}d__lambda_{}_{}__{}__{}p__{}s__{}'.format( - atomic, work_dim, - remesh_kernel.n, remesh_kernel.r, ftype, - nparticles, nscalars, - criteria) + basename = '__{}remesh_{}d__lambda_{}_{}__{}__{}p__{}s__{}'.format( + atomic, work_dim, + remesh_kernel.n, remesh_kernel.r, ftype, + nparticles, nscalars, + criteria) args['p'] = CodegenVectorClBuiltin(name='p', btype=ftype, dim=nparticles, typegen=typegen, - add_impl_const=True, nl=True) + add_impl_const=True, nl=True) scalars = [] for i in xrange(nscalars): Si = CodegenVectorClBuiltin(name='s{}'.format(i), btype=ftype, dim=nparticles, typegen=typegen, - add_impl_const=True, nl=(i==nscalars-1)) + add_impl_const=True, nl=(i == nscalars-1)) scalars.append(Si) args[Si.name] = Si args['dx'] = CodegenVariable(name='dx', ctype=ftype, typegen=typegen, - add_impl_const=True) + add_impl_const=True) args['inv_dx'] = CodegenVariable(name='inv_dx', ctype=ftype, typegen=typegen, - add_impl_const=True, nl=True) + add_impl_const=True, nl=True) if is_periodic: args['grid_size'] = CodegenVariable(name='grid_size', ctype=itype, typegen=typegen, - add_impl_const=True) + add_impl_const=True) args['line_offset'] = CodegenVariable(name='line_offset', ctype=itype, typegen=typegen, - add_impl_const=True) - args['cache_width'] = CodegenVariable('cache_width', ctype=itype, typegen=typegen) + add_impl_const=True) + args['cache_width'] = CodegenVariable('cache_width', ctype=itype, typegen=typegen) args['cache_ghosts'] = CodegenVariable(name='cache_ghosts', ctype=itype, typegen=typegen, - add_impl_const=True) + add_impl_const=True) args['active'] = CodegenVariable(name='active', ctype='bool', typegen=typegen, - add_impl_const=True, nl=True) - + add_impl_const=True, nl=True) cached_scalars = [] for i in xrange(nscalars): Si = CodegenVariable(name='S{}'.format(i), ctype=ftype, typegen=typegen, - ptr_restrict=True, ptr=True, storage=self._local, - ptr_const=False, add_impl_const=True, nl=True) + ptr_restrict=True, ptr=True, storage=self._local, + ptr_const=False, add_impl_const=True, nl=True) cached_scalars.append(Si) args[Si.name] = Si self.scalars = scalars self.cached_scalars = cached_scalars - 
return (args,basename) - + return (args, basename) def build_requirements(self, typegen, work_dim, itype, ftype, - ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, - use_atomics, remesh_criteria_eps): + ivtype, fvtype, nparticles, nscalars, sboundary, remesh_kernel, + use_atomics, remesh_criteria_eps): reqs = WriteOnceDict() @@ -147,24 +147,24 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): kernel = remesh_kernel if kernel.poly_splitted: self.poly_splitted = True - name='lambda_{}_{}__{}'.format(kernel.n, kernel.r, '{}_{}') + name = 'lambda_{}_{}__{}'.format(kernel.n, kernel.r, '{}_{}') for i, _ in enumerate(kernel.Pt_l): - ilname = name.format(i,'left') - irname = name.format(i,'right') + ilname = name.format(i, 'left') + irname = name.format(i, 'right') Pil = PolynomialFunction(typegen, ftype, nparticles, - kernel.Cl[:,i], ilname, use_fma=True, var='y') + kernel.Cl[:, i], ilname, use_fma=True, var='y') Pir = PolynomialFunction(typegen, ftype, nparticles, - kernel.Cr[:,i], irname, use_fma=True, var='y') - P.append((Pil,Pir)) + kernel.Cr[:, i], irname, use_fma=True, var='y') + P.append((Pil, Pir)) reqs[ilname] = Pil reqs[irname] = Pir else: self.poly_splitted = False - name='lambda_{}_{}__{}'.format(kernel.n, kernel.r, '{}') + name = 'lambda_{}_{}__{}'.format(kernel.n, kernel.r, '{}') for i, _ in enumerate(kernel.Pt): iname = name.format(i) Pi = PolynomialFunction(typegen, ftype, nparticles, - kernel.C[:,i], iname, use_fma=True, var='y') + kernel.C[:, i], iname, use_fma=True, var='y') P.append((Pi,)) reqs[iname] = Pi self.polynomials = P @@ -190,9 +190,9 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): ivtype = s.ivtype fvtype = s.fvtype - nparticles = s.nparticles - nscalars = s.nscalars - sboundary = s.sboundary + nparticles = s.nparticles + nscalars = s.nscalars + sboundary = s.sboundary use_atomics = s.use_atomics use_short_circuit = s.use_short_circuit remesh_criteria_eps = s.remesh_criteria_eps @@ -215,13 +215,14 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): poly_splitted = s.poly_splitted - lb = '[' if (nparticles>1) else '' - rb = ']' if (nparticles>1) else '' + lb = '[' if (nparticles > 1) else '' + rb = ']' if (nparticles > 1) else '' vnf = '{}{}{}'.format(lb, ', '.join('%2.2f' for _ in xrange(nparticles)), rb) vni = '{}{}{}'.format(lb, ', '.join('%i' for _ in xrange(nparticles)), rb) - expand_printf_vector = lambda x: str(x) if (nparticles==1) else ','.join( - '({}).s{}'.format(x, '0123456789abcdef'[i]) - if isinstance(x, str) else x[i] for i in xrange(nparticles)) + + def expand_printf_vector(x): return str(x) if (nparticles == 1) else ','.join( + '({}).s{}'.format(x, '0123456789abcdef'[i]) + if isinstance(x, str) else x[i] for i in xrange(nparticles)) epv = expand_printf_vector @contextlib.contextmanager @@ -234,17 +235,18 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): dtype = tg.np_dtype(ftype) - P = CodegenVariable(name='P', ctype=itype, typegen=tg, - const=True, value=1+(self.kernel.n/2)) + P = CodegenVariable(name='P', ctype=itype, typegen=tg, + const=True, value=1+(self.kernel.n/2)) eps = CodegenVariable(name='eps', ctype=ftype, typegen=tg, - const=True, value=np.finfo(dtype).eps) + const=True, value=np.finfo(dtype).eps) - rp = CodegenVectorClBuiltin(name='rp', btype=ftype, dim=nparticles, typegen=tg) - y = CodegenVectorClBuiltin(name='y', btype=ftype, dim=nparticles, typegen=tg) - ind = CodegenVectorClBuiltin(name='ind', btype=itype, dim=nparticles, typegen=tg) + rp = 
CodegenVectorClBuiltin(name='rp', btype=ftype, dim=nparticles, typegen=tg) + y = CodegenVectorClBuiltin(name='y', btype=ftype, dim=nparticles, typegen=tg) + ind = CodegenVectorClBuiltin(name='ind', btype=itype, dim=nparticles, typegen=tg) find = CodegenVectorClBuiltin(name='find', btype=ftype, dim=nparticles, typegen=tg) - vone = CodegenVectorClBuiltin(name='one', btype=ftype, dim=nparticles, typegen=tg, value=(1,)*nparticles) - + vone = CodegenVectorClBuiltin(name='one', btype=ftype, + dim=nparticles, typegen=tg, value=(1,)*nparticles) + if debug_mode: tst0 = CodegenVectorClBuiltin(name='tst0', btype=ftype, dim=nparticles, typegen=tg) tst1 = CodegenVectorClBuiltin(name='tst1', btype=ftype, dim=nparticles, typegen=tg) @@ -253,7 +255,7 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): wl = CodegenVectorClBuiltin(name='Wl', btype=ftype, dim=nparticles, typegen=tg) wr = CodegenVectorClBuiltin(name='Wr', btype=ftype, dim=nparticles, typegen=tg) w = CodegenVectorClBuiltin(name='W', btype=ftype, dim=nparticles, typegen=tg) - weights = (wl,wr,w) + weights = (wl, wr, w) else: w = CodegenVectorClBuiltin(name='W', btype=ftype, dim=nparticles, typegen=tg) weights = (w,) @@ -263,7 +265,7 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): if (remesh_criteria_eps is not None): eps.declare(s) s.jumpline() - + if debug_mode: s.decl_aligned_vars(tst0, tst1) s.decl_aligned_vars(rp, find, ind, y) @@ -280,10 +282,11 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): s.append('{} = {rp} - floor({rp});'.format(tst1, rp=rp)) with self._ordered_wi_execution_(barrier=False): code = 'printf("%lu p={vnf}, rp={vnf}, floor(rp)={vnf}, rp-floor(rp)={vnf}, ind={vni}, y={vnf}, s0={vnf}.\\n", {}, {}, {}, {}, {}, {}, {}, {});'.format( - 'get_local_id(0)', epv(pos), epv(rp), epv(tst0), epv(tst1), epv(ind), epv(y), epv(scalars[0]), - vnf=vnf, vni=vni) + 'get_local_id(0)', epv(pos), epv(rp), epv(tst0), epv( + tst1), epv(ind), epv(y), epv(scalars[0]), + vnf=vnf, vni=vni) s.append(code) - y.affect(s, init='{}-{}'.format(vone,y)) + y.affect(s, init='{}-{}'.format(vone, y)) ind.affect(s, init='{} - {} - {}'.format(ind, P, line_offset)) if debug_mode: s.barrier(_local=True) @@ -299,33 +302,35 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): with if_thread_active(): s.append('{} += 1;'.format(ind)) with s._align_() as al: - for wj,Pij,yj in zip(weights, Pi, (y,'1-{}'.format(y))): + for wj, Pij, yj in zip(weights, Pi, (y, '1-{}'.format(y))): wj.affect(al, align=True, init=Pij(y=yj)) if poly_splitted: - if nparticles>1: + if nparticles > 1: w.affect(al, align=True, init='select({}, {}, ({}<{}))'.format( - wr, wl, y, tg.dump(0.5))) + wr, wl, y, tg.dump(0.5))) else: w.affect(al, align=True, init='convert_{fvtype}({}<{})*{} + convert_{fvtype}({}>={})*{}'.format( - y,tg.dump(0.5), wl, y, tg.dump(0.5), wr, fvtype=fvtype)) + y, tg.dump(0.5), wl, y, tg.dump(0.5), wr, fvtype=fvtype)) for iscal, (cached_scalar, scalar) in enumerate(zip(cached_scalars, scalars)): - if nparticles>1: - comment='Remeshing scalar {}, {} particles at a time.'.format(iscal, nparticles) + if nparticles > 1: + comment = 'Remeshing scalar {}, {} particles at a time.'.format( + iscal, nparticles) s.jumpline() s.comment(comment) criterias = [] with s._block_(): for ipart in xrange(nparticles): - cache_idx='{}+{}'.format(cache_ghosts, ind[ipart]) - val = '{}*{}'.format(w[ipart],scalar[ipart]) + cache_idx = '{}+{}'.format(cache_ghosts, ind[ipart]) + val = '{}*{}'.format(w[ipart], scalar[ipart]) if 
(remesh_criteria_eps is not None): criteria = 'fabs({}) > {}*eps'.format(val, remesh_criteria_eps) criterias.append(criteria) if use_atomics: atomic_add = s.reqs['atomic_add'] - atom_add = atomic_add(p='{}+{}'.format(cached_scalar,cache_idx), val=val) + atom_add = atomic_add( + p='{}+{}'.format(cached_scalar, cache_idx), val=val) if (remesh_criteria_eps is not None): if use_short_circuit: code = '({}) && ({},true);'.format(criteria, atom_add) @@ -337,22 +342,23 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): inplace_add = '{} += {}'.format(cached_scalar[cache_idx], val) if (remesh_criteria_eps is not None): if use_short_circuit: - code = '({}) && (({}),true);'.format(criteria, inplace_add) + code = '({}) && (({}),true);'.format( + criteria, inplace_add) else: code = 'if ({}) {{ {}; }}'.format(criteria, inplace_add) else: code = '{};'.format(inplace_add) s.append(code) if debug_mode: - val='{}*{}'.format(w,scalars[0]) - cache_idx='{}+{}'.format(cache_ghosts, ind[0]) - + val = '{}*{}'.format(w, scalars[0]) + cache_idx = '{}+{}'.format(cache_ghosts, ind[0]) + if poly_splitted: printf = 'printf("BATCH {}: %lu remeshed {vnf} at idx={vni} with Wl={vnf}, Wr={vnf}, W={vnf}, cond={vni}.\\n",{},{},{},{},{},{},{});'.format( - i, 'get_local_id(0)', epv(val), epv(ind), epv(wl), epv(wr), epv(w), epv('(y<0.5f)'), vnf=vnf, vni=vni) + i, 'get_local_id(0)', epv(val), epv(ind), epv(wl), epv(wr), epv(w), epv('(y<0.5f)'), vnf=vnf, vni=vni) else: printf = 'printf("BATCH {}: %lu remeshed {vnf} at idx {vni} with W({vnf})={vnf}, new value is S0[{vni}]={vnf}.\\n",{},{},{},{},{},{},{});'.format( - i, 'get_local_id(0)', epv(val), epv(ind), epv(y), epv(w), cache_idx, epv(cached_scalars[0][cache_idx]), vni=vni, vnf=vnf) + i, 'get_local_id(0)', epv(val), epv(ind), epv(y), epv(w), cache_idx, epv(cached_scalars[0][cache_idx]), vni=vni, vnf=vnf) with s._first_wg_execution_(): if criterias: with s._if_(' || '.join(map(lambda x: '({})'.format(x), criterias))): @@ -370,6 +376,7 @@ class DirectionalRemeshFunction(OpenClFunctionCodeGenerator): if use_atomics: s.barrier(_local=True) + if __name__ == '__main__': from hysop.backend.device.codegen.base.test import _test_typegen from hysop.numerics.remesh.remesh import RemeshKernel @@ -386,7 +393,7 @@ if __name__ == '__main__': remesh_criteria_eps = None drf = DirectionalRemeshFunction(tg, work_dim, 'int', tg.fbtype, - nparticles, nscalars, - (BoundaryCondition.PERIODIC,BoundaryCondition.PERIODIC), - kernel, use_atomics, remesh_criteria_eps, debug_mode=False) + nparticles, nscalars, + (BoundaryCondition.PERIODIC, BoundaryCondition.PERIODIC), + kernel, use_atomics, remesh_criteria_eps, debug_mode=False) drf.edit() diff --git a/hysop/backend/device/codegen/kernels/directional_remesh.py b/hysop/backend/device/codegen/kernels/directional_remesh.py index e28b37f828e074427853760b4ec10021fe69058d..9c1a437ac945caafa4301a7d9550efc2e6b9069f 100644 --- a/hysop/backend/device/codegen/kernels/directional_remesh.py +++ b/hysop/backend/device/codegen/kernels/directional_remesh.py @@ -11,23 +11,24 @@ from hysop.constants import DirectionLabels, BoundaryCondition, Backend, Precisi from hysop.core.arrays.all import OpenClArray from hysop.numerics.remesh.remesh import RemeshKernel +from hysop.numerics.remesh.kernel_generator import Kernel from hysop.fields.continuous_field import Field from hysop.fields.discrete_field import DiscreteScalarFieldView -from hysop.backend.device.opencl import cl, clTools, clCharacterize -from hysop.backend.device.opencl.opencl_env import OpenClEnvironment 
-from hysop.backend.device.opencl.opencl_types import OpenClTypeGen +from hysop.backend.device.opencl import cl, clTools, clCharacterize +from hysop.backend.device.opencl.opencl_env import OpenClEnvironment +from hysop.backend.device.opencl.opencl_types import OpenClTypeGen from hysop.backend.device.opencl.opencl_array_backend import OpenClArrayBackend -from hysop.backend.device.codegen import CodeGeneratorWarning -from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict -from hysop.backend.device.codegen.base.statistics import WorkStatistics -from hysop.backend.device.codegen.base.variables import CodegenStruct -from hysop.backend.device.codegen.structs.mesh_info import MeshBaseStruct, MeshInfoStruct +from hysop.backend.device.codegen import CodeGeneratorWarning +from hysop.backend.device.codegen.base.utils import WriteOnceDict, ArgDict +from hysop.backend.device.codegen.base.statistics import WorkStatistics +from hysop.backend.device.codegen.base.variables import CodegenStruct +from hysop.backend.device.codegen.structs.mesh_info import MeshBaseStruct, MeshInfoStruct from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator from hysop.backend.device.codegen.base.kernel_codegen import KernelCodeGenerator -from hysop.backend.device.codegen.base.variables import CodegenVariable, \ - CodegenVectorClBuiltin, CodegenArray +from hysop.backend.device.codegen.base.variables import CodegenVariable, \ + CodegenVectorClBuiltin, CodegenArray from hysop.backend.device.codegen.functions.directional_remesh import DirectionalRemeshFunction @@ -36,23 +37,24 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): @staticmethod def codegen_name(work_dim, - remesh_kernel, ftype, - nparticles, nscalars, - remesh_criteria_eps, - use_atomics, is_inplace): + remesh_kernel, ftype, + nparticles, nscalars, + remesh_criteria_eps, + use_atomics, is_inplace): inplace = 'inplace_' if is_inplace else '' - atomic = 'atomic_' if use_atomics else '' - criteria = '{}eps__'.format(remesh_criteria_eps) if (remesh_criteria_eps is not None) else 'full' + atomic = 'atomic_' if use_atomics else '' + criteria = '{}eps__'.format(remesh_criteria_eps) if ( + remesh_criteria_eps is not None) else 'full' return 'directional_{}{}remesh_{}d__lambda_{}_{}__{}__{}p__{}s__{}'.format( - inplace, atomic, work_dim, - remesh_kernel.n, remesh_kernel.r, ftype, - nparticles, nscalars, - criteria) + inplace, atomic, work_dim, + remesh_kernel.n, remesh_kernel.r, ftype, + nparticles, nscalars, + criteria) @classmethod def scalars_out_cache_ghosts(cls, scalar_cfl, remesh_kernel): assert scalar_cfl > 0.0, 'cfl <= 0.0' - assert remesh_kernel.n >= 1 , 'Bad remeshing kernel.' + assert remesh_kernel.n >= 1, 'Bad remeshing kernel.' if remesh_kernel.n > 1: assert remesh_kernel.n % 2 == 0, 'Odd remeshing kernel moments.' 
min_ghosts = int(1+npw.floor(scalar_cfl)+remesh_kernel.n/2) @@ -79,21 +81,21 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): Return global_work_size from effective work_size and given local_work_size global_work_size will be a multiple of local_work_size """ - work_dim = self.work_dim - work_load = [1]*work_dim if (work_load is None) else work_load + work_dim = self.work_dim + work_load = [1]*work_dim if (work_load is None) else work_load - work_size = np.asarray(work_size) - work_load = np.asarray(work_load) + work_size = np.asarray(work_size) + work_load = np.asarray(work_load) local_work_size = np.asarray(local_work_size) nparticles = self.nparticles - for i in xrange(1,work_dim): + for i in xrange(1, work_dim): assert (local_work_size[i] == 1), 'local_work_size error!' if 'local_size' in self.known_vars: assert (self.known_vars['local_size'] == local_work_size[:work_dim]).all(),\ - 'local_work_size mismatch!' + 'local_work_size mismatch!' max_global_size = self.get_max_global_size(work_size, work_load, nparticles) global_size = ((max_global_size+local_work_size-1)/local_work_size) * local_work_size @@ -105,13 +107,13 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): """ Return a tuple of required (static,dynamic,total) cache bytes per workgroup """ - work_dim = self.work_dim - ftype = self.ftype - flt_bytes = self.typegen.FLT_BYTES[ftype] + work_dim = self.work_dim + ftype = self.ftype + flt_bytes = self.typegen.FLT_BYTES[ftype] local_work_size = np.asarray(local_work_size) - sc,dc = 0,0 + sc, dc = 0, 0 count = self.nscalars*(self.nparticles*local_work_size[0]+2*self.min_ghosts) if self.local_size_known: assert (self.known_vars['local_size'] == local_work_size[:work_dim]).all() @@ -123,28 +125,27 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): dc *= flt_bytes tc = sc+dc - return (sc,dc,tc) - + return (sc, dc, tc) def __init__(self, typegen, work_dim, ftype, - nparticles, nscalars, sboundary, is_inplace, - scalar_cfl, remesh_kernel, - use_short_circuit=None, - unroll_loops=None, - group_scalars=None, - remesh_criteria_eps=None, - use_atomics = False, - symbolic_mode = False, - debug_mode = False, - tuning_mode = False, - known_vars = None): - - assert work_dim>0 and work_dim<=3 - assert nscalars>0 - assert nparticles in [1,2,4,8,16] - check_instance(sboundary,tuple,values=BoundaryCondition) - check_instance(remesh_kernel, RemeshKernel) - + nparticles, nscalars, sboundary, is_inplace, + scalar_cfl, remesh_kernel, + use_short_circuit=None, + unroll_loops=None, + group_scalars=None, + remesh_criteria_eps=None, + use_atomics=False, + symbolic_mode=False, + debug_mode=False, + tuning_mode=False, + known_vars=None): + + assert work_dim > 0 and work_dim <= 3 + assert nscalars > 0 + assert nparticles in [1, 2, 4, 8, 16] + check_instance(sboundary, tuple, values=BoundaryCondition) + check_instance(remesh_kernel, (RemeshKernel, Kernel)) + use_short_circuit = first_not_None(use_short_circuit, typegen.use_short_circuit_ops) unroll_loops = first_not_None(unroll_loops, typegen.unroll_loops) @@ -159,11 +160,11 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): assert sboundary[0] in [BoundaryCondition.PERIODIC, BoundaryCondition.NONE] assert sboundary[1] in [BoundaryCondition.PERIODIC, BoundaryCondition.NONE] - is_periodic = (sboundary[0]==BoundaryCondition.PERIODIC \ - and sboundary[1]==BoundaryCondition.PERIODIC) + is_periodic = (sboundary[0] == BoundaryCondition.PERIODIC + and sboundary[1] == BoundaryCondition.PERIODIC) if is_periodic: - 
msg='Local periodic boundary have been deprecated, use BoundaryCondition.NONE instead.' + msg = 'Local periodic boundary have been deprecated, use BoundaryCondition.NONE instead.' raise RuntimeError(msg) known_vars = known_vars or dict() @@ -174,58 +175,58 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): vitype = typegen.vtype(itype, nparticles) name = DirectionalRemeshKernelGenerator.codegen_name(work_dim, - remesh_kernel, ftype, - nparticles, nscalars, remesh_criteria_eps, - use_atomics, is_inplace) + remesh_kernel, ftype, + nparticles, nscalars, remesh_criteria_eps, + use_atomics, is_inplace) kernel_reqs = self.build_requirements(typegen, work_dim, itype, ftype, - sboundary, nparticles, nscalars, nfields, group_scalars, - symbolic_mode, - remesh_criteria_eps, use_atomics, remesh_kernel, - use_short_circuit, known_vars, debug_mode) + sboundary, nparticles, nscalars, nfields, group_scalars, + symbolic_mode, + remesh_criteria_eps, use_atomics, remesh_kernel, + use_short_circuit, known_vars, debug_mode) kernel_args = self.gen_kernel_arguments(typegen, work_dim, itype, ftype, - nparticles, nscalars, nfields, group_scalars, local_size_known, is_inplace, - debug_mode, kernel_reqs, known_vars, symbolic_mode) - - super(DirectionalRemeshKernelGenerator,self).__init__( - name=name, - typegen=typegen, - work_dim=work_dim, - kernel_args=kernel_args, - known_vars=known_vars, - vec_type_hint=ftype, - symbolic_mode=symbolic_mode) + nparticles, nscalars, nfields, group_scalars, local_size_known, is_inplace, + debug_mode, kernel_reqs, known_vars, symbolic_mode) + + super(DirectionalRemeshKernelGenerator, self).__init__( + name=name, + typegen=typegen, + work_dim=work_dim, + kernel_args=kernel_args, + known_vars=known_vars, + vec_type_hint=ftype, + symbolic_mode=symbolic_mode) self.update_requirements(kernel_reqs) - self.min_ghosts = self.scalars_out_cache_ghosts(scalar_cfl, remesh_kernel) - self.itype = itype - self.ftype = ftype - self.vitype = vitype - self.vftype = vftype - self.work_dim = work_dim - self.sboundary = sboundary - self.nparticles = nparticles - self.nscalars = nscalars - self.nfields = nfields - self.group_scalars = group_scalars + self.min_ghosts = self.scalars_out_cache_ghosts(scalar_cfl, remesh_kernel) + self.itype = itype + self.ftype = ftype + self.vitype = vitype + self.vftype = vftype + self.work_dim = work_dim + self.sboundary = sboundary + self.nparticles = nparticles + self.nscalars = nscalars + self.nfields = nfields + self.group_scalars = group_scalars self.local_size_known = local_size_known - self.is_inplace = is_inplace - self.use_atomics = use_atomics + self.is_inplace = is_inplace + self.use_atomics = use_atomics self.use_short_circuit = use_short_circuit - self.unroll_loops = unroll_loops - self.remesh_kernel = remesh_kernel - self.debug_mode = debug_mode - self.tuning_mode = tuning_mode + self.unroll_loops = unroll_loops + self.remesh_kernel = remesh_kernel + self.debug_mode = debug_mode + self.tuning_mode = tuning_mode self.gencode() - #self.edit() + # self.edit() def build_requirements(self, typegen, work_dim, itype, ftype, - sboundary, nparticles, nscalars, nfields, group_scalars, symbolic_mode, - remesh_criteria_eps, use_atomics, remesh_kernel, use_short_circuit, - known_vars, debug_mode): + sboundary, nparticles, nscalars, nfields, group_scalars, symbolic_mode, + remesh_criteria_eps, use_atomics, remesh_kernel, use_short_circuit, + known_vars, debug_mode): reqs = WriteOnceDict() vsize = upper_pow2_or_3(work_dim) @@ -239,49 +240,49 @@ class 
DirectionalRemeshKernelGenerator(KernelCodeGenerator): # without atomics we can only remesh on particle at a time nparticles_remeshed = nparticles if use_atomics else 1 reqs['remesh'] = DirectionalRemeshFunction(typegen=typegen, work_dim=work_dim, - itype=itype, ftype=ftype, nparticles=nparticles_remeshed, nscalars=nscalars, - sboundary=sboundary, remesh_kernel=remesh_kernel, use_atomics=use_atomics, - remesh_criteria_eps=remesh_criteria_eps, debug_mode=debug_mode, - use_short_circuit=use_short_circuit) + itype=itype, ftype=ftype, nparticles=nparticles_remeshed, nscalars=nscalars, + sboundary=sboundary, remesh_kernel=remesh_kernel, use_atomics=use_atomics, + remesh_criteria_eps=remesh_criteria_eps, debug_mode=debug_mode, + use_short_circuit=use_short_circuit) return reqs def gen_kernel_arguments(self, typegen, work_dim, itype, ftype, - nparticles, nscalars, nfields, group_scalars, local_size_known, is_inplace, - debug_mode, kernel_reqs, known_vars, symbolic_mode): + nparticles, nscalars, nfields, group_scalars, local_size_known, is_inplace, + debug_mode, kernel_reqs, known_vars, symbolic_mode): kargs = ArgDict() mesh_dim = upper_pow2_or_3(work_dim) self.position, self.position_strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='position', - known_vars=known_vars, symbolic_mode=symbolic_mode, - storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, - ptr_restrict=True, const=True) + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, + ptr_restrict=True, const=True) - scalars_data_in = [] + scalars_data_in = [] scalars_strides_in = [] for i in xrange(nfields): - args_in = [] + args_in = [] strides_in = [] for j in xrange(group_scalars[i]): if is_inplace: - arg, strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='S{}_{}_inout'.format(i,j), - known_vars=known_vars, symbolic_mode=symbolic_mode, - storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, - const=False, ptr_restrict=True) + arg, strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='S{}_{}_inout'.format(i, j), + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, + const=False, ptr_restrict=True) else: - arg, strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='S{}_{}_in'.format(i,j), - known_vars=known_vars, symbolic_mode=symbolic_mode, - storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, - const=True, ptr_restrict=True) + arg, strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='S{}_{}_in'.format(i, j), + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, + const=True, ptr_restrict=True) args_in.append(arg) strides_in.append(strides) scalars_data_in.append(tuple(args_in)) scalars_strides_in.append(tuple(strides_in)) - scalars_data_in = tuple(scalars_data_in) + scalars_data_in = tuple(scalars_data_in) scalars_strides_in = tuple(scalars_strides_in) if is_inplace: - scalars_data_out = scalars_data_in + scalars_data_out = scalars_data_in scalars_strides_out = scalars_strides_in else: scalars_data_out = [] @@ -290,10 +291,10 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): args_out = [] strides_out = [] for j in xrange(group_scalars[i]): - arg, strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='S{}_{}_out'.format(i,j), - known_vars=known_vars, symbolic_mode=symbolic_mode, - 
storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, - const=False, ptr_restrict=True) + arg, strides = OpenClArrayBackend.build_codegen_arguments(kargs, name='S{}_{}_out'.format(i, j), + known_vars=known_vars, symbolic_mode=symbolic_mode, + storage=self._global, ctype=ftype, typegen=typegen, mesh_dim=mesh_dim, + const=False, ptr_restrict=True) args_out.append(arg) strides_out.append(strides) scalars_data_out.append(tuple(args_out)) @@ -301,60 +302,60 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): scalars_data_out = tuple(scalars_data_out) scalars_strides_out = tuple(scalars_strides_out) - self.scalars_data_in = scalars_data_in + self.scalars_data_in = scalars_data_in self.scalars_data_out = scalars_data_out self.scalars_strides_in = scalars_strides_in self.scalars_strides_out = scalars_strides_out - #if debug_mode: - #kargs['dbg0'] = CodegenVariable(storage=self._global,name='dbg0',ctype=itype, - #typegen=typegen, ptr_restrict=True,ptr=True,const=False,add_impl_const=True) - #kargs['dbg1'] = CodegenVariable(storage=self._global,name='dbg1',ctype=itype, - #typegen=typegen, ptr_restrict=True,ptr=True,const=False,add_impl_const=True) + # if debug_mode: + # kargs['dbg0'] = CodegenVariable(storage=self._global,name='dbg0',ctype=itype, + # typegen=typegen, ptr_restrict=True,ptr=True,const=False,add_impl_const=True) + # kargs['dbg1'] = CodegenVariable(storage=self._global,name='dbg1',ctype=itype, + # typegen=typegen, ptr_restrict=True,ptr=True,const=False,add_impl_const=True) kargs['position_mesh_info'] = kernel_reqs['MeshInfoStruct'].build_codegen_variable( - const=True, name='position_mesh_info') + const=True, name='position_mesh_info') if is_inplace: for i in xrange(nfields): kargs['S{}_inout_mesh_info'.format(i)] = \ - kernel_reqs['MeshInfoStruct'].build_codegen_variable( - const=True, name='S{}_inout_mesh_info'.format(i), nl=True) + kernel_reqs['MeshInfoStruct'].build_codegen_variable( + const=True, name='S{}_inout_mesh_info'.format(i), nl=True) else: for i in xrange(nfields): kargs['S{}_in_mesh_info'.format(i)] = \ - kernel_reqs['MeshInfoStruct'].build_codegen_variable( - const=True, name='S{}_in_mesh_info'.format(i), nl=True) + kernel_reqs['MeshInfoStruct'].build_codegen_variable( + const=True, name='S{}_in_mesh_info'.format(i), nl=True) for i in xrange(nfields): kargs['S{}_out_mesh_info'.format(i)] = \ - kernel_reqs['MeshInfoStruct'].build_codegen_variable( - const=True, name='S{}_out_mesh_info'.format(i), nl=True) + kernel_reqs['MeshInfoStruct'].build_codegen_variable( + const=True, name='S{}_out_mesh_info'.format(i), nl=True) if not local_size_known: - kargs['buffer'] = CodegenVariable(storage=self._local, ctype=ftype, - add_impl_const=True, name='buffer', ptr=True, ptr_restrict=True, - typegen=typegen, nl=False) + kargs['buffer'] = CodegenVariable(storage=self._local, ctype=ftype, + add_impl_const=True, name='buffer', ptr=True, ptr_restrict=True, + typegen=typegen, nl=False) return kargs def gencode(self): - s = self + s = self tg = s.typegen - dim = s.work_dim - itype = s.itype - ftype = s.ftype - vitype = s.vitype - vftype = s.vftype - sboundary = s.sboundary - nparticles = s.nparticles - nscalars = s.nscalars - nfields = s.nfields - min_ghosts = s.min_ghosts - is_inplace = s.is_inplace + dim = s.work_dim + itype = s.itype + ftype = s.ftype + vitype = s.vitype + vftype = s.vftype + sboundary = s.sboundary + nparticles = s.nparticles + nscalars = s.nscalars + nfields = s.nfields + min_ghosts = s.min_ghosts + is_inplace = s.is_inplace use_atomics = 
s.use_atomics - work_dim = s.work_dim - debug_mode = s.debug_mode + work_dim = s.work_dim + debug_mode = s.debug_mode tuning_mode = s.tuning_mode use_short_circuit = s.use_short_circuit @@ -365,127 +366,128 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): local_size_known = s.local_size_known - global_id = s.vars['global_id'] - local_id = s.vars['local_id'] - group_id = s.vars['group_id'] + global_id = s.vars['global_id'] + local_id = s.vars['local_id'] + group_id = s.vars['group_id'] - global_index = s.vars['global_index'] - local_index = s.vars['local_index'] + global_index = s.vars['global_index'] + local_index = s.vars['local_index'] - global_size = s.vars['global_size'] - local_size = s.vars['local_size'] + global_size = s.vars['global_size'] + local_size = s.vars['local_size'] - #if debug_mode: - #dbg0 = s.vars['dbg0'] - #dbg1 = s.vars['dbg1'] + # if debug_mode: + #dbg0 = s.vars['dbg0'] + #dbg1 = s.vars['dbg1'] - lb = '[' if (nparticles>1) else '' - rb = ']' if (nparticles>1) else '' + lb = '[' if (nparticles > 1) else '' + rb = ']' if (nparticles > 1) else '' vnf = '{}{}{}'.format(lb, ', '.join('%2.2f' for _ in xrange(nparticles)), rb) vni = '{}{}{}'.format(lb, ', '.join('%i' for _ in xrange(nparticles)), rb) - expand_printf_vector = lambda x: str(x) if (nparticles==1) else ','.join( - '({}).s{}'.format(x, '0123456789abcdef'[i]) - if isinstance(x, str) else x[i] for i in xrange(nparticles)) + + def expand_printf_vector(x): return str(x) if (nparticles == 1) else ','.join( + '({}).s{}'.format(x, '0123456789abcdef'[i]) + if isinstance(x, str) else x[i] for i in xrange(nparticles)) epv = expand_printf_vector position_mesh_info = s.vars['position_mesh_info'] if is_inplace: - scalars_mesh_info_in = [s.vars['S{}_inout_mesh_info'.format(i)] for i in xrange(nfields)] + scalars_mesh_info_in = [ + s.vars['S{}_inout_mesh_info'.format(i)] for i in xrange(nfields)] scalars_mesh_info_out = scalars_mesh_info_in else: - scalars_mesh_info_in = [s.vars['S{}_in_mesh_info'.format(i)] for i in xrange(nfields)] - scalars_mesh_info_out = [s.vars['S{}_out_mesh_info'.format(i)] for i in xrange(nfields)] + scalars_mesh_info_in = [s.vars['S{}_in_mesh_info'.format(i)] for i in xrange(nfields)] + scalars_mesh_info_out = [s.vars['S{}_out_mesh_info'.format(i)] for i in xrange(nfields)] - position_base = s.vars['position_base'] - position_offset = s.vars['position_offset'] + position_base = s.vars['position_base'] + position_offset = s.vars['position_offset'] position_strides = s.vars['position_strides'] position = s.position - scalars_data_in = s.scalars_data_in + scalars_data_in = s.scalars_data_in scalars_data_out = s.scalars_data_out - scalars_strides_in = s.scalars_strides_in + scalars_strides_in = s.scalars_strides_in scalars_strides_out = s.scalars_strides_out - compute_grid_size = position_mesh_info['local_mesh']['compute_resolution'].view( - 'compute_grid_size',slice(None,work_dim),const=True) + compute_grid_size = position_mesh_info['local_mesh']['compute_resolution'].view( + 'compute_grid_size', slice(None, work_dim), const=True) position_grid_size = position_mesh_info['local_mesh']['resolution'].view( - 'pos_grid_size', slice(0,work_dim), const=True) + 'pos_grid_size', slice(0, work_dim), const=True) position_grid_ghosts = position_mesh_info['ghosts'].view( - 'pos_grid_ghosts', slice(0,work_dim), const=True) + 'pos_grid_ghosts', slice(0, work_dim), const=True) position_global_id = CodegenVectorClBuiltin('pos_gid', itype, work_dim, typegen=tg) - dx = position_mesh_info['dx'].view('dx', 
slice(0,1), const=True) - inv_dx = position_mesh_info['inv_dx'].view('inv_dx', slice(0,1), const=True) - xmin = position_mesh_info['local_mesh']['xmin'].view('xmin', slice(0,1), const=True) + dx = position_mesh_info['dx'].view('dx', slice(0, 1), const=True) + inv_dx = position_mesh_info['inv_dx'].view('inv_dx', slice(0, 1), const=True) + xmin = position_mesh_info['local_mesh']['xmin'].view('xmin', slice(0, 1), const=True) if is_inplace: scalars_in_grid_size = tuple(smi['local_mesh']['resolution'].view( - 'S{}_inout_grid_size'.format(i), slice(0,work_dim), const=True) - for (i,smi) in enumerate(scalars_mesh_info_out)) + 'S{}_inout_grid_size'.format(i), slice(0, work_dim), const=True) + for (i, smi) in enumerate(scalars_mesh_info_out)) scalars_in_grid_ghosts = tuple(smi['ghosts'].view( - 'S{}_inout_grid_ghosts'.format(i),slice(0,work_dim), const=True) - for (i,smi) in enumerate(scalars_mesh_info_out)) + 'S{}_inout_grid_ghosts'.format(i), slice(0, work_dim), const=True) + for (i, smi) in enumerate(scalars_mesh_info_out)) scalars_in_global_id = tuple(CodegenVectorClBuiltin('S{}_inout_gid'.format(i), - itype, work_dim, typegen=tg) for i in xrange(nfields)) + itype, work_dim, typegen=tg) for i in xrange(nfields)) - scalars_data_out_grid_size = scalars_in_grid_size + scalars_data_out_grid_size = scalars_in_grid_size scalars_data_out_grid_ghosts = scalars_in_grid_ghosts - scalars_data_out_global_id = scalars_in_global_id + scalars_data_out_global_id = scalars_in_global_id grid_ghosts = (position_grid_ghosts,) + scalars_in_grid_ghosts - grid_sizes = (position_grid_size,) + scalars_in_grid_size - global_ids = (position_global_id,) + scalars_in_global_id + grid_sizes = (position_grid_size,) + scalars_in_grid_size + global_ids = (position_global_id,) + scalars_in_global_id else: scalars_in_grid_size = tuple(smi['local_mesh']['resolution'].view( - 'S{}_in_grid_size'.format(i), slice(0,work_dim), const=True) - for (i,smi) in enumerate(scalars_mesh_info_in)) + 'S{}_in_grid_size'.format(i), slice(0, work_dim), const=True) + for (i, smi) in enumerate(scalars_mesh_info_in)) scalars_data_out_grid_size = tuple(smi['local_mesh']['resolution'].view( - 'S{}_out_grid_size'.format(i), slice(0,work_dim), const=True) - for (i,smi) in enumerate(scalars_mesh_info_out)) + 'S{}_out_grid_size'.format(i), slice(0, work_dim), const=True) + for (i, smi) in enumerate(scalars_mesh_info_out)) scalars_in_grid_ghosts = tuple(smi['ghosts'].view('S{}_in_grid_ghosts'.format(i), - slice(0,work_dim), const=True) for (i,smi) in enumerate(scalars_mesh_info_in)) + slice(0, work_dim), const=True) for (i, smi) in enumerate(scalars_mesh_info_in)) scalars_data_out_grid_ghosts = tuple(smi['ghosts'].view('S{}_out_grid_ghosts'.format(i), - slice(0,work_dim), const=True) for (i,smi) in enumerate(scalars_mesh_info_out)) + slice(0, work_dim), const=True) for (i, smi) in enumerate(scalars_mesh_info_out)) scalars_in_global_id = tuple(CodegenVectorClBuiltin('S{}_in_gid'.format(i), - itype, work_dim, typegen=tg) for i in xrange(nfields)) + itype, work_dim, typegen=tg) for i in xrange(nfields)) scalars_data_out_global_id = tuple(CodegenVectorClBuiltin('S{}_out_gid'.format(i), - itype, work_dim, typegen=tg) for i in xrange(nfields)) + itype, work_dim, typegen=tg) for i in xrange(nfields)) grid_ghosts = (position_grid_ghosts,) + scalars_in_grid_ghosts + \ - scalars_data_out_grid_ghosts - grid_sizes = (position_grid_size,) + scalars_in_grid_size + scalars_data_out_grid_size - global_ids = (position_global_id,) + scalars_in_global_id + 
scalars_data_out_global_id + scalars_data_out_grid_ghosts + grid_sizes = (position_grid_size,) + scalars_in_grid_size + scalars_data_out_grid_size + global_ids = (position_global_id,) + scalars_in_global_id + scalars_data_out_global_id s.update_vars(position=position, - inv_dx=inv_dx, - position_grid_ghosts=position_grid_ghosts, compute_grid_size=compute_grid_size) - s.update_vars(**dict((sij.name,sij) for si in scalars_data_in for sij in si)) + inv_dx=inv_dx, + position_grid_ghosts=position_grid_ghosts, compute_grid_size=compute_grid_size) + s.update_vars(**dict((sij.name, sij) for si in scalars_data_in for sij in si)) if not is_inplace: - s.update_vars(**dict((sij.name,sij) for si in scalars_data_out for sij in si)) - + s.update_vars(**dict((sij.name, sij) for si in scalars_data_out for sij in si)) npart = CodegenVariable(name='nparticles', ctype=itype, typegen=tg, value=nparticles) cache_ghosts = CodegenVariable('cache_ghosts', itype, typegen=tg, - value=min_ghosts, - symbolic_mode=symbolic_mode) - cache_width = CodegenVariable('cache_width', itype, typegen=tg, - init='{}*{} + 2*{}'.format(npart, local_size[0], cache_ghosts)) + value=min_ghosts, + symbolic_mode=symbolic_mode) + cache_width = CodegenVariable('cache_width', itype, typegen=tg, + init='{}*{} + 2*{}'.format(npart, local_size[0], cache_ghosts)) local_work = CodegenVariable('lwork', 'int', tg, const=True, - init='{}*{}'.format(nparticles,local_size[0])) + init='{}*{}'.format(nparticles, local_size[0])) - local_offset = CodegenVariable('local_offset',itype,tg) - line_offset = CodegenVariable('line_offset',itype,tg) - particle_offset = CodegenVariable('particle_offset',itype,tg) + local_offset = CodegenVariable('local_offset', itype, tg) + line_offset = CodegenVariable('line_offset', itype, tg) + particle_offset = CodegenVariable('particle_offset', itype, tg) def _line_init(base_ptr, global_id, grid_strides): - _id='' + _id = '' if work_dim > 1: _id += '$ + ({} $* {})'.format(global_id[work_dim-1], grid_strides[work_dim-1]) for i in xrange(work_dim-2, 0, -1): @@ -494,16 +496,16 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): return '{}{}'.format(base_ptr, _id) line_position = position.newvar('line_position', - init=_line_init(position, position_global_id, position_strides)) + init=_line_init(position, position_global_id, position_strides)) - line_scalars_in = [] + line_scalars_in = [] line_scalars_data_out = [] for (Si, Si_global_id, Si_grid_strides) in \ zip(scalars_data_in, scalars_in_global_id, scalars_strides_in): Li = [] for (Sij, Sij_strides) in zip(Si, Si_grid_strides): lij = Sij.newvar('line_{}'.format(Sij.name), - init=_line_init(Sij, Si_global_id, Sij_strides)) + init=_line_init(Sij, Si_global_id, Sij_strides)) Li.append(lij) line_scalars_in.append(tuple(Li)) for (Si, Si_global_id, Si_grid_strides) in \ @@ -511,10 +513,10 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): Li = [] for (Sij, Sij_strides) in zip(Si, Si_grid_strides): lij = Sij.newvar('line_{}'.format(Sij.name), - init=_line_init(Sij, Si_global_id, Sij_strides)) + init=_line_init(Sij, Si_global_id, Sij_strides)) Li.append(lij) line_scalars_data_out.append(tuple(Li)) - line_scalars_in = tuple(line_scalars_in) + line_scalars_in = tuple(line_scalars_in) line_scalars_data_out = tuple(line_scalars_data_out) line_vars = ((line_position,),) + line_scalars_in @@ -526,85 +528,85 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): if local_size_known: L = s.known_vars['local_size'] for i in xrange(nfields): - Si = [] + Si = 
[] BSi = [] for j in xrange(group_scalars[i]): - Sij = CodegenArray(name='S{}_{}'.format(i,j), - dim=1, ctype=ftype, typegen=tg, - shape=(nparticles*L[0]+2*min_ghosts,), storage=self._local) + Sij = CodegenArray(name='S{}_{}'.format(i, j), + dim=1, ctype=ftype, typegen=tg, + shape=(nparticles*L[0]+2*min_ghosts,), storage=self._local) Si.append(Sij) cached_scalars.append(tuple(Si)) else: buf = self.vars['buffer'] - k=0 + k = 0 for i in xrange(nfields): - Si = [] + Si = [] BSi = [] for j in xrange(group_scalars[i]): - Sij = CodegenVariable(name='S{}_{}'.format(i,j),ctype=ftype,typegen=tg, - ptr_restrict=True, ptr=True, storage=self._local, - ptr_const=True, - init='{} + {}*{}'.format(buf,k,cache_width)) + Sij = CodegenVariable(name='S{}_{}'.format(i, j), ctype=ftype, typegen=tg, + ptr_restrict=True, ptr=True, storage=self._local, + ptr_const=True, + init='{} + {}*{}'.format(buf, k, cache_width)) Si.append(Sij) - k+=1 + k += 1 cached_scalars.append(tuple(Si)) - cache_scalars = tuple(cached_scalars) + cache_scalars = tuple(cached_scalars) boundary_scalars = tuple(boundary_scalars) - pos = CodegenVectorClBuiltin('p', ftype, nparticles, tg) - tuning_pos = CodegenVectorClBuiltin('tp', ftype, nparticles, tg) + pos = CodegenVectorClBuiltin('p', ftype, nparticles, tg) + tuning_pos = CodegenVectorClBuiltin('tp', ftype, nparticles, tg) scalars = [] for i in xrange(nfields): si = [] for j in xrange(group_scalars[i]): - sij = CodegenVectorClBuiltin('s{}_{}'.format(i,j), ftype, nparticles, tg) + sij = CodegenVectorClBuiltin('s{}_{}'.format(i, j), ftype, nparticles, tg) si.append(sij) scalars.append(tuple(si)) scalars = tuple(scalars) - - vzero = CodegenVectorClBuiltin('vzero', ftype, nparticles, tg, - value=np.zeros(shape=(nparticles,), dtype=np.float64)) + vzero = CodegenVectorClBuiltin('vzero', ftype, nparticles, tg, + value=np.zeros(shape=(nparticles,), dtype=np.float64)) kmax = CodegenVariable('kmax', itype, tg, const=True, - init='(({}+{lwork}-1)/{lwork})'.format( - compute_grid_size[0], lwork=local_work)) + init='(({}+{lwork}-1)/{lwork})'.format( + compute_grid_size[0], lwork=local_work)) - loopvars = 'kji' - first = CodegenVariable('first', 'bool', tg, const=True, init='({}==0)'.format(loopvars[0])) - last = CodegenVariable('last', 'bool', tg, const=True, init='({}=={}-1)'.format(loopvars[0],kmax)) - active = CodegenVariable('active','bool', tg, const=True) - last_active = CodegenVariable('last_active','bool', tg, const=True) + loopvars = 'kji' + first = CodegenVariable('first', 'bool', tg, const=True, init='({}==0)'.format(loopvars[0])) + last = CodegenVariable('last', 'bool', tg, const=True, + init='({}=={}-1)'.format(loopvars[0], kmax)) + active = CodegenVariable('active', 'bool', tg, const=True) + last_active = CodegenVariable('last_active', 'bool', tg, const=True) last_particle = CodegenVariable('last_particle', itype, tg, const=True, - init='{} - {}*({}-1)*{}'.format(compute_grid_size[0], nparticles, kmax, local_size[0])) + init='{} - {}*({}-1)*{}'.format(compute_grid_size[0], nparticles, kmax, local_size[0])) @contextlib.contextmanager def _work_iterate_(i): try: - if i==0: + if i == 0: fval = '0' gsize = '1' N = kmax - ghosts = '({}-{})'.format(position_grid_ghosts[i],cache_ghosts) + ghosts = '({}-{})'.format(position_grid_ghosts[i], cache_ghosts) else: fval = global_id.fval(i) gsize = global_size[i] - N = '{Sx}'.format(Sx=compute_grid_size[i]) + N = '{Sx}'.format(Sx=compute_grid_size[i]) ghosts = position_grid_ghosts[i] with s._for_('int {i}={fval}; {i}<{N}; {i}+={gsize}'.format( - 
i=loopvars[i], fval=fval, gsize=gsize,N=N), - unroll=(i==0) and unroll_loops) as ctx: + i=loopvars[i], fval=fval, gsize=gsize, N=N), + unroll=(i == 0) and unroll_loops) as ctx: - if i==0: + if i == 0: with s._align_() as al: line_offset.declare(al, align=True, const=True, init='{}*{}'.format( loopvars[0], local_work)) local_offset.declare(al, align=True, const=True, init='{}*{}'.format( - nparticles,local_id[0])) + nparticles, local_id[0])) particle_offset.declare(al, align=True, const=True, init='{} + {}'.format( line_offset, local_offset)) s.jumpline() @@ -612,19 +614,19 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): first.declare(al, align=True) last.declare(al, align=True) active.declare(al, init='({} < {})'.format(particle_offset, - compute_grid_size[0]), align=True) + compute_grid_size[0]), align=True) last_active.declare(al, init='{} && ({}+{}-1 >= {})'.format(active, particle_offset, - nparticles, compute_grid_size[0]), align=True) - elif i==1: + nparticles, compute_grid_size[0]), align=True) + elif i == 1: kmax.declare(s) last_particle.declare(s) s.jumpline() - if i>0: + if i > 0: with s._align_() as al: - for field_gid, field_ghosts in zip(global_ids, grid_ghosts) : - al.append('{} $= {} $+ {};'.format(field_gid[i], - loopvars[i], field_ghosts[i])) + for field_gid, field_ghosts in zip(global_ids, grid_ghosts): + al.append('{} $= {} $+ {};'.format(field_gid[i], + loopvars[i], field_ghosts[i])) al.jumpline() yield ctx except: @@ -651,7 +653,7 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): def if_first_or_last_thread_active(): #cond= '{} || {}'.format(last_active, first) - cond= '{}'.format(last_active) + cond = '{}'.format(last_active) return if_last_thread_active(cond) with s._kernel_(): @@ -683,15 +685,15 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): s.decl_vars(*global_ids) with s._align_() as al: - _pos=(pos,) + _pos = (pos,) if tuning_mode: - _pos+=(tuning_pos,) - s.decl_vars(*(_pos+tuple(sij for si in scalars for sij in si)), align=True, codegen=al) + _pos += (tuning_pos,) + s.decl_vars(*(_pos+tuple(sij for si in scalars for sij in si)), + align=True, codegen=al) s.jumpline() - - nested_loops = [_work_iterate_(i) for i in xrange(dim-1,-1,-1)] - if work_dim==1: + nested_loops = [_work_iterate_(i) for i in xrange(dim-1, -1, -1)] + if work_dim == 1: kmax.declare(s) last_particle.declare(s) with contextlib.nested(*nested_loops): @@ -699,38 +701,39 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): s.comment('Compute global offsets and line pointers') with s._align_() as al: position_global_id.affect(al, i=0, align=True, - init='{} + {}'.format(particle_offset, position_grid_ghosts[0])) + init='{} + {}'.format(particle_offset, position_grid_ghosts[0])) if not is_inplace: for _gid, _ghosts in zip(scalars_in_global_id, scalars_in_grid_ghosts): _gid.affect(al, i=0, align=True, - init='{} + {}'.format(particle_offset, _ghosts[0])) + init='{} + {}'.format(particle_offset, _ghosts[0])) for _gid, _ghosts in zip(scalars_data_out_global_id, scalars_data_out_grid_ghosts): _gid.affect(al, i=0, align=True, - init='{} + {} - {}'.format(particle_offset, _ghosts[0], - cache_ghosts)) + init='{} + {} - {}'.format(particle_offset, _ghosts[0], + cache_ghosts)) s.jumpline() s.decl_aligned_vars(*(lvij for lvi in line_vars for lvij in lvi)) s.comment('Get back left cache from right cache') with s._for_('int l={}; l<2*{}; l+={}'.format(local_id[0], - cache_ghosts, local_size[0]), unroll=unroll_loops): + cache_ghosts, local_size[0]), 
unroll=unroll_loops): with s._if_('{}'.format(first)): - with s._align_() as al: - for csi in cached_scalars: - for csij in csi: - csij.affect(al, align=True, - i='l', - init=tg.dump(+0.0)) + with s._align_() as al: + for csi in cached_scalars: + for csij in csi: + csij.affect(al, align=True, + i='l', + init=tg.dump(+0.0)) with s._else_(): - end_id='{}+{}'.format(local_work, 'l') + end_id = '{}+{}'.format(local_work, 'l') if debug_mode: - s.append('printf("%i loaded back %2.2f from position %i.\\n", {}, {}, {});'.format(local_id[0], cached_scalars[0][0][end_id], end_id)) + s.append('printf("%i loaded back %2.2f from position %i.\\n", {}, {}, {});'.format( + local_id[0], cached_scalars[0][0][end_id], end_id)) with s._align_() as al: for csi in cached_scalars: - for csij in csi: + for csij in csi: csij.affect(al, align=True, - i='l', - init=csij[end_id]) + i='l', + init=csij[end_id]) s.barrier(_local=True) s.jumpline() @@ -741,14 +744,17 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): s.append(s.vstore(nparticles, csij, offset, vzero)) s.jumpline() - s.comment('Load position and scalars at current index, {} particles at a time.'.format(nparticles)) - for al, active_cond, k in if_last_thread_active(): - load = self.vload(1, line_position, '{}+{}'.format(particle_offset,k), align=True) + s.comment( + 'Load position and scalars at current index, {} particles at a time.'.format(nparticles)) + for al, active_cond, k in if_last_thread_active(): + load = self.vload(1, line_position, + '{}+{}'.format(particle_offset, k), align=True) if use_short_circuit: - code = '{} $= ({} $? {} $: {});'.format(pos[k], active_cond, load, tg.dump(0.0)) + code = '{} $= ({} $? {} $: {});'.format( + pos[k], active_cond, load, tg.dump(0.0)) else: code = 'if ({}) {{ {posk} $= {}; $}} $else {{ {posk} $= {}; $}}'.format(active_cond, - load, tg.dump(0.0), posk=pos[k]) + load, tg.dump(0.0), posk=pos[k]) al.append(code) if is_inplace: _id = '{}+{}+{}'.format(particle_offset, cache_ghosts, k) @@ -758,15 +764,16 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): for sij, line_sij in zip(si, line_si): load = self.vload(1, line_sij, _id, align=True) if use_short_circuit: - code = '{} $= ({} $? {} $: {});'.format(sij[k], active_cond, load, tg.dump(0.0)) + code = '{} $= ({} $? 
{} $: {});'.format( + sij[k], active_cond, load, tg.dump(0.0)) else: code = 'if ({}) {{ {sijk} $= {}; $}} $else {{ {sijk} $= {}; $}}'.format(active_cond, - load, tg.dump(0.0), sijk=sij[k]) + load, tg.dump(0.0), sijk=sij[k]) al.append(code) with if_thread_active(): with s._align_() as al: pos.affect(al, align=True, - init=self.vload(nparticles, line_position, particle_offset, align=True)) + init=self.vload(nparticles, line_position, particle_offset, align=True)) if is_inplace: _id = '{}+{}'.format(particle_offset, cache_ghosts) @@ -775,7 +782,7 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): for si, line_si in zip(scalars, line_scalars_in): for sij, line_sij in zip(si, line_si): sij.affect(al, align=True, - init=self.vload(nparticles, line_sij, _id, align=True)) + init=self.vload(nparticles, line_sij, _id, align=True)) with s._else_(): for k in xrange(nparticles): code = '{} = NAN;'.format(pos[k]) @@ -790,8 +797,8 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): if tuning_mode: code = '{} = max(min({pos}, ({pid}+{one})*{dx}), ({pid}-{one})*{dx});'.format( - tuning_pos, pos=pos, dx=dx, - pid=particle_offset, one=tg.dump(1.0)) + tuning_pos, pos=pos, dx=dx, + pid=particle_offset, one=tg.dump(1.0)) s.append(code) if debug_mode: @@ -799,37 +806,38 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): s.append('printf("\\n");') s.append('printf("\\nGLOBAL SCALAR VALUES\\n");') with s._for_('int ii=0; ii<{}; ii++'.format(compute_grid_size[0])): - s.append('printf("%2.2f, ", {}[cache_ghosts+ii]);'.format(line_scalars_in[0][0])) + s.append( + 'printf("%2.2f, ", {}[cache_ghosts+ii]);'.format(line_scalars_in[0][0])) s.append('printf("\\n\\n");') with s._ordered_wi_execution_(barrier=True): - s.append('printf("lid.x=%i, k=%i/%i, line_offset=%i, local_offset=%i, poffset=%i, first=%i, last=%i, active=%i, last_active=%i, p={vnf}, s0={vnf}\\n", {},k,kmax,line_offset,local_offset,particle_offset,first,last,active,last_active,{},{});'.format(local_id[0], epv(pos), epv(scalars[0][0]), vnf=vnf)) + s.append('printf("lid.x=%i, k=%i/%i, line_offset=%i, local_offset=%i, poffset=%i, first=%i, last=%i, active=%i, last_active=%i, p={vnf}, s0={vnf}\\n", {},k,kmax,line_offset,local_offset,particle_offset,first,last,active,last_active,{},{});'.format( + local_id[0], epv(pos), epv(scalars[0][0]), vnf=vnf)) s.barrier(_local=True) - + with s._first_wi_execution_(): s.append('printf("\\nLEFT SCALAR CACHE (BEFORE REMESH)\\n");') with s._for_('int ii=0; ii<cache_width; ii++'): s.append('printf("%2.2f, ", S0_0[ii]);') s.append('printf("\\n");') - s.comment('Remesh scalars in cache.') with s._block_(): remesh = s.reqs['remesh'] remesh_kargs = { - 'dx':dx, - 'inv_dx':inv_dx, + 'dx': dx, + 'inv_dx': inv_dx, 'cache_width': cache_width, 'cache_ghosts': cache_ghosts, 'active': active, 'line_offset': line_offset } - k=0 + k = 0 for ci in cached_scalars: for cij in ci: remesh_kargs['S{}'.format(k)] = cij - k+=1 + k += 1 if use_atomics: # with atomic summation in cache, we can remesh everything at a time @@ -837,11 +845,11 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): remesh_kargs['p'] = tuning_pos else: remesh_kargs['p'] = pos - k=0 + k = 0 for si in scalars: for sij in si: remesh_kargs['s{}'.format(k)] = sij - k+=1 + k += 1 call = remesh(**remesh_kargs) s.append('{};'.format(call)) else: @@ -849,16 +857,17 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): for p in xrange(nparticles): if debug_mode: with s._first_wi_execution_(): - s.append('printf("\\nREMESHING 
PARTICLES {}*k+{}:\\n");'.format(nparticles, p)) + s.append( + 'printf("\\nREMESHING PARTICLES {}*k+{}:\\n");'.format(nparticles, p)) if tuning_mode: remesh_kargs['p'] = tuning_pos[p] else: remesh_kargs['p'] = pos[p] - k=0 + k = 0 for si in scalars: for sij in si: remesh_kargs['s{}'.format(k)] = sij[p] - k+=1 + k += 1 call = remesh(**remesh_kargs) s.append('{};'.format(call)) s.barrier(_local=True, _global=True) @@ -879,21 +888,22 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): for al, active_cond, k in if_first_or_last_thread_active(): for si, li in zip(cached_scalars, line_scalars_data_out): for sij, lij in zip(si, li): - load = self.vload(1, sij, '{}+{}'.format(local_offset, k)) + load = self.vload(1, sij, '{}+{}'.format(local_offset, k)) _id = '{}+{}'.format(particle_offset, k) - store = self.vstore(1, lij, _id, load, align=True, jmp=True, suppress_semicolon=True) + store = self.vstore(1, lij, _id, load, align=True, + jmp=True, suppress_semicolon=True) if use_short_circuit: code = '{} $&& (({}),true)'.format(active_cond, store) if debug_mode: code += ' && (printf("last_active %i wrote %2.2f from cache position %i to global id %i.\\n", {},{},{},{}));'.format( - local_id[0], load, '{}+{}'.format(local_offset,k), _id) + local_id[0], load, '{}+{}'.format(local_offset, k), _id) else: code += ';' else: code = 'if ({}) ${{ {}; '.format(active_cond, store) if debug_mode: code += 'printf("last_active %i wrote %2.2f from cache position %i to global id %i.\\n", {},{},{},{});'.format( - local_id[0], load, '{}+{}'.format(local_offset,k), _id) + local_id[0], load, '{}+{}'.format(local_offset, k), _id) code += '}' al.append(code) with if_thread_active(not_first=False): @@ -906,73 +916,78 @@ class DirectionalRemeshKernelGenerator(KernelCodeGenerator): vnf=vnf)) for ci, li in zip(cached_scalars, line_scalars_data_out): for cij, lij in zip(ci, li): - load = self.vload(nparticles, cij, '{}'.format(local_offset)) + load = self.vload(nparticles, cij, '{}'.format(local_offset)) _id = particle_offset - store = self.vstore(nparticles, lij, _id, load, align=True, jmp=True) + store = self.vstore(nparticles, lij, _id, + load, align=True, jmp=True) al.append(store) s.jumpline() with s._if_('{}'.format(last)): with s._for_('int l={}; l<2*{}; l+={}'.format(local_id[0], - cache_ghosts, local_size[0]), unroll=unroll_loops): + cache_ghosts, local_size[0]), unroll=unroll_loops): with s._align_() as al: _gid = '{}+{}+{}'.format(line_offset, last_particle, 'l') for ci, li in zip(cached_scalars, line_scalars_data_out): - for cij, lij in zip(ci,li): - init=cij['{}+{}'.format(last_particle, 'l')] + for cij, lij in zip(ci, li): + init = cij['{}+{}'.format(last_particle, 'l')] lij.affect(i=_gid, init=init, align=True, codegen=al) if debug_mode: with s._ordered_wi_execution_(barrier=False): s.append('printf("%i initiated last write from cache position %i+%i=%i to global id %i+%i+%i=%i with value %2.2f.\\n", {}, {},{},{}, {},{},{},{}, {});'.format( - local_id[0], last_particle, 'l', '{}+l'.format(last_particle), - line_offset, last_particle, 'l', '{}+{}+l'.format(line_offset, last_particle), + local_id[0], last_particle, 'l', '{}+l'.format( + last_particle), + line_offset, last_particle, 'l', '{}+{}+l'.format( + line_offset, last_particle), 'S0_0[{}+l]'.format(last_particle))) s.barrier(_local=True, _global=True) + if __name__ == '__main__': from hysop.backend.device.opencl import cl from hysop.backend.device.codegen.base.test import _test_mesh_info, _test_typegen from hysop.numerics.remesh.remesh import 
RemeshKernel - kernel = RemeshKernel(4,2, split_polys=False) + kernel = RemeshKernel(4, 2, split_polys=False) - work_dim=3 - ghosts=(0,0,0) - sresolution =(1024,512,256) - local_size = (128,1,1) - global_size = (16050,55,440) + work_dim = 3 + ghosts = (0, 0, 0) + sresolution = (1024, 512, 256) + local_size = (128, 1, 1) + global_size = (16050, 55, 440) tg = _test_typegen('float', 'hex') - (_,smesh_info) = _test_mesh_info('scalars_mesh_info',tg,work_dim,ghosts,sresolution) - (_,pmesh_info) = _test_mesh_info('position_mesh_info',tg,work_dim,ghosts,sresolution) + (_, smesh_info) = _test_mesh_info('scalars_mesh_info', tg, work_dim, ghosts, sresolution) + (_, pmesh_info) = _test_mesh_info('position_mesh_info', tg, work_dim, ghosts, sresolution) scalar_cfl = 1.5 dak = DirectionalRemeshKernelGenerator(typegen=tg, ftype=tg.fbtype, - work_dim=work_dim, - nparticles=4, - nscalars=2, - remesh_kernel=kernel, - scalar_cfl=scalar_cfl, - use_atomics=False, - is_inplace=True, - symbolic_mode=False, - debug_mode=False, - tuning_mode=False, - sboundary=(BoundaryCondition.NONE, BoundaryCondition.NONE), - known_vars=dict( - S0_inout_mesh_info=smesh_info, - S1_inout_mesh_info=smesh_info, - S0_in_mesh_info=smesh_info, - S1_in_mesh_info=smesh_info, - S0_out_mesh_info=smesh_info, - S1_out_mesh_info=smesh_info, - position_mesh_info=pmesh_info, - local_size=local_size[:work_dim], - global_size=global_size[:work_dim] - ) - ) + work_dim=work_dim, + nparticles=4, + nscalars=2, + remesh_kernel=kernel, + scalar_cfl=scalar_cfl, + use_atomics=False, + is_inplace=True, + symbolic_mode=False, + debug_mode=False, + tuning_mode=False, + sboundary=(BoundaryCondition.NONE, + BoundaryCondition.NONE), + known_vars=dict( + S0_inout_mesh_info=smesh_info, + S1_inout_mesh_info=smesh_info, + S0_in_mesh_info=smesh_info, + S1_in_mesh_info=smesh_info, + S0_out_mesh_info=smesh_info, + S1_out_mesh_info=smesh_info, + position_mesh_info=pmesh_info, + local_size=local_size[:work_dim], + global_size=global_size[:work_dim] + ) + ) print 'scalars_out_min_gosts = {}'.format(dak.scalars_out_cache_ghosts(scalar_cfl, kernel)) print 'required cache: {}'.format(dak.required_workgroup_cache_size(local_size)) diff --git a/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py b/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py index aee89fd51eec7d32aa3a147e85fdaf8a78eb8911..4f92191c477f63c9b5f83ac01799983ec7cfb07a 100644 --- a/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py +++ b/hysop/backend/device/opencl/autotunable_kernels/remesh_dir.py @@ -1,10 +1,10 @@ - from hysop.deps import warnings from hysop.tools.numpywrappers import npw from hysop.tools.types import check_instance from hysop.tools.misc import upper_pow2_or_3 from hysop.constants import AutotunerFlags, BoundaryCondition from hysop.numerics.remesh.remesh import RemeshKernel +from hysop.numerics.remesh.kernel_generator import Kernel from hysop.fields.cartesian_discrete_field import CartesianDiscreteScalarFieldView from hysop.backend.device.codegen import CodeGeneratorWarning @@ -15,16 +15,17 @@ from hysop.backend.device.opencl.opencl_array import OpenClArray from hysop.backend.device.opencl.opencl_autotunable_kernel import OpenClAutotunableKernel from hysop.backend.device.kernel_autotuner import KernelGenerationError + class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): """Autotunable interface for directional remeshing kernel code generators.""" - def autotune(self, direction, scalar_cfl, - position, scalars_in, scalars_out, is_inplace, - 
remesh_kernel, remesh_criteria_eps, - force_atomics, relax_min_particles, - hardcode_arrays, **kwds): + def autotune(self, direction, scalar_cfl, + position, scalars_in, scalars_out, is_inplace, + remesh_kernel, remesh_criteria_eps, + force_atomics, relax_min_particles, + hardcode_arrays, **kwds): """Autotune this kernel with specified configuration. - + hardcode_arrays means that array offset and strides can be hardcoded into the kernels as constants. """ @@ -32,152 +33,153 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): check_instance(scalars_out, tuple, values=CartesianDiscreteScalarFieldView) precision = self.typegen.dtype - + dim = position.dim if not (1 <= dim <= 3): - msg='Dimension mismatch {}.'.format(dim) + msg = 'Dimension mismatch {}.'.format(dim) raise ValueError(msg) if not (0 <= direction < dim): - msg='Invalid direction {}.'.format(direction) + msg = 'Invalid direction {}.'.format(direction) raise ValueError(msg) if not issubclass(precision, npw.floating): - msg='Precision is not a npw.floating subtype, got {}.'.format(precision) + msg = 'Precision is not a npw.floating subtype, got {}.'.format(precision) raise TypeError(msg) - if not isinstance(scalar_cfl, float) or scalar_cfl<=0.0: - msg='Invalid scalar_cfl value {}.'.format(scalar_cfl) + if not isinstance(scalar_cfl, float) or scalar_cfl <= 0.0: + msg = 'Invalid scalar_cfl value {}.'.format(scalar_cfl) raise ValueError(msg) - if len(scalars_in)!=len(scalars_out): + if len(scalars_in) != len(scalars_out): raise ValueError('Unmatched scalars input/output.') - if not isinstance(remesh_kernel, RemeshKernel): - msg='Invalid remesh_kernel type {}.'.format(type(remesh_kernel)) + if not isinstance(remesh_kernel, RemeshKernel) and not isinstance(remesh_kernel, Kernel): + msg = 'Invalid remesh_kernel type {}.'.format(type(remesh_kernel)) raise TypeError(msg) if (remesh_criteria_eps is not None) and \ - (not isinstance(remesh_criteria_eps, float) or \ - (remesh_criteria_eps<0.0)): - msg='Invalid remesh_criteria_eps value {}.'.format(remesh_criteria_eps) + (not isinstance(remesh_criteria_eps, float) or + (remesh_criteria_eps < 0.0)): + msg = 'Invalid remesh_criteria_eps value {}.'.format(remesh_criteria_eps) raise ValueError(msg) if not isinstance(force_atomics, bool): - msg='Invalid force_atomics value {}, should be a boolean.' - msg=msg.format(force_atomics) + msg = 'Invalid force_atomics value {}, should be a boolean.' + msg = msg.format(force_atomics) raise ValueError(msg) if not isinstance(relax_min_particles, bool): - msg='Invalid relax_min_particles value {}, should be a boolean.' - msg=msg.format(relax_min_particles) + msg = 'Invalid relax_min_particles value {}, should be a boolean.' + msg = msg.format(relax_min_particles) raise ValueError(msg) if (force_atomics and relax_min_particles): - msg= 'Cannot relax min particles when force_atomics is set because ' - msg+='there is no minimum particles to be used when using atomic accumulators.' + msg = 'Cannot relax min particles when force_atomics is set because ' + msg += 'there is no minimum particles to be used when using atomic accumulators.' raise ValueError(msg) if (position.dtype != precision): - #TODO implement mixed precision kernels - msg='Array type for position {} do not match required operator precision {}.' - msg=msg.format(position.dtype, precision.__name__) + # TODO implement mixed precision kernels + msg = 'Array type for position {} do not match required operator precision {}.' 
+ msg = msg.format(position.dtype, precision.__name__) raise NotImplementedError(msg) - - pshape = position.compute_resolution - work_size = npw.asarray(pshape, dtype=npw.int32).copy() - work_dim = work_size.size + + pshape = position.compute_resolution + work_size = npw.asarray(pshape, dtype=npw.int32).copy() + work_dim = work_size.size group_scalars = tuple(dfield.nb_components for dfield in scalars_in) - nfields = len(group_scalars) - nscalars = sum(group_scalars) - ftype = clTools.dtype_to_ctype(precision) - + nfields = len(group_scalars) + nscalars = sum(group_scalars) + ftype = clTools.dtype_to_ctype(precision) + min_s_ghosts = DirectionalRemeshKernelGenerator.scalars_out_cache_ghosts(scalar_cfl, - remesh_kernel) - + remesh_kernel) + min_nparticles = 2 if relax_min_particles else int(2*npw.ceil(scalar_cfl)) - - assert (min_s_ghosts>=1) - assert (min_nparticles>=2) - + + assert (min_s_ghosts >= 1) + assert (min_nparticles >= 2) + name = DirectionalRemeshKernelGenerator.codegen_name(work_dim, remesh_kernel, - ftype, min_nparticles, nscalars, remesh_criteria_eps, False, is_inplace) + ftype, min_nparticles, nscalars, remesh_criteria_eps, False, is_inplace) for dsin, dsout in zip(scalars_in, scalars_out): if (dsin.nb_components != dsout.nb_components): - msg='Components mismatch between input field {} and output field {}, ' - msg+='got input={}, output={}, cannot remesh.' - msg=msg.format(dsin.name, dsout.name, dsin.nb_components, dsout.nb_components) + msg = 'Components mismatch between input field {} and output field {}, ' + msg += 'got input={}, output={}, cannot remesh.' + msg = msg.format(dsin.name, dsout.name, dsin.nb_components, dsout.nb_components) raise ValueError(msg) if (dsin.compute_resolution != pshape).any() or \ (dsout.compute_resolution != pshape).any(): - msg='Resolution mismatch between particles and scalars, ' - msg+='got input={}, output={} but pshape={}, cannot remesh.' - msg=msg.format(dsin.compute_resolution, dsout.compute_resolution, pshape) + msg = 'Resolution mismatch between particles and scalars, ' + msg += 'got input={}, output={} but pshape={}, cannot remesh.' + msg = msg.format(dsin.compute_resolution, dsout.compute_resolution, pshape) raise ValueError(msg) if (dsout.ghosts[-1] < min_s_ghosts): - msg= 'Given boundary condition implies minimum ghosts numbers to be at ' - msg+='least {} in remeshed direction for output scalar but only {} ghosts ' - msg+='are present in the grid for output scalar {}.' - msg=msg.format(min_s_ghosts, dsout.ghosts[-1], dsout.name) + msg = 'Given boundary condition implies minimum ghosts numbers to be at ' + msg += 'least {} in remeshed direction for output scalar but only {} ghosts ' + msg += 'are present in the grid for output scalar {}.' + msg = msg.format(min_s_ghosts, dsout.ghosts[-1], dsout.name) raise ValueError(msg) - if is_inplace and (dsin.dfield !=dsout.dfield): - msg='Remeshing inplace but input and output discrete Field differs ' - msg+='for {} and {}.'.format(dsin.name, dsout.name) + if is_inplace and (dsin.dfield != dsout.dfield): + msg = 'Remeshing inplace but input and output discrete Field differs ' + msg += 'for {} and {}.'.format(dsin.name, dsout.name) raise ValueError(msg) - if (dsin.dtype != precision) or (dsout.dtype!=precision): - #TODO implement mixed precision kernels - msg='Array types ({}={}, {}={}) do not match required operator precision {}.' 
- msg=msg.format(dsin.name, dsin.dtype, dsout.name, dsout.dtype, precision.__name__) + if (dsin.dtype != precision) or (dsout.dtype != precision): + # TODO implement mixed precision kernels + msg = 'Array types ({}={}, {}={}) do not match required operator precision {}.' + msg = msg.format(dsin.name, dsin.dtype, dsout.name, dsout.dtype, precision.__name__) raise NotImplementedError(msg) - + make_offset, offset_dtype = self.make_array_offset() - make_strides, strides_dtype = self.make_array_strides(position.dim, - hardcode_arrays=hardcode_arrays) - + make_strides, strides_dtype = self.make_array_strides(position.dim, + hardcode_arrays=hardcode_arrays) + kernel_args = {} known_args = {} isolation_params = {} mesh_info_vars = {} target_args = known_args if hardcode_arrays else kernel_args - kernel_args['position_base'] = position.sdata.base_data + kernel_args['position_base'] = position.sdata.base_data target_args['position_strides'] = make_strides(position.sdata.strides, position.dtype) - target_args['position_offset'] = make_offset(position.sdata.offset, position.dtype) + target_args['position_offset'] = make_offset(position.sdata.offset, position.dtype) mesh_info_vars['position_mesh_info'] = \ - self.mesh_info('position_mesh_info', position.mesh) - isolation_params['position_base'] = dict(count=position.npoints, - dtype=position.dtype, arg_value=position.sbuffer.get()) + self.mesh_info('position_mesh_info', position.mesh) + isolation_params['position_base'] = dict(count=position.npoints, + dtype=position.dtype, arg_value=position.sbuffer.get()) arg_index = 1 + 2*(1-hardcode_arrays) if is_inplace: - for (i,dsinout) in enumerate(scalars_in): + for (i, dsinout) in enumerate(scalars_in): mi = 'S{}_inout_mesh_info'.format(i) mesh_info_vars[mi] = self.mesh_info(mi, dsinout.mesh) for j in xrange(dsinout.nb_components): - prefix = 'S{}_{}_inout'.format(i,j) - kernel_args[prefix+'_base'] = dsinout.data[j].base_data - target_args[prefix+'_strides'] = make_strides(dsinout.data[j].strides, - dsinout.dtype) - target_args[prefix+'_offset'] = make_offset(dsinout.data[j].offset, - dsinout.dtype) - isolation_params[prefix+'_base'] = dict(count=dsinout.data[j].size, - dtype=dsinout.dtype, fill=10*i+j) + prefix = 'S{}_{}_inout'.format(i, j) + kernel_args[prefix+'_base'] = dsinout.data[j].base_data + target_args[prefix+'_strides'] = make_strides(dsinout.data[j].strides, + dsinout.dtype) + target_args[prefix+'_offset'] = make_offset(dsinout.data[j].offset, + dsinout.dtype) + isolation_params[prefix+'_base'] = dict(count=dsinout.data[j].size, + dtype=dsinout.dtype, fill=10*i+j) arg_index += 1 + 2*(1-hardcode_arrays) assert i == nfields-1 else: - for (i,dsin) in enumerate(scalars_in): + for (i, dsin) in enumerate(scalars_in): mi = 'S{}_in_mesh_info'.format(i) mesh_info_vars[mi] = self.mesh_info(mi, dsin.mesh) for j in xrange(dsin.nb_components): - prefix = 'S{}_{}_in'.format(i,j) - kernel_args[prefix+'_base'] = dsin.data[j].base_data + prefix = 'S{}_{}_in'.format(i, j) + kernel_args[prefix+'_base'] = dsin.data[j].base_data target_args[prefix+'_strides'] = make_strides(dsin.data[j].strides, dsin.dtype) - target_args[prefix+'_offset'] = make_offset(dsin.data[j].offset, dsin.dtype) - isolation_params[prefix+'_base'] = dict(count=dsin.data[j].size, - dtype=dsin.dtype, fill=10*i+j) + target_args[prefix+'_offset'] = make_offset(dsin.data[j].offset, dsin.dtype) + isolation_params[prefix+'_base'] = dict(count=dsin.data[j].size, + dtype=dsin.dtype, fill=10*i+j) arg_index += 1 + 2*(1-hardcode_arrays) assert i == nfields-1 
- for (i,dsout) in enumerate(scalars_out): + for (i, dsout) in enumerate(scalars_out): mi = 'S{}_out_mesh_info'.format(i) mesh_info_vars[mi] = self.mesh_info(mi, dsout.mesh) for j in xrange(dsout.nb_components): - prefix = 'S{}_{}_out'.format(i,j) - kernel_args[prefix+'_base'] = dsout.data[j].base_data - target_args[prefix+'_strides'] = make_strides(dsout.data[j].strides, dsout.dtype) - target_args[prefix+'_offset'] = make_offset(dsout.data[j].offset, dsout.dtype) - isolation_params[prefix+'_base'] = dict(count=dsout.data[j].size, - dtype=dsout.dtype, fill=0) + prefix = 'S{}_{}_out'.format(i, j) + kernel_args[prefix+'_base'] = dsout.data[j].base_data + target_args[prefix + + '_strides'] = make_strides(dsout.data[j].strides, dsout.dtype) + target_args[prefix+'_offset'] = make_offset(dsout.data[j].offset, dsout.dtype) + isolation_params[prefix+'_base'] = dict(count=dsout.data[j].size, + dtype=dsout.dtype, fill=0) arg_index += 1 + 2*(1-hardcode_arrays) assert i == nfields-1 assert len(kernel_args) == arg_index @@ -185,17 +187,16 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): assert arg_index == (1+2*(1-hardcode_arrays))*(1+(2-is_inplace)*nscalars) return super(OpenClAutotunableDirectionalRemeshKernel, self).autotune(name=name, - position=position, scalars_in=scalars_in, scalars_out=scalars_out, - is_inplace=is_inplace, min_s_ghosts=min_s_ghosts, - precision=precision, nscalars=nscalars, group_scalars=group_scalars, - remesh_kernel=remesh_kernel, remesh_criteria_eps=remesh_criteria_eps, - force_atomics=force_atomics, min_nparticles=min_nparticles, ftype=ftype, - scalar_cfl=scalar_cfl, kernel_args=kernel_args, mesh_info_vars=mesh_info_vars, - work_dim=work_dim, work_size=work_size, - known_args=known_args, hardcode_arrays=hardcode_arrays, - offset_dtype=offset_dtype, strides_dtype=strides_dtype, - isolation_params=isolation_params, **kwds) - + position=position, scalars_in=scalars_in, scalars_out=scalars_out, + is_inplace=is_inplace, min_s_ghosts=min_s_ghosts, + precision=precision, nscalars=nscalars, group_scalars=group_scalars, + remesh_kernel=remesh_kernel, remesh_criteria_eps=remesh_criteria_eps, + force_atomics=force_atomics, min_nparticles=min_nparticles, ftype=ftype, + scalar_cfl=scalar_cfl, kernel_args=kernel_args, mesh_info_vars=mesh_info_vars, + work_dim=work_dim, work_size=work_size, + known_args=known_args, hardcode_arrays=hardcode_arrays, + offset_dtype=offset_dtype, strides_dtype=strides_dtype, + isolation_params=isolation_params, **kwds) def compute_args_mapping(self, extra_kwds, extra_parameters): """ @@ -206,87 +207,86 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): arg_position being an int and arg_type(s) a type or tuple of types which will be checked against. 
""" - - is_inplace = extra_kwds['is_inplace'] - scalars_in = extra_kwds['scalars_in'] - scalars_out = extra_kwds['scalars_out'] - nscalars = extra_kwds['nscalars'] - strides_dtype = extra_kwds['strides_dtype'] - offset_dtype = extra_kwds['offset_dtype'] + + is_inplace = extra_kwds['is_inplace'] + scalars_in = extra_kwds['scalars_in'] + scalars_out = extra_kwds['scalars_out'] + nscalars = extra_kwds['nscalars'] + strides_dtype = extra_kwds['strides_dtype'] + offset_dtype = extra_kwds['offset_dtype'] hardcode_arrays = extra_kwds['hardcode_arrays'] - + args_mapping = {} arg_index = 0 args_mapping['position_base'] = (0, cl.MemoryObjectHolder) arg_index += 1 if not hardcode_arrays: - args_mapping['position_strides'] = (1, strides_dtype) - args_mapping['position_offset'] = (2, offset_dtype) + args_mapping['position_strides'] = (1, strides_dtype) + args_mapping['position_offset'] = (2, offset_dtype) arg_index += 2 if is_inplace: - for (i,dsinout) in enumerate(scalars_in): + for (i, dsinout) in enumerate(scalars_in): for j in xrange(dsinout.nb_components): - prefix = 'S{}_{}_inout'.format(i,j) + prefix = 'S{}_{}_inout'.format(i, j) args_mapping[prefix+'_base'] = (arg_index, cl.MemoryObjectHolder) - arg_index+=1 + arg_index += 1 if not hardcode_arrays: args_mapping[prefix+'_strides'] = (arg_index+0, strides_dtype) - args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) + args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) arg_index += 2 else: - for (i,dsin) in enumerate(scalars_in): + for (i, dsin) in enumerate(scalars_in): for j in xrange(dsin.nb_components): - prefix = 'S{}_{}_in'.format(i,j) - args_mapping[prefix+'_base'] = (arg_index, cl.MemoryObjectHolder) + prefix = 'S{}_{}_in'.format(i, j) + args_mapping[prefix+'_base'] = (arg_index, cl.MemoryObjectHolder) arg_index += 1 if not hardcode_arrays: args_mapping[prefix+'_strides'] = (arg_index+0, strides_dtype) - args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) + args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) arg_index += 2 - for (i,dsout) in enumerate(scalars_out): + for (i, dsout) in enumerate(scalars_out): for j in xrange(dsout.nb_components): - prefix = 'S{}_{}_out'.format(i,j) - args_mapping[prefix+'_base'] = (arg_index, cl.MemoryObjectHolder) + prefix = 'S{}_{}_out'.format(i, j) + args_mapping[prefix+'_base'] = (arg_index, cl.MemoryObjectHolder) arg_index += 1 if not hardcode_arrays: args_mapping[prefix+'_strides'] = (arg_index+0, strides_dtype) - args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) + args_mapping[prefix+'_offset'] = (arg_index+1, offset_dtype) arg_index += 2 - assert len(args_mapping)==arg_index + assert len(args_mapping) == arg_index assert arg_index == (1+2*(1-hardcode_arrays))*(1+(2-is_inplace)*nscalars) return args_mapping - - def compute_parameters(self, extra_kwds): + def compute_parameters(self, extra_kwds): """Register extra parameters to optimize.""" check_instance(extra_kwds, dict, keys=str) - ftype = extra_kwds['ftype'] - work_dim = extra_kwds['work_dim'] - precision = extra_kwds['precision'] + ftype = extra_kwds['ftype'] + work_dim = extra_kwds['work_dim'] + precision = extra_kwds['precision'] force_atomics = extra_kwds['force_atomics'] - nparticles_options = [1,2,4,8,16] + nparticles_options = [1, 2, 4, 8, 16] use_atomics_options = [True] if force_atomics else [True, False] if True in use_atomics_options: cl_env = self.cl_env - msg=None + msg = None if ftype == 'float': if not cl_env.has_extension('cl_khr_local_int32_base_atomics'): - msg='OpenCL device {} does 
not support int32 atomics ' - msg+='(cl_khr_local_int32_base_atomics).' - msg=msg.format(cl_env.device.name) + msg = 'OpenCL device {} does not support int32 atomics ' + msg += '(cl_khr_local_int32_base_atomics).' + msg = msg.format(cl_env.device.name) elif ftype == 'double': if not cl_env.has_extension('cl_khr_int64_base_atomics'): - msg='OpenCL device {} does not support int64 atomics ' - msg+='(cl_khr_int64_base_atomics).' - msg=msg.format(cl_env.device.name) + msg = 'OpenCL device {} does not support int64 atomics ' + msg += '(cl_khr_int64_base_atomics).' + msg = msg.format(cl_env.device.name) else: msg = 'Atomic remeshing kernel has not been implemented for ' - msg+= '{} yet.'.format(precision) + msg += '{} yet.'.format(precision) if msg: if force_atomics: @@ -297,113 +297,112 @@ class OpenClAutotunableDirectionalRemeshKernel(OpenClAutotunableKernel): msg += '\nAtomic version of the remeshing kernel will be disabled.' warnings.warn(msg, CodeGeneratorWarning) use_atomics_options.remove(True) - + autotuner_flag = self.autotuner_config.autotuner_flag if (autotuner_flag == AutotunerFlags.ESTIMATE): if True in use_atomics_options: use_atomics_options.pop(False) - max_workitem_workload = [1,1,1] + max_workitem_workload = [1, 1, 1] elif (autotuner_flag == AutotunerFlags.MEASURE): - max_workitem_workload = [1,8,1] + max_workitem_workload = [1, 8, 1] elif (autotuner_flag == AutotunerFlags.PATIENT): - max_workitem_workload = [1,8,8] + max_workitem_workload = [1, 8, 8] elif (autotuner_flag == AutotunerFlags.EXHAUSTIVE): - max_workitem_workload = [1,16,16] + max_workitem_workload = [1, 16, 16] max_workitem_workload = npw.asarray(max_workitem_workload[:work_dim]) extra_kwds['max_work_load'] = max_workitem_workload - + params = super(OpenClAutotunableDirectionalRemeshKernel, self).compute_parameters( - extra_kwds=extra_kwds) + extra_kwds=extra_kwds) params.register_extra_parameter('use_atomics', use_atomics_options) params.register_extra_parameter('nparticles', nparticles_options) return params - + def compute_min_max_wg_size(self, work_bounds, work_load, global_work_size, - extra_parameters, extra_kwds): + extra_parameters, extra_kwds): """Default min and max workgroup size.""" cache_ghosts = extra_kwds['min_s_ghosts'] - min_wg_size = npw.ones(shape=work_bounds.work_dim, dtype=npw.int32) + min_wg_size = npw.ones(shape=work_bounds.work_dim, dtype=npw.int32) min_wg_size[0] = max(min_wg_size[0], 2*cache_ghosts) max_wg_size = npw.ones(shape=work_bounds.work_dim, dtype=npw.int32) max_wg_size[0] = max(global_work_size[0], min_wg_size[0]) return (min_wg_size, max_wg_size) - + def compute_global_work_size(self, local_work_size, work, extra_parameters, extra_kwds): gs = super(OpenClAutotunableDirectionalRemeshKernel, self).compute_global_work_size( - local_work_size=local_work_size, work=work, - extra_parameters=extra_parameters, extra_kwds=extra_kwds) + local_work_size=local_work_size, work=work, + extra_parameters=extra_parameters, extra_kwds=extra_kwds) gs[0] = local_work_size[0] return gs def generate_kernel_src(self, global_work_size, local_work_size, - extra_parameters, extra_kwds, tuning_mode, dry_run): + extra_parameters, extra_kwds, tuning_mode, dry_run): """Generate kernel name and source code""" - ## Extract usefull variables - ftype = extra_kwds['ftype'] - work_dim = extra_kwds['work_dim'] - nscalars = extra_kwds['nscalars'] - known_args = extra_kwds['known_args'] - is_inplace = extra_kwds['is_inplace'] - scalar_cfl = extra_kwds['scalar_cfl'] - remesh_kernel = extra_kwds['remesh_kernel'] - 
group_scalars = extra_kwds['group_scalars'] - mesh_info_vars = extra_kwds['mesh_info_vars'] - min_nparticles = extra_kwds['min_nparticles'] + # Extract usefull variables + ftype = extra_kwds['ftype'] + work_dim = extra_kwds['work_dim'] + nscalars = extra_kwds['nscalars'] + known_args = extra_kwds['known_args'] + is_inplace = extra_kwds['is_inplace'] + scalar_cfl = extra_kwds['scalar_cfl'] + remesh_kernel = extra_kwds['remesh_kernel'] + group_scalars = extra_kwds['group_scalars'] + mesh_info_vars = extra_kwds['mesh_info_vars'] + min_nparticles = extra_kwds['min_nparticles'] remesh_criteria_eps = extra_kwds['remesh_criteria_eps'] - ## Extract and check parameters - nparticles = extra_parameters['nparticles'] + # Extract and check parameters + nparticles = extra_parameters['nparticles'] use_atomics = extra_parameters['use_atomics'] if (not use_atomics) and (nparticles < min_nparticles): - msg='Insufficient number of particles, min={}, got {}.' - msg=msg.format(min_nparticles, nparticles) + msg = 'Insufficient number of particles, min={}, got {}.' + msg = msg.format(min_nparticles, nparticles) raise KernelGenerationError(msg) - ## Get compile time OpenCL known variables + # Get compile time OpenCL known variables known_vars = super(OpenClAutotunableDirectionalRemeshKernel, self).generate_kernel_src( - global_work_size=global_work_size, - local_work_size=local_work_size, - extra_parameters=extra_parameters, - extra_kwds=extra_kwds, tuning_mode=tuning_mode, dry_run=dry_run) + global_work_size=global_work_size, + local_work_size=local_work_size, + extra_parameters=extra_parameters, + extra_kwds=extra_kwds, tuning_mode=tuning_mode, dry_run=dry_run) known_vars.update(mesh_info_vars) known_vars.update(known_args) - + # disable periodic-periodic because we exchange ghosts anyways sboundaries = (BoundaryCondition.NONE, BoundaryCondition.NONE,) - - ## Generate OpenCL source code - codegen = DirectionalRemeshKernelGenerator(typegen=self.typegen, - work_dim=work_dim, ftype=ftype, - nparticles=nparticles, nscalars=nscalars, - sboundary=sboundaries, is_inplace=is_inplace, - scalar_cfl=scalar_cfl, remesh_kernel=remesh_kernel, - group_scalars=group_scalars, - remesh_criteria_eps=remesh_criteria_eps, - use_atomics = use_atomics, - symbolic_mode = self.symbolic_mode, - tuning_mode = tuning_mode, - debug_mode = False, - known_vars = known_vars) - + + # Generate OpenCL source code + codegen = DirectionalRemeshKernelGenerator(typegen=self.typegen, + work_dim=work_dim, ftype=ftype, + nparticles=nparticles, nscalars=nscalars, + sboundary=sboundaries, is_inplace=is_inplace, + scalar_cfl=scalar_cfl, remesh_kernel=remesh_kernel, + group_scalars=group_scalars, + remesh_criteria_eps=remesh_criteria_eps, + use_atomics=use_atomics, + symbolic_mode=self.symbolic_mode, + tuning_mode=tuning_mode, + debug_mode=False, + known_vars=known_vars) + kernel_name = codegen.name - kernel_src = str(codegen) + kernel_src = str(codegen) - ## Check if cache would fit + # Check if cache would fit if (local_work_size is not None): self.check_cache(codegen.required_workgroup_cache_size(local_work_size)[2]) return (kernel_name, kernel_src) - + def hash_extra_kwds(self, extra_kwds): """Hash extra_kwds dictionnary for caching purposes.""" - kwds = ('remesh_criteria_eps', 'nscalars', 'ftype', + kwds = ('remesh_criteria_eps', 'nscalars', 'ftype', 'is_inplace', 'remesh_kernel', 'work_size', 'known_args') - return self.custom_hash(*tuple(extra_kwds[kwd] for kwd in kwds), - mesh_info_vars=extra_kwds['mesh_info_vars']) - + return 
self.custom_hash(*tuple(extra_kwds[kwd] for kwd in kwds), + mesh_info_vars=extra_kwds['mesh_info_vars']) diff --git a/hysop/numerics/remesh/remesh.py b/hysop/numerics/remesh/remesh.py index bf2c38bd199f757eb93eb1be004ff448814a2da7..c830e0d1dbce0a9c7e844a4b25693e1987a28a3e 100644 --- a/hysop/numerics/remesh/remesh.py +++ b/hysop/numerics/remesh/remesh.py @@ -1,87 +1,101 @@ from hysop.constants import __VERBOSE__, __DEBUG__ -from hysop.tools.enum import EnumFactory +from hysop.tools.enum import EnumFactory from hysop.tools.types import check_instance from hysop.numerics.remesh.kernel_generator import Kernel, SymmetricKernelGenerator -Remesh = EnumFactory.create('Remesh', - ['L1_0', 'L2_1','L2_2','L4_2','L4_4','L6_4','L6_6','L8_4', # lambda remesh kernels - 'L2_1s','L2_2s','L4_2s','L4_4s','L6_4s','L6_6s','L8_4s', # splitted lambda remesh kernels - 'Mp4', 'Mp6', 'Mp8', # Mprimes kernels: Mp4 = M'4 = L2_1 and Mp6 = M'6 = L4_2 +Remesh = EnumFactory.create( + 'Remesh', + ['L1_0', 'L2_1', 'L2_2', 'L4_2', 'L4_4', 'L6_4', 'L6_6', 'L8_4', # lambda remesh kernels + 'L2_1s', 'L2_2s', 'L4_2s', 'L4_4s', 'L6_4s', 'L6_6s', 'L8_4s', # splitted lambda remesh kernels + 'Mp4', 'Mp6', 'Mp8', # Mprimes kernels: Mp4 = M'4 = L2_1 and Mp6 = M'6 = L4_2 + 'M4', 'M8', # M kernels 'O2', 'O4', # Corrected kernels, allow a large CFL number 'L2', # Corrected and limited lambda 2 ]) + class RemeshKernelGenerator(SymmetricKernelGenerator): - def configure(self,n): - return super(RemeshKernelGenerator,self).configure(n=n, H=None) + def configure(self, n): + return super(RemeshKernelGenerator, self).configure(n=n, H=None) + class RemeshKernel(Kernel): def __init__(self, moments, regularity, - verbose = __DEBUG__, - split_polys=False, - override_cache=False): + verbose=__DEBUG__, + split_polys=False, + override_cache=False): generator = RemeshKernelGenerator(verbose=verbose) generator.configure(n=moments) kargs = generator.solve(r=regularity, override_cache=override_cache, - split_polys=split_polys, no_wrap=True) + split_polys=split_polys, no_wrap=True) - super(RemeshKernel,self).__init__(**kargs) + super(RemeshKernel, self).__init__(**kargs) @staticmethod def from_enum(remesh): check_instance(remesh, Remesh) remesh = str(remesh) - assert remesh[0]=='L' and (remesh!='L2'), \ - 'Only lambda remesh kernels are supported.' - remesh=remesh[1:] - if remesh[-1] == 's': - remesh = remesh[:-1] - split_polys=True + assert remesh[0] == 'L' and (remesh != 'L2') or (remesh in ('M4', 'M8')), \ + 'Only lambda remesh kernels are supported.' 
+        if remesh in ('M4', 'M8'):
+            # hand-built M kernels (cubic B-spline M4; M8 is declared in the
+            # enum but has no polynomial definition here yet)
+            from hysop.deps import sm
+            x = sm.abc.x
+            if remesh == 'M4':
+                # B-spline pieces on [0,1] and [1,2]; the full symmetric
+                # support [-2,2] is assembled below by mirroring with x -> -x.
+                M4 = (sm.Poly((1/sm.Rational(6))*((2-x)**3-4*(1-x)**3), x),
+                      sm.Poly((1/sm.Rational(6))*((2-x)**3), x))
+                return Kernel(n=2, r=4, deg=3, Ms=2, Mh=None, H=None, remesh=True,
+                              P=(M4[1].subs(x, -x), M4[0].subs(x, -x), M4[0], M4[1]))
+            else:
+                msg = 'Remesh kernel {} has not been implemented yet.'.format(remesh)
+                raise NotImplementedError(msg)
         else:
-            split_polys=False
-        remesh = [int(x) for x in remesh.split('_')]
-        assert len(remesh) == 2
-        assert remesh[0] >= 1
-        assert remesh[1] >= 0
-        return RemeshKernel(remesh[0], remesh[1], split_polys=split_polys)
+            remesh = remesh[1:]
+            if remesh[-1] == 's':
+                remesh = remesh[:-1]
+                split_polys = True
+            else:
+                split_polys = False
+            remesh = [int(x) for x in remesh.split('_')]
+            assert len(remesh) == 2
+            assert remesh[0] >= 1
+            assert remesh[1] >= 0
+            return RemeshKernel(remesh[0], remesh[1], split_polys=split_polys)
 
     def __str__(self):
         return 'RemeshKernel(n={}, r={}, split={})'.format(self.n, self.r, self.poly_splitted)
 
 
-if __name__=='__main__':
+if __name__ == '__main__':
     import numpy as np
     from matplotlib import pyplot as plt
 
-    for i in xrange(1,5):
+    for i in xrange(1, 5):
         p = 2*i
         kernels = []
-        for r in [1,2,4,8]:
+        for r in [1, 2, 4, 8]:
             try:
-                kernel = RemeshKernel(p,r)
+                kernel = RemeshKernel(p, r)
                 kernels.append(kernel)
             except RuntimeError:
-                print 'Solver failed for p={} and r={}.'.format(p,r)
+                print 'Solver failed for p={} and r={}.'.format(p, r)
 
-        if len(kernels)==0:
+        if len(kernels) == 0:
             continue
 
         k0 = kernels[0]
 
         fig = plt.figure()
         plt.xlabel(r'$x$')
-        plt.ylabel(r'$\Lambda_{'+'{},{}'.format(p,'r')+'}$')
-        X = np.linspace(-k0.Ms-1,+k0.Ms+1,1000)
-        s = plt.subplot(1,1,1)
-        for i,k in enumerate(kernels):
-            s.plot(X,k(X),label=r'$\Lambda_{'+'{},{}'.format(p,k.r)+'}$')
-        s.plot(k0.I,k0.H,'or')
+        plt.ylabel(r'$\Lambda_{'+'{},{}'.format(p, 'r')+'}$')
+        X = np.linspace(-k0.Ms-1, +k0.Ms+1, 1000)
+        s = plt.subplot(1, 1, 1)
+        for i, k in enumerate(kernels):
+            s.plot(X, k(X), label=r'$\Lambda_{'+'{},{}'.format(p, k.r)+'}$')
+        s.plot(k0.I, k0.H, 'or')
         axe_scaling = 0.10
         ylim = s.get_ylim()
         Ly = ylim[1] - ylim[0]
-        s.set_ylim(ylim[0]-axe_scaling*Ly,ylim[1]+axe_scaling*Ly)
+        s.set_ylim(ylim[0]-axe_scaling*Ly, ylim[1]+axe_scaling*Ly)
         s.legend()
 
         plt.show(block=True)
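
The two polynomials hard-coded in the new 'M4' branch of RemeshKernel.from_enum are the pieces of the cubic B-spline on [0,1] and [1,2]. The standalone check below (plain sympy, outside of hysop; the names p0, p1 and weights are illustrative only, not part of the patch) verifies the properties the remeshing code relies on: the four weights seen by a particle sum to one, the kernel is continuous at |x| = 1, and its support ends at |x| = 2.

    # Sanity check of the M4 (cubic B-spline) pieces used in from_enum above.
    # Assumes only sympy; p0/p1 are copied from the patch.
    import sympy as sm

    x = sm.symbols('x')
    p0 = sm.Rational(1, 6)*((2 - x)**3 - 4*(1 - x)**3)   # kernel for 0 <= |x| <= 1
    p1 = sm.Rational(1, 6)*(2 - x)**3                    # kernel for 1 <= |x| <= 2

    # For a particle at x in [0, 1], the four surrounding grid points lie at
    # distances 1+x, x, 1-x and 2-x, so their remeshing weights are:
    weights = (p1.subs(x, 1 + x), p0, p0.subs(x, 1 - x), p1.subs(x, 2 - x))

    assert sm.simplify(sum(weights) - 1) == 0                    # partition of unity
    assert p0.subs(x, 0) == sm.Rational(2, 3)                    # M4(0) = 2/3
    assert p0.subs(x, 1) == p1.subs(x, 1) == sm.Rational(1, 6)   # continuity at |x| = 1
    assert p1.subs(x, 2) == 0                                    # support half-width is 2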
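For 'M4', from_enum returns a plain kernel_generator.Kernel rather than a RemeshKernel, which is why the autotunable remesh kernel above now accepts both types. A minimal usage sketch, assuming a working hysop (Python 2) environment and that Remesh enum values are passed directly as in existing callers:

    # Sketch only, not part of the patch.
    from hysop.numerics.remesh.remesh import Remesh, RemeshKernel
    from hysop.numerics.remesh.kernel_generator import Kernel

    k_lambda = RemeshKernel.from_enum(Remesh.L4_2)   # generator-based lambda kernel
    k_m4 = RemeshKernel.from_enum(Remesh.M4)         # hand-built cubic B-spline

    assert isinstance(k_lambda, RemeshKernel)
    assert isinstance(k_m4, Kernel) and not isinstance(k_m4, RemeshKernel)
    print 'M4 support half-width: {}'.format(k_m4.Ms)   # 2, used to size ghost layers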