diff --git a/Examples/mainJM.py b/Examples/mainJM.py
index 8aa4df466bc6721efc411b2db16fc7f3b55b11a5..eb321bd7c924bc6837b0da10fa27cd9f5ead1be2 100644
--- a/Examples/mainJM.py
+++ b/Examples/mainJM.py
@@ -20,7 +20,7 @@ def scalaire(x, y, z):
 
 def run():
     # Parameters
-    timeStep = 1.  # 0.02
+    timeStep = 0.02
     finalTime = 1.
     outputFilePrefix = './res/RK2_'
     outputModulo = 0
@@ -31,8 +31,6 @@ def run():
     box = pp.Box(3, length=[1., 1., 1.], origin=[0., 0., 0.])
 
     ## Fields
-    #scal = pp.AnalyticalField(domain=box, formula=scalaire, name='Scalar')
-    #velo = pp.AnalyticalField(domain=box, formula=vitesse, name='Velocity', vector=True)
     scal = pp.ContinuousField(domain=box, name='Scalar')
     velo = pp.ContinuousField(domain=box, name='Velocity', vector=True)
 
@@ -47,7 +45,6 @@ def run():
 
     ## Setting solver to Problem
     pb.setSolver(finalTime, timeStep, 'gpu', src='./mainJM_kernels.cl')
-    #pb.setSolver(finalTime, timeStep, 'gpu')
 
     # ## Setting io to Problem
     pb.setIO(pp.Printer(fields=[scal, velo], frequency=outputModulo, outputPrefix=outputFilePrefix))
@@ -55,10 +52,11 @@ def run():
     t1 = time.time()
 
     ## Solve problem
-    #pb.solve()
+    pb.solve()
 
     tf = time.time()
 
+    print "\n"
     print "Total time : ", tf - t0, "sec (CPU)"
     print "Init time : ", t1 - t0, "sec (CPU)"
     print "Solving time : ", tf - t1, "sec (CPU)"
diff --git a/HySoP/hysop/__init__.py.in b/HySoP/hysop/__init__.py.in
index 14ffc8da560d25f8dd3f8b892a4a793d17a5e57a..435cd62c9bbead836e7b422042c0b4abcb7a6a2f 100755
--- a/HySoP/hysop/__init__.py.in
+++ b/HySoP/hysop/__init__.py.in
@@ -10,6 +10,7 @@ __all__ = ['Box', 'CartesianTopology', 'ScalarField']
 import os
 import site
 import mpi4py.MPI as MPI
+import pyopencl as cl
 
 rank_world = MPI.COMM_WORLD.Get_rank()
 if(rank_world == 0):
diff --git a/HySoP/hysop/constants.py b/HySoP/hysop/constants.py
index ebe7f3745c3894036d95733f2bfd451ddbb034bd..434b9d86b9012fd1c4d4aae2080eac24881d0acf 100644
--- a/HySoP/hysop/constants.py
+++ b/HySoP/hysop/constants.py
@@ -16,6 +16,7 @@ PI = math.pi
 PARMES_REAL = np.float64
 PARMES_INTEGER = np.uint32
 PARMES_REAL_GPU = np.float32
+PARMES_INTEGER_GPU = np.uint32
 ## default array layout (fortran or C convention)
 ORDER = 'F'
 DEBUG = 0
diff --git a/HySoP/hysop/fields/topology.py b/HySoP/hysop/fields/topology.py
index 41448182e61b8e3129139e8852ddcab889a06927..c57cc2b188f81d79e74251a4d78719b565676c45 100644
--- a/HySoP/hysop/fields/topology.py
+++ b/HySoP/hysop/fields/topology.py
@@ -65,7 +65,7 @@ class CartesianTopology(object):
                 localResolution[i] = nbpoints[i] + remainingPoints[i]
         start = np.zeros((domain.dimension), dtype=PARMES_INTEGER)
         start[:self.dim] = self.coords * nbpoints
-        self.mesh = LocalMesh(self.rank, resolution=localResolution, start=start, dom_size=self.domain.length/self.resolution, ghosts=self.ghosts)
+        self.mesh = LocalMesh(self.rank, resolution=localResolution, start=start, dom_size=self.domain.length / self.resolution, ghosts=self.ghosts)
 
     def __str__(self):
         """ Topology info display """
diff --git a/HySoP/hysop/operator/Transport.py b/HySoP/hysop/operator/Transport.py
index 8bf58be176205a5c5f8a4ae44f3cd26a6c4ed3ab..0490eb21b22354dd80e835deca2b45ac5e2de75f 100644
--- a/HySoP/hysop/operator/Transport.py
+++ b/HySoP/hysop/operator/Transport.py
@@ -42,7 +42,7 @@ class Transport(ContinuousOperator):
         """ToString method"""
         s = "Advection operator (ContinuousOperator)"
         if self.discreteOperator is not None:
-            s += "with the following discretization:\n"
+            s += " with the following discretization:\n"
             s += str(self.discreteOperator)
         else:
             s += ". Not discretised"
diff --git a/HySoP/hysop/operator/advection_d.py b/HySoP/hysop/operator/advection_d.py
index 9cb78ff52b1a2b54b212a64c0e438b3e3c69ff6e..6f5226e054e234da7687b63016e85f6a20903b7e 100644
--- a/HySoP/hysop/operator/advection_d.py
+++ b/HySoP/hysop/operator/advection_d.py
@@ -3,8 +3,10 @@
 @package operator
 Discrete advection representation
 """
+from ..constants import *
 from .discrete import DiscreteOperator
 import pyopencl as cl
+import numpy as np
 import time
 
 
@@ -45,12 +47,49 @@ class Advection_P(DiscreteOperator):
         # self.total_bytes_accessed = 0
 
     def apply(self, t, dt, splittingDirection):
-        c_time = time.time()
-        print "Advection operator step, direction :", splittingDirection,
-        c_time = (time.time() - c_time)
-        self.compute_time += c_time
-        if self.numMethod is None:
+        c_time = 0
+        #print "Advection operator step :",
+        if self.numMethod is not None:
+            dim = self.scalar.topology.dim
+            # Compute OpenCL Global Index Space
+            l = list(self.scalar.data.shape)
+            nb_ligne = l.pop(splittingDirection)
+            opencl_gis = tuple(l)
+            # \todo Compute OpenCL Local Index Space
+            l = list(opencl_gis)
+            l[0] = 1
+            opencl_lis = tuple(l)
+            # Compute local domain min coord
+            coord_min = np.ones(4, dtype=PARMES_REAL_GPU)
+            coord_min[0:dim] = self.scalar.topology.mesh.start
+            # Compute local domain length
+            mesh_size = np.ones(4, dtype=PARMES_REAL_GPU)
+            mesh_size[0:dim] = self.scalar.topology.mesh.size
+            # Compute global domain resolution
+            domain_length = np.ones(4, dtype=PARMES_REAL_GPU)
+            domain_length[0:dim] = self.scalar.topology.domain.length
+            # Initialize particle scalar
+            evt_copy = cl.enqueue_copy(self.numMethod.queue, self.res_scalar.gpu_data, self.scalar.gpu_data)
+            # Perform advection
+            evt = self.numMethod.launch(opencl_gis, opencl_lis,
+                                        self.velocity.gpu_data[splittingDirection],
+                                        self.res_position.gpu_data,
+                                        PARMES_REAL_GPU(t),
+                                        PARMES_REAL_GPU(dt),
+                                        PARMES_INTEGER_GPU(splittingDirection),
+                                        PARMES_INTEGER_GPU(nb_ligne),
+                                        coord_min,
+                                        domain_length,
+                                        mesh_size)
+            self.numMethod.queue.finish()
+            c_time = (evt_copy.profile.end - evt_copy.profile.start) * 1e-9
+            c_time += (evt.profile.end - evt.profile.start) * 1e-9
+        else:
+            c_time = time.time()
             print "nothing to do"
+            c_time = (time.time() - c_time)
+        self.total_time += c_time
+        return c_time
             #     """
     #     Apply advection operator.
 
@@ -124,6 +163,10 @@ class Advection_P(DiscreteOperator):
     #         self.total_bytes_accessed += 2 * nb_ligne * np.prod(l) * 4  # 2 floats lus + 1+DIM float écrits
     #         #print "Advection:", self.compute_time * 1e-9
 
+
+    def printComputeTime(self):
+        print "Advection total time : ", self.total_time
+
     def __str__(self):
         s = "Advection_P (DiscreteOperator). " + DiscreteOperator.__str__(self)
         return s
diff --git a/HySoP/hysop/operator/continuous.py b/HySoP/hysop/operator/continuous.py
index a1083989aed3d47dc8ba3f44ee5cee395195e2db..0375e3ec60f3f6cbcebc64273666608c55bf0f1c 100644
--- a/HySoP/hysop/operator/continuous.py
+++ b/HySoP/hysop/operator/continuous.py
@@ -37,7 +37,19 @@ class ContinuousOperator:
                 self.variables.append(v)
 
     def apply(self, *args):
-        self.discreteOperator.apply(*args)
+        return self.discreteOperator.apply(*args)
+
+    def setMethod(self, method):
+        if self.discreteOperator is not None:
+            self.discreteOperator.setMethod(method)
+        else:
+            raise ValueError("Cannot set mumerical method to non discretized operator")
+
+    def printComputeTime(self):
+        if self.discreteOperator is not None:
+            self.discreteOperator.printComputeTime()
+        else:
+            raise ValueError("Cannot print compute time of a non discretized operator")
 
     @abstractmethod
     def discretize(self, spec=None):
diff --git a/HySoP/hysop/operator/discrete.py b/HySoP/hysop/operator/discrete.py
index 301674518908a166a8484d54c6d0b8b3c1d5668f..600b1833f9a140c707422c26adc08850ee0883ae 100644
--- a/HySoP/hysop/operator/discrete.py
+++ b/HySoP/hysop/operator/discrete.py
@@ -28,7 +28,7 @@ class DiscreteOperator:
         self.gpu_kernel = None
         self.gpu_kernel_name = ""
         self.discreteOperator = self
-        self.compute_time = 0.
+        self.total_time = 0.
 
     def setMethod(self, method):
         """
@@ -49,6 +49,13 @@ class DiscreteOperator:
             if self.variables.count(v) == 0:
                 self.variables.append(v)
 
+    @abstractmethod
+    def printComputeTime(self):
+        """
+        Print total computing time
+        """
+        raise NotImplementedError("Need to override method in a subclass of " + providedClass)
+
     @abstractmethod
     def apply(self):
         """
diff --git a/HySoP/hysop/operator/remeshing.py b/HySoP/hysop/operator/remeshing.py
index c49c0375314e93dc64c6a08438772d9095c6c27c..7ba2e597f80663a4224373e77468490376f531d7 100644
--- a/HySoP/hysop/operator/remeshing.py
+++ b/HySoP/hysop/operator/remeshing.py
@@ -4,6 +4,7 @@
 Operator representation
 """
 from .discrete import DiscreteOperator
+from ..constants import *
 import pyopencl as cl
 import time
 
@@ -32,13 +33,13 @@ class Remeshing(DiscreteOperator):
         ## Particles scalar values.
         self.pscal = partScalar
         ## Result scalar
-        self.resultScalar = resscal
-        self.addVariable([self.ppos, self.pscal, self.resultScalar])
+        self.res_scalar = resscal
+        self.addVariable([self.ppos, self.pscal, self.res_scalar])
         self.numMethod = method
         ## input fields
         self.input = [self.ppos, self.pscal]
         ## output fields
-        self.output = [self.resultScalar]
+        self.output = [self.res_scalar]
         self.needSplitting = True
         # self.gpu_kernel_name = "remeshing"
         # self.compute_time = [0., 0., 0.]
@@ -51,12 +52,43 @@ class Remeshing(DiscreteOperator):
         """
         Apply Remeshing operator.
         """
-        c_time = time.time()
-        print "Remeshing operator step, direction :", splittingDirection,
-        c_time = (time.time() - c_time)
-        self.compute_time += c_time
-        if self.numMethod is None:
+        c_time = 0
+        #print "Remeshing operator step :",
+        if self.numMethod is not None:
+            dim = self.res_scalar.topology.dim
+            # Compute OpenCL Global Index Space
+            l = list(self.res_scalar.data.shape)
+            nb_ligne = l.pop(splittingDirection)
+            opencl_gis = tuple(l)
+            # \todo Compute OpenCL Local Index Space
+            l = list(opencl_gis)
+            l[0] = 1
+            opencl_lis = tuple(l)
+            # Compute local domain min coord
+            coord_min = np.ones(4, dtype=PARMES_REAL_GPU)
+            coord_min[0:dim] = self.res_scalar.topology.mesh.start
+            # Compute local domain length
+            mesh_size = np.ones(4, dtype=PARMES_REAL_GPU)
+            mesh_size[0:dim] = self.res_scalar.topology.mesh.size
+            # Perform remeshing
+            evt = self.numMethod.launch(opencl_gis, opencl_lis,
+                                        self.ppos.gpu_data,
+                                        self.pscal.gpu_data,
+                                        self.res_scalar.gpu_data,
+                                        PARMES_REAL_GPU(t),
+                                        PARMES_REAL_GPU(dt),
+                                        PARMES_INTEGER_GPU(splittingDirection),
+                                        PARMES_INTEGER_GPU(nb_ligne),
+                                        coord_min,
+                                        mesh_size)
+            self.numMethod.queue.finish()
+            c_time = (evt.profile.end - evt.profile.start) * 1e-9
+        else:
+            c_time = time.time()
             print "nothing to do"
+            c_time = (time.time() - c_time)
+        self.total_time += c_time
+        return c_time
 
         # c_time = time.time()
         # if self.gpu_kernel is None:
@@ -95,8 +127,11 @@ class Remeshing(DiscreteOperator):
         #     self.total_bytes_accessed += ((2 + 1) * nb_ligne) * np.prod(l) * 4  # 2 float lus + 1 float écrits
         #     #print "Remeshing M'6:", self.compute_time * 1e-9
 
+    def printComputeTime(self):
+        print "Remeshing total time : ", self.total_time
+
     def __str__(self):
-        s = "Advection_P (DiscreteOperator). " + DiscreteOperator.__str__(self)
+        s = "Remeshing (DiscreteOperator). " + DiscreteOperator.__str__(self)
         return s
 
 if __name__ == "__main__":
diff --git a/HySoP/hysop/operator/splitting.py b/HySoP/hysop/operator/splitting.py
index 82b6657c1cb03f1dee67f7ce2ba923d5e4a8dbee..0eda955b58e44110889b7831b231b5f6dcfc37e6 100644
--- a/HySoP/hysop/operator/splitting.py
+++ b/HySoP/hysop/operator/splitting.py
@@ -26,19 +26,34 @@ class Splitting(DiscreteOperator):
         [self.splitting.append((i, 0.5)) for i in xrange(dim - 1)]
         self.splitting.append((dim - 1, 1.))
         [self.splitting.append((dim - 2 - i, 0.5)) for i in xrange(dim - 1)]
+        self.compute_time_details = [[0. for i in xrange(dim)] for op in self.operators]
 
     def apply(self, t, dt):
         """
         Apply Remeshing operator.
         """
-        print "Splitting operator step :"
-        c_time = time.time()
+        #print "Splitting operator step :"
+        c_time = 0.
         for split in self.splitting:
-            print "direction : " + str(split[0]) + " dt*" + str(split[1])
+            #print "direction : " + str(split[0]) + " dt*" + str(split[1])
+            op_index = 0
             for op in self.operators:
-                op.apply(t, split[1] * dt, split[0])
-        c_time = (time.time() - c_time)
-        self.compute_time += c_time
+                temp_time = op.apply(t, split[1] * dt, split[0])
+                c_time += temp_time
+                self.compute_time_details[op_index][split[0]] += temp_time
+                op_index += 1
+        self.total_time += c_time
+        return c_time
+
+    def printComputeTime(self):
+        print "Splitting total time : ", self.total_time
+        op_index = 0
+        for op in self.operators:
+            print "  ",
+            op.printComputeTime()
+            for d in xrange(len(self.compute_time_details[op_index])):
+                print "     direction :", d, " : ", self.compute_time_details[op_index][d]
+            op_index += 1
 
     def __str__(self):
         s = "Splitting (DiscreteOperator). Splitting steps : \n"
diff --git a/HySoP/hysop/particular_solvers/basic.py b/HySoP/hysop/particular_solvers/basic.py
index c5783fd23b3d02f118907f37741b4947abe8962e..115389e5ef75e0a5ad3e546480548915254e8fce 100644
--- a/HySoP/hysop/particular_solvers/basic.py
+++ b/HySoP/hysop/particular_solvers/basic.py
@@ -79,15 +79,15 @@ class ParticleSolver(Solver):
             else:
                 op.discretize()
         ## Create remeshing operator
-        remesh = Remeshing(partPositions=self.p_position,
-                           partScalar=self.p_scalar,
-                           resscal=self.advection.scalar,
-                           method=self.RemeshingMethod)
+        self.remesh = Remeshing(partPositions=self.advection.discreteOperator.res_position,
+                                partScalar=self.advection.discreteOperator.res_scalar,
+                                resscal=self.advection.discreteOperator.scalar,
+                                method=self.RemeshingMethod)
         for op in self.problem.operators:
             if op.discreteOperator.needSplitting:
                 if op is self.advection:
                     index = self.problem.operators.index(op)
-                    self.problem.operators[index] = Splitting([op, remesh])
+                    self.problem.operators[index] = Splitting([op, self.remesh])
                 else:
                     self.problem.operators[index] = Splitting([op])
 
diff --git a/HySoP/hysop/particular_solvers/gpu.py b/HySoP/hysop/particular_solvers/gpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9a98bd3c766e20a1c89ef68bb091b435324123f
--- /dev/null
+++ b/HySoP/hysop/particular_solvers/gpu.py
@@ -0,0 +1,422 @@
+# -*- coding: utf-8 -*-
+"""
+@package Utils
+GPU Particular solver description.
+"""
+from ..constants import *
+from basic import ParticleSolver
+from ..fields.continuous import ContinuousField
+from ..operator.Transport import Transport
+from ..operator.remeshing import Remeshing
+from ..operator.splitting import Splitting
+from ..tools.timer import Timer
+from ..tools.printer import Printer
+import pyopencl as cl
+from ..tools.gpu_data_transfer import hostToDevice
+
+
+class GPUParticleSolver(ParticleSolver):
+    """
+    GPU Particular solver description.
+    Link with differents numericals methods used. Prepare GPU side (memory, kernels, ...)
+    """
+    def __init__(self, problem, t_end, dt,
+                 ODESolver=None,
+                 InterpolationMethod=None,
+                 RemeshingMethod=None,
+                 splittingMethod='strang',
+                 timer=None,
+                 io=None,
+                 platform_id=0, device_id=0,
+                 device_type='gpu',
+                 src=None):
+        """
+        Constructor.
+        Create a solver description.
+        """
+        ParticleSolver.__init__(self, problem, t_end, dt)
+        self.user_gpu_src = src
+        print "=== Discover OpenCL environment ==="
+        #Get platform.
+        try:
+            self.platform = cl.get_platforms()[platform_id]
+        except IndexError:
+            print "  Incorrect platform_id :", platform_id, ".",
+            print " Only ", len(cl.get_platforms()), " available.",
+            print " Getting defalut platform. "
+            self.platform = cl.get_platforms()[0]
+        print "  Platform   "
+        print "  - Name       :", self.platform.name
+        print "  - Version    :", self.platform.version
+        #Get device.
+        self.device = self.platform.get_devices(eval("cl.device_type." + device_type.upper()))[device_id]
+        print "  Device"
+        print "  - Name                :", self.device.name
+        print "  - Type                :", cl.device_type.to_string(self.device.type)
+        print "  - C Version           :", self.device.opencl_c_version
+        print "  - Global mem size     :", self.device.global_mem_size / (1024 ** 3), "GB"
+        print "===\n"
+        #Creates GPU Context
+        self.ctx = cl.Context([self.device])
+        #Create CommandQueue on the GPU Context
+        self.queue = cl.CommandQueue(self.ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
+
+    def initialize(self):
+        """
+        Solver initialisation for a given DiscreteProblem.DiscreteProblem.
+
+        @param discreteProblem : Problem to initialize.
+        """
+        ParticleSolver.initialize(self)
+        ## kernels compilation
+        print "=== Kernel sources compiling ==="
+        print "Sources files (default): ", GPU_SRC
+        f = open(GPU_SRC, 'r')
+        self.gpu_src = "".join(f.readlines())
+        f.close()
+        if self.user_gpu_src is not None:
+            print "Sources files (user): ", self.user_gpu_src
+            f = open(self.user_gpu_src, 'r')
+            self.gpu_src += "".join(f.readlines())
+            f.close()
+        build_options = "-cl-single-precision-constant -cl-opt-disable"
+        build_options += " -D DIM=" + str(self.problem.topology.dim)
+        build_options += " -D MAX_LINE_POINTS=" + str(np.max(self.problem.topology.mesh.resolution))
+        self.prg = cl.Program(self.ctx, self.gpu_src).build(build_options)
+        print self.prg.get_build_info(self.device, cl.program_build_info.LOG)
+        print self.prg.get_build_info(self.device, cl.program_build_info.OPTIONS)
+        print self.prg.get_build_info(self.device, cl.program_build_info.STATUS)
+        print "===\n"
+        ## Convert fields to simple preceision floats (PARMES_REAL_GPU)
+        for f in self.problem.variables:
+            for df in f.discreteField:
+                if f.vector:
+                    for d in xrange(len(df.data)):
+                        df.data[d] = df.data[d].astype(PARMES_REAL_GPU)
+                else:
+                    df.data = df.data.astype(PARMES_REAL_GPU)
+        ## Create OpenCL Buffers from fields
+        print "=== OpenCL Buffer allocations ==="
+        total_mem_used = 0
+        for f in self.problem.variables:
+            print f.name, "allocate ",
+            temp_mem_used = 0
+            for df in f.discreteField:
+                if f.vector:
+                    for d in xrange(len(df.data)):
+                        df.gpu_data[d] = cl.Buffer(self.ctx,
+                                                   cl.mem_flags.READ_WRITE,
+                                                   size=df.data[d].nbytes)
+                        temp_mem_used += df.data[d].nbytes
+                else:
+                    df.gpu_data = cl.Buffer(self.ctx,
+                                            cl.mem_flags.READ_WRITE,
+                                            size=df.data.nbytes)
+                    temp_mem_used += df.data.nbytes
+            total_mem_used += temp_mem_used
+            print temp_mem_used, "Bytes (", temp_mem_used / (1024 ** 2), "MB)"
+        print "Total Global Memory used : ", total_mem_used, "Bytes (", total_mem_used / (1024 ** 2), "MB)",
+        print "({0:.3f} %)".format(100 * total_mem_used / (self.device.global_mem_size * 1.))
+        print "===\n"
+        print "=== OpenCL Buffer initialisation ==="
+        data, transfer_time, compute_time = 0, 0., 0.
+        for f in self.problem.variables:
+            initKernel = None
+            # Looking for initKernel
+            for k in self.prg.all_kernels():
+                if k.get_info(cl.kernel_info.FUNCTION_NAME) == ('init' + f.name):
+                    initKernel = cl.Kernel(self.prg, 'init' + f.name)
+            if initKernel is not None:
+                for df in f.discreteField:
+                    print f.name, ": kernel init ... ",
+                    global_wg = tuple(df.resolution.tolist())
+                    if global_wg[0] / 64 > 0:
+                        local_wg = (64, 1, 1)
+                    else:
+                        local_wg = None
+                    dim = df.topology.dim
+                    coord_min = np.ones(4, dtype=PARMES_REAL_GPU)
+                    mesh_size = np.ones(4, dtype=PARMES_REAL_GPU)
+                    coord_min[0:dim] = df.topology.mesh.start
+                    mesh_size[0:dim] = df.topology.mesh.size
+                    if f.vector:
+                        if dim == 3:
+                            evt = initKernel(self.queue,
+                                             global_wg,
+                                             local_wg,
+                                             df.gpu_data[0], df.gpu_data[1], df.gpu_data[2],
+                                             coord_min,
+                                             mesh_size)
+                        else:
+                            evt = initKernel(self.queue,
+                                             global_wg,
+                                             local_wg,
+                                             df.gpu_data[0], df.gpu_data[1],
+                                             coord_min,
+                                             mesh_size)
+                    else:
+                        evt = initKernel(self.queue,
+                                         global_wg,
+                                         local_wg,
+                                         df.gpu_data,
+                                         coord_min,
+                                         mesh_size)
+                    self.queue.finish()
+                    temp_time = (evt.profile.end - evt.profile.start) * 1e-9
+                    print "Done in ", temp_time, "sec"
+                    compute_time += temp_time
+            else:
+                for df in f.discreteField:
+                    if df.contains_data:
+                        print f.name, ":",
+                        temp_data, temp_time = hostToDevice(self.queue, df)
+                        data += temp_data
+                        transfer_time += temp_time
+        if data > 0:
+            print "Total Transfers : ", data, "Bytes transfered at {0:.3f} GBytes/sec".format((data * 1e-9) / transfer_time)
+        if compute_time > 0.:
+            print "Total Computing  : ", compute_time, "sec"
+        ## Setting advection and remeshing kernels:
+        self.advection.setMethod(KernelLauncher(self.prg.advection, self.queue))
+        self.remesh.setMethod(KernelLauncher(self.prg.remeshing, self.queue))
+
+        # total_data, total_time = 0, 0.
+        # for f in self.problem.variables:
+        #     initKernel = None
+        #     for k in self.prg.all_kernels():
+        #         if k.get_info(cl.kernel_info.FUNCTION_NAME) == ('init'+f.name):
+        #             initKernel = k
+        #     if initKernel is not None:
+        #         print "Initialisation kernel found", initKernel
+        #     else:
+        #         for df in f.discreteField:
+        #             if df.contains_data:
+        #                 print f.name, ":",
+        #                 temp_data, temp_time = hostToDevice(self.queue, df)
+        #         total_data += temp_data
+        #         total_time += temp_time
+        # if total_data > 0:
+        #     print "Total : ", total_data, "Bytes transfered at {0:.3f} GBytes/sec".format((total_data * 1e-9) / total_time)
+        # self.queue.finish()
+        print "===\n"
+        ##\todo brancher les objets opencl sur les opérateurs et les fields
+
+    def __str__(self):
+        """ToString method"""
+        s = " Particular solver "
+        return s
+
+if __name__ == "__main__":
+    print __doc__
+    print "- Provided class : ParticularSolver"
+    print ParticularSolver.__doc__
+
+
+#     def parameters_initialization(self):
+#         # Modifying domains parameters to fit with float4 or int4 opencl objects
+#         self.gpu_shape = tuple(self.grid.elementNumber)
+#         padding_float = np.zeros(4 - self.grid.dimension, dtype=dtype_real_gpu)
+#         padding_uint = np.zeros(4 - self.grid.dimension, dtype=dtype_integer)
+#         self.grid.min = np.concatenate((self.grid.min, padding_float)).astype(dtype_real_gpu)
+#         self.grid.max = np.concatenate((self.grid.max, padding_float)).astype(dtype_real_gpu)
+#         self.grid.length = np.concatenate((self.grid.length, padding_float)).astype(dtype_real_gpu)
+#         self.grid.elementSize = np.concatenate((self.grid.elementSize, padding_float)).astype(dtype_real_gpu)
+#         self.grid.elementNumber = np.concatenate((self.grid.elementNumber, padding_uint + 1)).astype(dtype_integer)
+#         self.ppos.values.resize(self.gpu_shape, refcheck=False)
+
+#     def buffer_allocations(self):
+#         self.gpu_used_mem = 0
+#         for v in self.discreteProblem.variables:
+#             v.values = np.asarray(v.values, dtype=dtype_real_gpu)
+#             v.gpu_mem_object = cl.Buffer(self.ctx,
+#                                          cl.mem_flags.READ_WRITE,
+#                                          size=v.values.nbytes
+#                 )
+#             self.gpu_used_mem += v.gpu_mem_object.size
+#             print "Allocation " + v.name + " on gpu, size:", v.gpu_mem_object.size * 1e-6, 'MBytes'
+
+#         if debug > 0:
+#             debug_size = 0
+#             self.discreteProblem.advecOp.debug_integer = np.zeros(debug, dtype=dtype_integer)
+#             self.discreteProblem.advecOp.debug_integer_buffer = cl.Buffer(
+#                 self.ctx, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
+#                 hostbuf=self.discreteProblem.advecOp.debug_integer)
+#             debug_size += self.discreteProblem.advecOp.debug_integer_buffer.size
+#             self.discreteProblem.advecOp.debug_float = np.zeros(debug, dtype=dtype_real_gpu)
+#             self.discreteProblem.advecOp.debug_float_buffer = cl.Buffer(
+#                 self.ctx, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR,
+#                 hostbuf=self.discreteProblem.advecOp.debug_float)
+#             debug_size += self.discreteProblem.advecOp.debug_float_buffer.size
+#             self.gpu_used_mem += debug_size
+#             print "Allocation debug buffer on gpu, size:", debug_size, 'B'
+
+#     def initialize(self):
+#         """
+#         Solver initialisation for a given DiscreteProblem.DiscreteProblem.
+
+#         @param discreteProblem : Problem to initialize.
+#         """
+#         # OpenCL Buffer allocations
+#         print "\n===    Device memory allocations    ==="
+#         self.buffer_allocations()
+#         p = self.gpu_used_mem * 100 / (self.device.global_mem_size * 1.)
+#         print "Total memory allocated on gpu :", self.gpu_used_mem * 1e-6, "MBytes (", p, " % of total available memory)"
+
+#         # Kernels creation/compilation
+#         print "\n===    Kernel sources compiling    ==="
+#         f = open(os.path.join(os.path.split(os.path.abspath(__file__))[0], "gpu_src.cl"), 'r')
+#         self.gpu_src = "".join(f.readlines())
+#         f.close()
+#         if not self.user_src is None:
+#             f = open(self.user_src, 'r')
+#             self.gpu_src += "".join(f.readlines())
+#             f.close()
+#         build_options = "-cl-single-precision-constant -cl-opt-disable"
+#         build_options += " -D DIM=" + str(self.dim)
+#         build_options += " -D DEBUG=" + str(debug)
+#         build_options += " -D MAX_LINE_POINTS=" + str(np.max(self.grid.elementNumber))
+#         self.prg = cl.Program(self.ctx, self.gpu_src).build(build_options)
+#         print self.prg.get_build_info(self.device, cl.program_build_info.LOG)
+#         print self.prg.get_build_info(self.device, cl.program_build_info.OPTIONS)
+#         print self.prg.get_build_info(self.device, cl.program_build_info.STATUS)
+
+#         for op in self.discreteProblem.operators:
+#             for k in self.prg.all_kernels():
+#                 if op.gpu_kernel_name == k.get_info(cl.kernel_info.FUNCTION_NAME):
+#                     op.gpu_kernel = self.prg
+#                     op.gpu_queue = self.queue
+#                     op.gpu_shape = self.gpu_shape
+
+#         print "\n===    Data initialization    ==="
+#         for v in self.discreteProblem.variables:
+#             v_is_init = False
+#             print v.name,
+#             for i,k in enumerate(self.prg.all_kernels()):
+#                 if ('init' + v.name) == k.get_info(cl.kernel_info.FUNCTION_NAME):
+#                     print 'Initialization Kernel found ',
+#                     evt = eval("self.prg."+"init"+v.name)(self.queue,
+#                                                     self.gpu_shape,
+#                                                     None,
+#                                                     v.gpu_mem_object,
+#                                                     dtype_real_gpu(0.),
+#                                                     v.domain.min.astype(dtype_real_gpu),
+#                                                     v.domain.elementNumber.astype(dtype_integer),
+#                                                     v.domain.elementSize.astype(dtype_real_gpu)
+#                         )
+#                     self.queue.finish()
+#                     print 'compute time :', (evt.profile.end - evt.profile.start) * 1e-9, 's',
+#                     v.contains_data, v_is_init = False, True
+#             if not v_is_init:
+#                 v.init()
+#             print '... Done'
+
+#         print "\n===    Data Transfers host->device    ==="
+#         copy_evts = []
+#         transfered_size = 0
+#         for v in self.discreteProblem.variables:
+#             if v.contains_data:
+#                 print v.name,
+#                 if v.dimension == self.dim:
+#                     print "Memoire arrangée",
+#                     offset = 0
+#                     evt = cl.enqueue_copy(self.queue, v.gpu_mem_object,
+#                                           v.values[..., 0].ravel(),
+#                                           device_offset=np.long(offset))
+#                     copy_evts.append(evt)
+#                     offset += v.values[..., 0].nbytes
+#                     evt = cl.enqueue_copy(self.queue, v.gpu_mem_object,
+#                                           v.values[..., 1].swapaxes(1, 0).ravel(),
+#                                           device_offset=np.long(offset))
+#                     copy_evts.append(evt)
+#                     if self.dim == 3:
+#                         offset += v.values[..., 1].nbytes
+#                         evt = cl.enqueue_copy(self.queue, v.gpu_mem_object,
+#                                           v.values[..., 2].swapaxes(1, 0).swapaxes(2, 0).ravel(),
+#                                           device_offset=np.long(offset))
+#                         copy_evts.append(evt)
+#                 else:
+#                     print "Normal layout",
+#                     evt = cl.enqueue_copy(self.queue, v.gpu_mem_object, v.values)
+#                     copy_evts.append(evt)
+#                 print "Transfering", v.values.nbytes * 1e-6, "MBytes ..."
+#                 transfered_size += v.values.nbytes
+#         self.queue.finish()
+#         if len(copy_evts)>0:
+#             t = 0.
+#             for evt in copy_evts:
+#                 t += (evt.profile.end - evt.profile.start)
+#             print "Transfering host -> device :", t * 1e-9, "s \t", transfered_size / t, "GB/s"
+#         else:
+#             print "No transfers"
+
+#     def collect_data(self):
+#         self.queue.finish()
+#         print "\n===    Data Transfers device->host    ==="
+#         copy_evts = []
+#         transfered_size = 0
+#         for v in self.discreteProblem.variables:
+#             if v.contains_data:
+#                 print v.name,
+#                 if v.dimension == self.dim:
+#                     print "Memoire arrangée",
+#                     temp = np.empty(self.gpu_shape, dtype=dtype_real_gpu)
+#                     offset = 0
+#                     evt = cl.enqueue_copy(self.queue, temp, v.gpu_mem_object, device_offset=np.long(offset))
+#                     copy_evts.append(evt)
+#                     v.values[..., 0] = temp.reshape(self.gpu_shape)
+#                     offset += temp.nbytes
+#                     evt = cl.enqueue_copy(self.queue, temp, v.gpu_mem_object, device_offset=np.long(offset))
+#                     copy_evts.append(evt)
+#                     v.values[..., 1] = temp.reshape(self.gpu_shape).swapaxes(1, 0)
+#                     if self.dim == 3:
+#                         offset += temp.nbytes
+#                         evt = cl.enqueue_copy(self.queue, temp, v.gpu_mem_object, device_offset=np.long(offset))
+#                         copy_evts.append(evt)
+#                         v.values[..., 2] = temp.reshape(self.gpu_shape).swapaxes(2, 0).swapaxes(1, 0)
+#                 else:
+#                     print "Normal layout",
+#                     evt = cl.enqueue_copy(self.queue, v.values, v.gpu_mem_object)
+#                     copy_evts.append(evt)
+#                 print "Transfering", v.values.nbytes * 1e-6, "MBytes ..."
+#                 transfered_size += v.values.nbytes
+#         self.queue.finish()
+#         t = 0.
+#         for evt in copy_evts:
+#             t += (evt.profile.end - evt.profile.start)
+#         print "Transfering device -> host :", t * 1e-9, "s \t", transfered_size / t, "GB/s"
+
+#     def terminate(self):
+#         self.queue.finish()
+
+#     def __str__(self):
+#         """ToString method"""
+#         s = "GPU  Particular solver "
+#         s += "\n   - ODESolver : " + str(self.ODESolver)
+#         s += "\n   - Interpolation : " + str(self.InterpolationMethod)
+#         s += "\n   - Remeshing : " + str(self.RemeshingMethod)
+#         s += "\n   - GPU informations : "
+#         s += "\n       - platform : " + self.platform.name
+#         s += " OpenCL version: " + self.platform.version
+#         s += "\n       - device : " + self.device.name
+#         s += " Global Memory available : " + str(self.device.global_mem_size // 1024 // 1024) + " MB  ( " + str(self.device.global_mem_size) + " B)"
+#         s += "\n                  Space index max : " + str(self.device.max_work_item_sizes)
+#         s += " Max workgroup size : " + str(self.device.max_work_group_size)
+#         return s
+
+# if __name__ == "__main__":
+#     print __doc__
+#     print "- Provided class : GPUParticularSolver"
+#     print GPUParticularSolver.__doc__
+
+class KernelLauncher:
+    """
+    kernel launcher
+    """
+    def __init__(self, kernel, queue):
+        self.kernel = kernel
+        self.queue = queue
+        self.name = "Kernel Launcher for : " + self.kernel.function_name
+
+    def launch(self, *args):
+        evt = self.kernel(self.queue, *args)
+        return evt
diff --git a/HySoP/hysop/particular_solvers/gpu_src.cl b/HySoP/hysop/particular_solvers/gpu_src.cl
index fb7fa0265abb9c288c14f2b2a7fad1b403df7e00..c161a82344e0be8622f5a8ae27b43d9454454363 100644
--- a/HySoP/hysop/particular_solvers/gpu_src.cl
+++ b/HySoP/hysop/particular_solvers/gpu_src.cl
@@ -139,16 +139,13 @@ void advection_init_2_1(__private float4 size, __private float *line_c)
 
 // Integration Kernel (RK2 method)
 __kernel void advection(__global const float* gvelo,
-			__global float* gscal,
 			__global float* ppos,
-			__global float* pscal,
 			__private const float t,
 			__private const float dt,
 			__private const uint dir,
 			__private const uint line_nb_pts,
 			__private const float4 min_v,
 			__private const float4 length_v,
-			__private const uint4 nb,
 			__private const float4 size
 			)
 {
@@ -163,47 +160,63 @@ __kernel void advection(__global const float* gvelo,
   __private float line_c[DIM];  // Particle final position
   __private float gvelo_buffer_loc[MAX_LINE_POINTS]__attribute__((aligned)); // Local buffer to store gvelo for the current line
   __private uint ind_m, stride_m, stride_component;
+  uint nbx, nby, nbz;
   // Compute strides and index. Depend on array order (C order here)
   dx_dir = size[dir];
   if(DIM == 3)
     {
       if (dir == 0)
 	{
-	  ind_m = get_global_id(0)*nb.z+get_global_id(1);
-	  stride_component = (nb.x*nb.y*nb.z)*dir;
-	  stride_m = nb.y*nb.z;
+	  nbx = line_nb_pts;
+	  nby = get_global_size(0);
+	  nbz = get_global_size(1);
+	  ind_m = get_global_id(0)*nbz+get_global_id(1);
+	  stride_component = (nbx*nby*nbz)*dir;
+	  stride_m = nby*nbz;
 	}
       else if (dir == 1)
 	{
-	  ind_m = get_global_id(0)*nb.z+get_global_id(1);
-	  stride_component = (nb.x*nb.y*nb.z)*dir;
-	  stride_m = nb.x*nb.z;
+	  nbx = get_global_size(0);
+	  nby = line_nb_pts;
+	  nbz = get_global_size(1);
+	  ind_m = get_global_id(0)*nbz+get_global_id(1);
+	  stride_component = (nbx*nby*nbz)*dir;
+	  stride_m = nbx*nbz;
 	}
       else
 	{
-	  ind_m = get_global_id(0)*nb.y+get_global_id(1);
-	  stride_component = (nb.x*nb.y*nb.z)*dir;
-	  stride_m = nb.x*nb.y;
+	  nbx = get_global_size(0);
+	  nby = get_global_size(1);
+	  nbz = line_nb_pts;
+	  ind_m = get_global_id(0)*nby+get_global_id(1);
+	  stride_component = (nbx*nby*nbz)*dir;
+	  stride_m = nbx*nby;
 	}
     }
   else
     {
       if (dir == 0)
 	{
+	  nbx = line_nb_pts;
+	  nby = get_global_size(0);
+	  nbz = 1;
 	  ind_m = get_global_id(0);
-	  stride_component = (nb.x*nb.y)*dir;
-	  stride_m = nb.y;
+	  stride_component = (nbx*nby)*dir;
+	  stride_m = nby;
 	}
       else
 	{
+	  nbx = get_global_size(0);
+	  nby = line_nb_pts;
+	  nbz = 1;
 	  ind_m = get_global_id(0);
-	  stride_component = (nb.x*nb.y)*dir;
-	  stride_m = nb.x;
+	  stride_component = (nbx*nby)*dir;
+	  stride_m = nbx;
 	}
     }
   //Get velocity local copy
   for(p=0;p<line_nb_pts;p++)
-    gvelo_buffer_loc[p] = gvelo[ind_m + stride_component + p*stride_m]; //Read 1float
+    gvelo_buffer_loc[p] = gvelo[ind_m + p*stride_m]; //Read 1float
 
   // Advection algorithm along line
   for(p=0;p<line_nb_pts;p++)
@@ -233,11 +246,9 @@ __kernel void remeshing(__global const float* part,
 			__global float* gscal,
 			__private const float t,
 			__private const float dt,
-			__private const int dir,
+			__private const uint dir,
 			__private const uint line_nb_pts,
 			__private const float4 min_v,
-			__private const float4 max_v,
-			__private const uint4 nb,
 			__private const float4 size
 			)
 {
@@ -253,39 +264,59 @@ __kernel void remeshing(__global const float* part,
   __private float weights[6];  // Remeshing weights
   __private float gscal_buffer_loc[MAX_LINE_POINTS]; // Local buffer to store gscal for the current line
   __private uint ind_m, stride_m;
+  uint nbx, nby, nbz;
+  uint4 nb;
   // Compute strides and index. Depend on array order (C order here)
   dx_dir = size[dir];
   if(DIM == 3){
     if (dir == 0)
       {
+	nbx = line_nb_pts;
+	nby = get_global_size(0);
+	nbz = get_global_size(1);
+	nb = (uint4)(nbx, nby ,nbz, 1);
 	kernel_init_3_0(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal);
-	ind_m = get_global_id(0)*nb.z+get_global_id(1);
-	stride_m = nb.y*nb.z;
+	ind_m = get_global_id(0)*nbz+get_global_id(1);
+	stride_m = nby*nbz;
       }
     else if (dir == 1)
       {
+	nbx = get_global_size(0);
+	nby = line_nb_pts;
+	nbz = get_global_size(1);
+	nb = (uint4)(nbx, nby ,nbz, 1);
 	kernel_init_3_1(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal);
-	ind_m = get_global_id(0)*nb.z+get_global_id(1);
-	stride_m = nb.x*nb.z;
+	ind_m = get_global_id(0)*nbz+get_global_id(1);
+	stride_m = nbx*nbz;
       }
     else
       {
+	nbx = get_global_size(0);
+	nby = get_global_size(1);
+	nbz = line_nb_pts;
+	nb = (uint4)(nbx, nby ,nbz, 1);
 	kernel_init_3_2(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal);
-	ind_m = get_global_id(0)*nb.y+get_global_id(1);
-	stride_m = nb.x*nb.y;
+	ind_m = get_global_id(0)*nby+get_global_id(1);
+	stride_m = nbx*nby;
       }}
   else{
     if (dir == 0)
       {
+	nbx = line_nb_pts;
+	nby = get_global_size(0);
+	nb = (uint4)(nbx, nby ,nbz, 1);
 	kernel_init_2_0(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal);
 	ind_m = get_global_id(0);
-	stride_m = nb.y;
+	stride_m = nby;
       }
     else
       {
+	nbx = get_global_size(0);
+	nby = line_nb_pts;
+	nb = (uint4)(nbx, nby ,nbz, 1);
 	kernel_init_2_1(nb, &stride_along_dir, &stride_along_dir_scal, &ind_line, &ind_line_scal);
 	ind_m = get_global_id(0);
-	stride_m = nb.x;
+	stride_m = nbx;
       }
   }
 
diff --git a/HySoP/hysop/problem/problem.py b/HySoP/hysop/problem/problem.py
index fb209fcd723bf4c55f24c1e2bffc7d18c6e40faf..433b6744c17964923ca430c5961138100cffe5de 100644
--- a/HySoP/hysop/problem/problem.py
+++ b/HySoP/hysop/problem/problem.py
@@ -46,13 +46,18 @@ class Problem():
         self.solver.initialize()
 
     def solve(self):
-        print"\n\n Start solving ..."
+        print "\n\n Start solving ..."
         while not self.timer.end:
-            print "\n==== Iteration : " + str(self.timer.ite + 1) + "\t t=" + str(self.timer.t + self.timer.dt) + " ===="
+            print "==== Iteration : {0:3d}   t={1:6.2f} ====".format(self.timer.ite + 1, self.timer.t + self.timer.dt)
             for op in self.operators:
                 op.apply(self.timer.t, self.timer.dt)
             self.timer.step()
             self.io.step()
+        print "\n\n End solving\n"
+        print "=== Timings ==="
+        for op in self.operators:
+            op.printComputeTime()
+        print "===\n"
 
     def addVariable(self, cVariable):
         """
diff --git a/HySoP/hysop/tools/gpu_data_transfer.py b/HySoP/hysop/tools/gpu_data_transfer.py
new file mode 100644
index 0000000000000000000000000000000000000000..710bb902abe570498d72d6d38bf76342916834b2
--- /dev/null
+++ b/HySoP/hysop/tools/gpu_data_transfer.py
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+"""
+@package tools
+GPU Memory transfering methods
+"""
+from ..constants import *
+import pyopencl as cl
+
+
+def hostToDevice(queue, discreteField):
+    print "host->device :",
+    data, time = 0, 0.
+    if discreteField.vector:
+        evt = cl.enqueue_copy(queue, discreteField.gpu_data[XDIR],
+                              discreteField.data[XDIR].ravel())
+        data += discreteField.data[XDIR].nbytes
+        time += (evt.profile.end - evt.profile.start) * 1e-9
+        evt = cl.enqueue_copy(queue, discreteField.gpu_data[YDIR],
+                              discreteField.data[YDIR].swapaxes(1, 0).ravel())
+        data += discreteField.data[YDIR].nbytes
+        time += (evt.profile.end - evt.profile.start) * 1e-9
+        if len(discreteField.data) == 3:
+            evt = cl.enqueue_copy(queue, discreteField.gpu_data[ZDIR],
+                                  discreteField.data[ZDIR].swapaxes(1, 0).swapaxes(2, 0).ravel())
+            data += discreteField.data[ZDIR].nbytes
+            time += (evt.profile.end - evt.profile.start) * 1e-9
+    else:
+        evt = cl.enqueue_copy(queue, discreteField.gpu_data, discreteField.data)
+        data += discreteField.data.nbytes
+        time += (evt.profile.end - evt.profile.start) * 1e-9
+    print data, "Bytes transfered at {0:.3f} GBytes/sec".format(data / (time * 1024 ** 3))
+    return data, time
+
+
+def deviceToHost(queue, discreteField):
+    print "device->host :",
+    data, time = 0, 0.
+    # if discreteField.vector:
+    #     evt = cl.enqueue_copy(queue, discreteField.gpu_data[XDIR],
+    #                           discreteField.data[XDIR].ravel())
+    #     data += discreteField.data[XDIR].nbytes
+    #     time += (evt.profile.end - evt.profile.start) * 1e-9
+    #     evt = cl.enqueue_copy(queue, discreteField.gpu_data[YDIR],
+    #                           discreteField.data[YDIR].swapaxes(1, 0).ravel())
+    #     data += discreteField.data[YDIR].nbytes
+    #     time += (evt.profile.end - evt.profile.start) * 1e-9
+    #     if len(discreteField.data) == 3:
+    #         evt = cl.enqueue_copy(queue, discreteField.gpu_data[ZDIR],
+    #                               discreteField.data[ZDIR].swapaxes(1, 0).swapaxes(2, 0).ravel())
+    #         data += discreteField.data[ZDIR].nbytes
+    #         time += (evt.profile.end - evt.profile.start) * 1e-9
+    # else:
+    #     evt = cl.enqueue_copy(queue, discreteField.gpu_data, discreteField.data)
+    #     data += discreteField.data.nbytes
+    #     time += (evt.profile.end - evt.profile.start) * 1e-9
+    print data, "Bytes transfered at {0:.3f} GBytes/sec".format(0)  # (data * 1e-9) / time)
+    return data, time