From 6474c1d616f507923f6fef53c8d0f60a76cf82ac Mon Sep 17 00:00:00 2001
From: Jean-Matthieu Etancelin <jean-matthieu.etancelin@imag.fr>
Date: Wed, 26 Feb 2014 12:41:30 +0000
Subject: [PATCH] Hybrid computing fix (now working). Share the MPI
 communicator between Python and Fortran (convert the Python communicator to
 its Fortran handle). Fix Scales and FFT MPI precision.

---
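Note: the core of this patch is handing a Python-side (mpi4py) communicator
down to the Fortran solvers as a plain integer handle. A minimal sketch of the
conversion, assuming only mpi4py (the names below are illustrative, not part
of the patch):

    from mpi4py import MPI

    comm = MPI.COMM_WORLD               # Python-side communicator
    handle = comm.py2f()                # integer handle, valid on the Fortran side
    same_comm = MPI.Comm.f2py(handle)   # recover a Python communicator if needed
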
 HySoP/CMake/FindFFTW.cmake                    | 35 ++++++---
 HySoP/hysop/f2py/fftw2py.f90                  | 12 ++-
 HySoP/hysop/f2py/parameters.f90               |  4 +-
 HySoP/hysop/f2py/scales2py.f90                |  8 +-
 HySoP/hysop/fields/discrete.py                | 17 ++++-
 HySoP/hysop/gpu/gpu_discrete.py               | 18 +++--
 HySoP/hysop/gpu/gpu_particle_advection.py     |  8 --
 HySoP/hysop/mpi/topology.py                   |  1 +
 HySoP/hysop/numerics/update_ghosts.py         |  6 --
 HySoP/hysop/operator/adapt_timestep.py        |  7 +-
 HySoP/hysop/operator/advection.py             | 17 ++++-
 HySoP/hysop/operator/advection_dir.py         | 25 ++++---
 HySoP/hysop/operator/analytic.py              |  7 +-
 HySoP/hysop/operator/baroclinic.py            |  9 ++-
 HySoP/hysop/operator/continuous.py            |  6 +-
 HySoP/hysop/operator/curlAndDiffusion.py      | 14 +++-
 HySoP/hysop/operator/density.py               |  7 +-
 HySoP/hysop/operator/differential.py          | 19 +++--
 HySoP/hysop/operator/diffusion.py             | 18 ++++-
 .../hysop/operator/discrete/adapt_timestep.py |  2 +-
 .../operator/discrete/particle_advection.py   |  7 --
 HySoP/hysop/operator/energy_enstrophy.py      |  7 +-
 .../hysop/operator/monitors/compute_forces.py |  5 +-
 HySoP/hysop/operator/monitors/printer.py      | 21 +++---
 HySoP/hysop/operator/penalization.py          |  7 +-
 HySoP/hysop/operator/poisson.py               | 17 ++++-
 HySoP/hysop/operator/redistribute.py          |  3 +-
 .../hysop/operator/redistribute_intercomm.py  | 33 ++++++---
 HySoP/hysop/operator/reprojection.py          | 19 ++---
 HySoP/hysop/operator/stretching.py            |  8 +-
 HySoP/hysop/operator/velocity_correction.py   |  7 +-
 HySoP/hysop/problem/problem.py                |  4 +-
 HySoP/hysop/problem/problem_tasks.py          | 25 ++++++-
 HySoP/hysop/problem/simulation.py             |  5 +-
 HySoP/src/client_data.f90                     |  6 +-
 HySoP/src/fftw/Diffusion.f90                  | 44 ++++++-----
 HySoP/src/fftw/Poisson.f90                    | 56 ++++++++------
 HySoP/src/fftw/fft2d.f90                      | 24 +++---
 HySoP/src/fftw/fft3d.f90                      | 34 ++++-----
 HySoP/src/main/main.f90                       | 74 +++++++++----------
 .../scalesInterface/layout/cart_topology.f90  | 21 ++++--
 .../src/scalesInterface/particles/advecX.f90  |  4 +-
 .../src/scalesInterface/particles/advecY.f90  |  4 +-
 .../src/scalesInterface/particles/advecZ.f90  |  4 +-
 .../particles/advec_common_interpol.F90       | 10 +--
 .../particles/advec_common_remesh.F90         |  6 +-
 .../particles/advec_correction.f90            | 12 +--
 .../advec_line/advec_common_line.f90          | 16 ++--
 .../particles/interpolation_velo.f90          | 40 +++++-----
 HySoP/src/scalesInterface/precision_tools.f90 | 10 ++-
 50 files changed, 456 insertions(+), 317 deletions(-)

diff --git a/HySoP/CMake/FindFFTW.cmake b/HySoP/CMake/FindFFTW.cmake
index a7a63d867..6c67967d0 100644
--- a/HySoP/CMake/FindFFTW.cmake
+++ b/HySoP/CMake/FindFFTW.cmake
@@ -7,7 +7,7 @@
 #
 # Set fftw_DIR=where_fftw_is_installed if it's not in a "classic" place or if you want a specific version
 #
-# Written by F. Pérignon, nov/2009 
+# Written by F. Pérignon, nov/2009
 # inspired from http://www.cmake.org/Wiki/CMake:How_To_Find_Libraries
 
 include(LibFindMacros)
@@ -19,25 +19,25 @@ libfind_pkg_check_modules(FFTW_PKGCONF fftw3)
 # -- user-defined
 find_path(
   FFTW_INCLUDE_DIR
-  NAMES fftw3.h 
+  NAMES fftw3.h
   #NO_CMAKE_ENVIRONMENT_PATH
   PATHS ${fftw_DIR} PATHS ENV INCLUDE ENV PATH ${FFTW_PKGCONF_INCLUDE_DIRS}
   PATH_SUFFIXES include
   NO_DEFAULT_PATH
   )
-# -- default 
+# -- default
 find_path(FFTW_INCLUDE_DIR NAMES fftw3.h)
 
 # Finally the library itself
 find_library(
   FFTW_LIBRARY
   NAMES fftw3
-  PATHS ${fftw_DIR} ${FFTW_INCLUDE_DIR}/.. ENV LIBRARY_PATH ENV LD_LIBRARY_PATH  ${FFTW_PKGCONF_LIBRARY_DIRS} 
+  PATHS ${fftw_DIR} ${FFTW_INCLUDE_DIR}/.. ENV LIBRARY_PATH ENV LD_LIBRARY_PATH  ${FFTW_PKGCONF_LIBRARY_DIRS}
   PATH_SUFFIXES lib
   NO_DEFAULT_PATH
 )
 
-find_library(FFTW_LIBRARY 
+find_library(FFTW_LIBRARY
   NAMES fftw3
   )
 
@@ -45,27 +45,40 @@ find_library(FFTW_LIBRARY
 find_library(
   FFTW_MPI_LIBRARY
   NAMES fftw3_mpi
-  PATHS ${fftw_DIR} ${FFTW_INCLUDE_DIR}/.. ENV LIBRARY_PATH ENV LD_LIBRARY_PATH  ${FFTW_PKGCONF_LIBRARY_DIRS} 
+  PATHS ${fftw_DIR} ${FFTW_INCLUDE_DIR}/.. ENV LIBRARY_PATH ENV LD_LIBRARY_PATH  ${FFTW_PKGCONF_LIBRARY_DIRS}
   PATH_SUFFIXES lib
   NO_DEFAULT_PATH
 )
 
-find_library(FFTW_MPI_LIBRARY 
+find_library(FFTW_MPI_LIBRARY
   NAMES fftw3_mpi
 )
 
+# The library itself, float version
 find_library(
   FFTWFloat_LIBRARY
   NAMES fftw3f
-  PATHS ${fftw_DIR} ENV LIBRARY_PATH ENV LD_LIBRARY_PATH  ${FFTW_PKGCONF_LIBRARY_DIRS} 
+  PATHS ${fftw_DIR} ENV LIBRARY_PATH ENV LD_LIBRARY_PATH  ${FFTW_PKGCONF_LIBRARY_DIRS}
   PATH_SUFFIXES lib
   NO_DEFAULT_PATH
 )
-find_library(FFTWFloat_LIBRARY 
-  NAMES fftw3f 
+find_library(FFTWFloat_LIBRARY
+  NAMES fftw3f
+  )
+
+# The library itself, float and MPI version
+find_library(
+  FFTWFloat_MPI_LIBRARY
+  NAMES fftw3f_mpi
+  PATHS ${fftw_DIR} ENV LIBRARY_PATH ENV LD_LIBRARY_PATH  ${FFTW_PKGCONF_LIBRARY_DIRS}
+  PATH_SUFFIXES lib
+  NO_DEFAULT_PATH
+)
+find_library(FFTWFloat_MPI_LIBRARY
+  NAMES fftw3f_mpi
   )
 
 
 set(FFTW_PROCESS_INCLUDES FFTW_INCLUDE_DIR)
-set(FFTW_PROCESS_LIBS FFTW_LIBRARY FFTWFloat_LIBRARY FFTW_MPI_LIBRARY)
+set(FFTW_PROCESS_LIBS FFTW_LIBRARY FFTWFloat_LIBRARY FFTW_MPI_LIBRARY FFTWFloat_MPI_LIBRARY)
  libfind_process(FFTW)
diff --git a/HySoP/hysop/f2py/fftw2py.f90 b/HySoP/hysop/f2py/fftw2py.f90
index 6e290e6e5..8645a9f59 100755
--- a/HySoP/hysop/f2py/fftw2py.f90
+++ b/HySoP/hysop/f2py/fftw2py.f90
@@ -4,6 +4,7 @@
 !> Interface to mpi-fftw (fortran) utilities
 module fftw2py
 
+  use client_data
   use parmesparam
   !> 2d case
    use fft2d
@@ -17,20 +18,27 @@ contains
   !> Initialisation of fftw context : create plans and memory buffers
   !! @param[in] resolution global resolution of the discrete domain
   !! @param[in] lengths width of each side of the domain
+  !! @param[in] comm MPI communicator
   !! @param[out] datashape local dimension of the input/output field
   !! @param[out] offset absolute index of the first component of the local field
-  subroutine init_fftw_solver(resolution,lengths,datashape,offset,dim,fftw_type_real)
+  subroutine init_fftw_solver(resolution,lengths,comm,datashape,offset,dim,fftw_type_real)
 
     integer, intent(in) :: dim
     integer, dimension(dim),intent(in) :: resolution
     real(pk),dimension(dim), intent(in) :: lengths
     integer(ik), dimension(dim), intent(out) :: datashape
     integer(ik), dimension(dim), intent(out) :: offset
+    integer, intent(in)                 :: comm
     logical, optional :: fftw_type_real
     !f2py optional :: dim=len(resolution)
     !f2py intent(hide) dim
     !f2py logical optional, intent(in) :: fftw_type_real = 1
 
+    integer :: ierr
+
+    ! Duplicate comm into client_data::main_comm (used later in fft2d and fft3d)
+    call mpi_comm_dup(comm, main_comm, ierr)
+
     if(fftw_type_real) then
        if(dim == 2) then
           print*, "Init fftw/poisson solver for a 2d problem"
@@ -250,7 +258,7 @@ contains
     call filter_curl_3d()
 
     call c2r_3d(omega_x,omega_y,omega_z, ghosts_vort)
-    
+
   end subroutine solve_curl_3d
 
 end module fftw2py
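
For reference, a sketch of the matching Python call site (consistent with the
diffusion and poisson hunks later in this patch; the import path and variable
names are assumptions, not part of the diff):

    from parmepy.f2py import fftw2py
    from mpi4py import MPI

    comm = MPI.COMM_WORLD
    # dim is hidden by f2py and fftw_type_real keeps its default
    localres, localoffset = fftw2py.init_fftw_solver(
        resolution, domain_length, comm=comm.py2f())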
diff --git a/HySoP/hysop/f2py/parameters.f90 b/HySoP/hysop/f2py/parameters.f90
index bb281b27a..b45d02594 100755
--- a/HySoP/hysop/f2py/parameters.f90
+++ b/HySoP/hysop/f2py/parameters.f90
@@ -17,9 +17,9 @@ module parmesparam_sp
 
   implicit none
 
-  ! double precision kind
+  ! single precision kind
   integer, parameter :: pk = 4
   ! integer precision kind
-  integer, parameter :: ik = 4
+  integer, parameter :: ik = 8
 
 end module parmesparam_sp
diff --git a/HySoP/hysop/f2py/scales2py.f90 b/HySoP/hysop/f2py/scales2py.f90
index 62b469c8b..e57436974 100755
--- a/HySoP/hysop/f2py/scales2py.f90
+++ b/HySoP/hysop/f2py/scales2py.f90
@@ -16,13 +16,15 @@ contains
   !! @param[in] ncells number of cells in the global discrete domain
   !! @param[in] lengths width of each side of the domain
   !! @param[in] topodims number of mpi-processus in each dir
+  !! @param[in] main_comm MPI communicator, passed from Python
   !! @param[out] datashape local dimension of the input/output field
   !! @param[out] offset absolute index of the first component of the local field
-  subroutine init_advection_solver(ncells,lengths,topodims,datashape,offset,dim,order,stab_coeff,dim_split)
+  subroutine init_advection_solver(ncells,lengths,topodims,main_comm,datashape,offset,dim,order,stab_coeff,dim_split)
     integer, intent(in) :: dim
     integer, dimension(dim),intent(in) :: ncells
     real(pk),dimension(dim), intent(in) :: lengths
     integer, dimension(dim), intent(in) :: topodims
+    integer, intent(in)                 :: main_comm
     integer(ik), dimension(dim), intent(out) :: datashape
     integer(ik), dimension(dim), intent(out) :: offset
     character(len=*), optional, intent(in)  ::  order, dim_split
@@ -40,9 +42,9 @@ contains
     ! get current process rank
     !call MPI_COMM_RANK(MPI_COMM_WORLD,rank,error)
     !call MPI_COMM_SIZE(MPI_COMM_WORLD,nbprocs,error)
-
     !groupsize = 5
-    call cart_create(topodims,error)
+
+    call cart_create(topodims, error, main_comm)
     !call set_group_size(groupSize)
     ! Create meshes
     call discretisation_create(ncells(1),ncells(2),ncells(3),lengths(1),lengths(2),lengths(3))
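
On the Python side the extra communicator argument makes the call read as
follows (a sketch based on the advection.py hunk below; variable names are
taken from that hunk, not defined here):

    scalesres, scalesoffset, stab_coeff = \
        scales.init_advection_solver(nbcells, self.domain.length,
                                     topodims, comm.py2f(),
                                     order=order, dim_split=splitting)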
diff --git a/HySoP/hysop/fields/discrete.py b/HySoP/hysop/fields/discrete.py
index da0daad55..60704c3ef 100644
--- a/HySoP/hysop/fields/discrete.py
+++ b/HySoP/hysop/fields/discrete.py
@@ -11,6 +11,7 @@ import numpy as np
 from parmepy.constants import ORDER, PARMES_REAL
 import parmepy.tools.numpywrappers as npw
 import numpy.linalg as la
+from parmepy.numerics.updateGhosts import UpdateGhosts
 
 
 class DiscreteField(object):
@@ -102,7 +103,14 @@ class DiscreteField(object):
         ## The memory space for data ...
         self.data = [npw.zeros(self.resolution)
                      for d in xrange(self.nbComponents)]
- #      assert i == 0, 'Scalar field has only one component.'
+
+        ## Ghost synchronization function
+        self.synchro_ghosts = self._empty_synchro_ghosts
+        self._the_ghosts_synchro = None
+        if self.topology.hasGhosts:
+            self.synchro_ghosts = self._synchro_ghosts
+            self._the_ghosts_synchro = UpdateGhosts(self.topology,
+                                                    self.nbComponents)
 
     def __getitem__(self, i):
         """ Access to the content of the field.
@@ -118,6 +126,13 @@ class DiscreteField(object):
         """
         self.data[i][...] = value
 
+    def _empty_synchro_ghosts(self):
+        pass
+
+    def _synchro_ghosts(self):
+        """Synchronize ghosts."""
+        self._the_ghosts_synchro(self.data)
+
     @debug
     @timed_function
     def initialize(self, formula=None, doVectorize=False,
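
The hunk above binds synchro_ghosts once, at construction time, so fields on
ghost-free topologies pay only a no-op call. A standalone sketch of the same
dispatch (FieldLike and synchronizer are illustrative stand-ins for
DiscreteField and UpdateGhosts):

    class FieldLike(object):
        def __init__(self, has_ghosts, synchronizer=None):
            self.data = []
            # synchronizer plays the role of UpdateGhosts(topology, nbComponents)
            self._synchronizer = synchronizer
            # choose the callable once; no branching inside the time loop
            if has_ghosts:
                self.synchro_ghosts = self._do_synchro
            else:
                self.synchro_ghosts = self._no_synchro

        def _no_synchro(self):
            pass

        def _do_synchro(self):
            self._synchronizer(self.data)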
diff --git a/HySoP/hysop/gpu/gpu_discrete.py b/HySoP/hysop/gpu/gpu_discrete.py
index e126f3a86..a75229a04 100644
--- a/HySoP/hysop/gpu/gpu_discrete.py
+++ b/HySoP/hysop/gpu/gpu_discrete.py
@@ -4,7 +4,7 @@
 Contains class for discrete fields on GPU.
 """
 from parmepy import __VERBOSE__
-from parmepy.constants import ORDER, np, debug, PARMES_REAL, PARMES_INTEGER
+from parmepy.constants import ORDER, np, debug, PARMES_REAL, PARMES_INTEGER, S_DIR
 from parmepy.fields.discrete import DiscreteField
 from parmepy.gpu import cl, clArray, CL_PROFILE
 from parmepy.gpu.gpu_kernel import KernelLauncher, KernelListLauncher
@@ -371,6 +371,10 @@ class GPUDiscreteField(DiscreteField):
         else:
             range_components = [component]
             evt = [None]
+
+        # Ghost synchronization before sending
+        self.synchro_ghosts()
+
         self.wait()
         mem_transfered = 0
         for d_id, d in enumerate(range_components):
@@ -383,7 +387,7 @@ class GPUDiscreteField(DiscreteField):
                     layoutDir = 0
                 if __VERBOSE__:
                     print "{"+str(main_rank)+"}", "host->device :", \
-                        self.name, d, layoutDir, 'No Batch'
+                        self.name, S_DIR[d], layoutDir, 'No Batch'
                 self._tmp[...] = self._toLayoutMgrFunc[layoutDir](self.data[d])
                 evt[d_id] = cl.enqueue_copy(self.cl_env.queue,
                                             self.gpu_data[d].data, self._tmp)
@@ -396,7 +400,7 @@ class GPUDiscreteField(DiscreteField):
                 if layoutDir is None and batch_d is None:
                     if __VERBOSE__:
                         print "{"+str(main_rank)+"}", "host->device :", \
-                            self.name, d, 0, 'Batch_default'
+                            self.name, S_DIR[d], 0, 'Batch_default'
                     # Default transfer, using the shape of the gpu_data
                     # component and the default layout
                     self._tmp[:np.prod(self.shape_gpu_default)] = \
@@ -416,7 +420,7 @@ class GPUDiscreteField(DiscreteField):
                         batch_dir = 0
                     if __VERBOSE__:
                         print "{"+str(main_rank)+"}", "host->device :", \
-                            self.name, d, layoutDir, 'Batch', batch_dir
+                            self.name, S_DIR[d], layoutDir, 'Batch', batch_dir
                     if self.shape_gpu[layoutDir] is not None:
                         self._tmp[:np.prod(self.shape_gpu[layoutDir])] = \
                             self._toLayoutMgrFunc[layoutDir](
@@ -475,7 +479,7 @@ class GPUDiscreteField(DiscreteField):
                     layoutDir = 0
                 if __VERBOSE__:
                     print "{"+str(main_rank)+"}", "device->host :", \
-                        self.name, d, layoutDir, 'No Batch'
+                        self.name, S_DIR[d], layoutDir, 'No Batch'
                 evt[d_id] = cl.enqueue_copy(self.cl_env.queue,
                                             self._tmp,
                                             self.gpu_data[d].data,
@@ -489,7 +493,7 @@ class GPUDiscreteField(DiscreteField):
                 if layoutDir is None and batch_d is None:
                     if __VERBOSE__:
                         print "{"+str(main_rank)+"}", "device->host :", \
-                            self.name, d, 0, 'Batch_default'
+                            self.name, S_DIR[d], 0, 'Batch_default'
                     # Default transfer, using the shape of the gpu_data
                     # component and the default layout
                     evt[d_id] = cl.enqueue_copy(
@@ -510,7 +514,7 @@ class GPUDiscreteField(DiscreteField):
                         batch_d = 0
                     if __VERBOSE__:
                         print "{"+str(main_rank)+"}", "device->host :", \
-                            self.name, d, layoutDir, 'Batch', batch_d
+                            self.name, S_DIR[d], layoutDir, 'Batch', batch_d
                     evt[d_id] = cl.enqueue_copy(
                         self.cl_env.queue,
                         self._tmp[:np.prod(self.shape_gpu[layoutDir])],
diff --git a/HySoP/hysop/gpu/gpu_particle_advection.py b/HySoP/hysop/gpu/gpu_particle_advection.py
index 7f7cf9631..14e27284d 100644
--- a/HySoP/hysop/gpu/gpu_particle_advection.py
+++ b/HySoP/hysop/gpu/gpu_particle_advection.py
@@ -19,7 +19,6 @@ from parmepy.gpu.gpu_kernel import KernelLauncher
 from parmepy.tools.timers import Timer
 from parmepy.operator.discrete.discrete import DiscreteOperator
 from parmepy.mpi import MPI
-from parmepy.numerics.updateGhosts import UpdateGhosts
 
 
 class GPUParticleAdvection(ParticleAdvection):
@@ -190,11 +189,6 @@ class GPUParticleAdvection(ParticleAdvection):
         <code>"FieldName"</code> the field
         name as given by user.
         """
-        self._synchroGhosts = None
-        if self._isMultiScale:
-            self._synchroGhosts = UpdateGhosts(self.velocity.topology,
-                                               self.velocity.nbComponents)
-
         self.gpu_precision = self.cl_env.precision
         self.coord_min = np.ones(4, dtype=self.gpu_precision)
         self.mesh_size = np.ones(4, dtype=self.gpu_precision)
@@ -649,8 +643,6 @@ class GPUParticleAdvection(ParticleAdvection):
         """
         # Calling for requirements completion
         DiscreteOperator.apply(self, simulation)
-        if self._isMultiScale:# and not old_dir == self.dir:
-            self._synchroGhosts(self.velocity.data)
         ctime = MPI.Wtime()
 
         # If first direction of advection, wait for work gpu fields
diff --git a/HySoP/hysop/mpi/topology.py b/HySoP/hysop/mpi/topology.py
index cafa05498..3f8fc6b37 100644
--- a/HySoP/hysop/mpi/topology.py
+++ b/HySoP/hysop/mpi/topology.py
@@ -160,6 +160,7 @@ class Cartesian(object):
         else:
             self.ghosts = np.asarray(ghosts, dtype=PARMES_INTEGER)
             assert(self.ghosts.size == domain.dimension)
+        self.hasGhosts = not np.all(self.ghosts == 0)
 
         # --- Computation of the local mesh resolution and indices ---
         ##  Resolution of the global mesh
diff --git a/HySoP/hysop/numerics/update_ghosts.py b/HySoP/hysop/numerics/update_ghosts.py
index 48c54743f..407c228fc 100644
--- a/HySoP/hysop/numerics/update_ghosts.py
+++ b/HySoP/hysop/numerics/update_ghosts.py
@@ -187,9 +187,3 @@ class UpdateGhosts(object):
 
         # Apply boundary conditions for non-distributed directions
         self.applyBC(variables)
-
-
-if (__name__ == "__main__"):
-    print __doc__
-    print "- Provided class : UpdateGhosts"
-    print UpdateGhosts.__doc__
diff --git a/HySoP/hysop/operator/adapt_timestep.py b/HySoP/hysop/operator/adapt_timestep.py
index 0abbd87fa..6cb987145 100755
--- a/HySoP/hysop/operator/adapt_timestep.py
+++ b/HySoP/hysop/operator/adapt_timestep.py
@@ -25,7 +25,7 @@ class AdaptTimeStep(Operator):
                  dt_adapt=None,
                  method=None,
                  topo=None, ghosts=None, prefix='./dt.dat',
-                 task_id=None, **other_config):
+                 task_id=None, comm=None, **other_config):
         """
         Create a timeStep-evaluation operator from given
         velocity and vorticity variables.
@@ -52,7 +52,7 @@ class AdaptTimeStep(Operator):
                       SpaceDiscretisation: FD_C_4,
                       dtAdvecCrit: 'vort'}
         Operator.__init__(self, [velocity, vorticity], method, topo=topo,
-                          ghosts=ghosts, task_id=task_id)
+                          ghosts=ghosts, task_id=task_id, comm=comm)
         self.config = other_config
         ## velocity variable (vector)
         self.velocity = velocity
@@ -96,7 +96,8 @@ class AdaptTimeStep(Operator):
             for v in self.variables:
                 topo = self.domain.getOrCreateTopology(self.domain.dimension,
                                                        self.resolutions[v],
-                                                       ghosts=self.ghosts)
+                                                       ghosts=self.ghosts,
+                                                       comm=self._comm)
                 self.discreteFields[v] = v.discretize(topo)
 
     @debug
diff --git a/HySoP/hysop/operator/advection.py b/HySoP/hysop/operator/advection.py
index 0b8cf6ae7..2ed83e464 100644
--- a/HySoP/hysop/operator/advection.py
+++ b/HySoP/hysop/operator/advection.py
@@ -50,7 +50,8 @@ class Advection(Operator):
     @debug
     def __init__(self, velocity, advectedFields, resolutions,
                  method=None,
-                 topo=None, ghosts=None, task_id=None, **other_config):
+                 topo=None, ghosts=None, comm=None,
+                 task_id=None, **other_config):
         """
         Create a Transport operator from given variables velocity and scalar.
 
@@ -82,7 +83,7 @@ class Advection(Operator):
             vars_str += vv.name + ","
         Operator.__init__(self, v, method, topo=topo,
                           ghosts=ghosts, name_suffix=vars_str[0:-1] + ')',
-                          task_id=task_id)
+                          task_id=task_id, comm=comm)
         ## Transport velocity
         self.velocity = velocity
         ## Transported fields
@@ -140,6 +141,7 @@ class Advection(Operator):
                     self.resolutions,
                     method=self.method, topo=self._predefinedTopo,
                     ghosts=self.ghosts, isMultiScale=self._isMultiScale,
+                    comm=self._comm,
                     task_id=task_id, name_suffix=vars_str[0:-1] + ')',
                     **self.config)
 
@@ -235,13 +237,19 @@ class Advection(Operator):
                                  dtype=PARMES_INDEX) - 1
             if self._predefinedTopo is not None:
                 main_size = self._predefinedTopo.size
+                comm = self._predefinedTopo.comm
+            elif self._comm is not None:
+                main_size = self._comm.Get_size()
+                comm = self._comm
             else:
                 from parmepy.mpi.main_var import main_size
+                from parmepy.mpi.main_var import main_comm as comm
             topodims = [1, 1, main_size]
             scalesres, scalesoffset, stab_coeff = \
                 scales.init_advection_solver(nbcells,
                                              self.domain.length,
-                                             topodims, order=order,
+                                             topodims, comm.py2f(),
+                                             order=order,
                                              dim_split=splitting)
             # Use same topodims as scales to create Cartesian topology
             # in order to discretize our fields
@@ -260,7 +268,8 @@ class Advection(Operator):
                         self.domain.getOrCreateTopology(self._dim,
                                                         self.resolutions[v],
                                                         topodims,
-                                                        ghosts=self.ghosts)
+                                                        ghosts=self.ghosts,
+                                                        comm=self._comm)
                     # ... and the corresponding discrete field
                     self.discreteFields[v] = v.discretize(topo)
             if self._isMultiScale:
diff --git a/HySoP/hysop/operator/advection_dir.py b/HySoP/hysop/operator/advection_dir.py
index f4f987f2c..61bdd2ae9 100644
--- a/HySoP/hysop/operator/advection_dir.py
+++ b/HySoP/hysop/operator/advection_dir.py
@@ -32,7 +32,7 @@ class AdvectionDir(Operator):
                  particle_positions, resolutions,
                  method=None,
                  topo=None, ghosts=None, isMultiScale=False,
-                 task_id=None, name_suffix='',
+                 task_id=None, name_suffix='', comm=None,
                  **other_config):
         if method is None:
             method = {TimeIntegrator: RK2,
@@ -44,7 +44,7 @@ class AdvectionDir(Operator):
         v = [velocity]
         [v.append(f) for f in advectedFields]
         Operator.__init__(self, v, method, topo=topo,
-                          ghosts=ghosts, task_id=task_id,
+                          ghosts=ghosts, task_id=task_id, comm=comm,
                           name_suffix=name_suffix + S_DIR[d])
         self.output = advectedFields
         self.input = [var for var in self.variables]
@@ -67,12 +67,15 @@ class AdvectionDir(Operator):
             if not MultiScale in method.keys():
                 method[MultiScale] = L2_1
             if method[MultiScale] == Linear:
-                self._v_ghosts = [1, ] * self.domain.dimension
+                self._v_ghosts = np.array([1, ] * self.domain.dimension,
+                                          dtype=PARMES_INDEX)
             elif method[MultiScale] == L2_1:
-                self._v_ghosts = [2, ] * self.domain.dimension
+                self._v_ghosts = np.array([2, ] * self.domain.dimension,
+                                          dtype=PARMES_INDEX)
             elif method[MultiScale] == L4_2 or \
                     method[MultiScale] == L4_4:
-                self._v_ghosts = [3, ] * self.domain.dimension
+                self._v_ghosts = np.array([3, ] * self.domain.dimension,
+                                          dtype=PARMES_INDEX)
             else:
                 raise ValueError("Unknown multiscale method")
 
@@ -89,9 +92,10 @@ class AdvectionDir(Operator):
         """
         if self._predefinedTopo is not None:
             main_size = self._predefinedTopo.size
+        elif self._comm is not None:
+            main_size = self._comm.Get_size()
         else:
             from parmepy.mpi import main_size
-        comm = None
         if main_size == 1:
             # - topologies creation and variables discretization -
             if self._predefinedTopo is not None:
@@ -103,10 +107,11 @@ class AdvectionDir(Operator):
                     if v == self.velocity:
                         topo = self.domain.getOrCreateTopology(
                             self.domain.dimension, self.resolutions[v],
-                            ghosts=self._v_ghosts)
+                            ghosts=self._v_ghosts, comm=self._comm)
                     else:
                         topo = self.domain.getOrCreateTopology(
-                            self.domain.dimension, self.resolutions[v])
+                            self.domain.dimension, self.resolutions[v],
+                            comm=self._comm)
                     self.discreteFields[v] = v.discretize(topo)
         else:
             topodims = np.ones((self.domain.dimension))
@@ -140,12 +145,12 @@ class AdvectionDir(Operator):
                     topo = self.domain.getOrCreateTopology(
                         self.domain.dimension, self.resolutions[v],
                         topoResolution=topodims, fixedResolution=True,
-                        ghosts=self._v_ghosts, comm=comm)
+                        ghosts=self._v_ghosts, comm=self._comm)
                 else:
                     topo = self.domain.getOrCreateTopology(
                         self.domain.dimension, self.resolutions[v],
                         topoResolution=topodims, fixedResolution=True,
-                        comm=comm)
+                        comm=self._comm)
                 self.discreteFields[v] = v.discretize(topo)
 
     @staticmethod
diff --git a/HySoP/hysop/operator/analytic.py b/HySoP/hysop/operator/analytic.py
index 2f19be81c..a71394ea9 100644
--- a/HySoP/hysop/operator/analytic.py
+++ b/HySoP/hysop/operator/analytic.py
@@ -15,7 +15,7 @@ class Analytic(Operator):
     @debug
     def __init__(self, variables, resolutions, formula=None,
                  ghosts=None, topo=None, doVectorize=False, method=None,
-                 task_id=None):
+                 task_id=None, comm=None):
         """
         Create an operator using an analytic formula to compute field(s).
         @param variables : list of fields on which this operator will apply.
@@ -28,7 +28,7 @@ class Analytic(Operator):
         @remark : method seems useless but it is useful for GPU.
         """
         Operator.__init__(self, variables, ghosts=ghosts, topo=topo,
-                          method=method, task_id=task_id)
+                          method=method, task_id=task_id, comm=comm)
         if formula is not None:
             ## A formula applied to all variables of this operator
             self.formula = formula
@@ -58,7 +58,8 @@ class Analytic(Operator):
                 # the topology for v ...
                 topo = self.domain.getOrCreateTopology(self.domain.dimension,
                                                        self.resolutions[v],
-                                                       ghosts=self.ghosts)
+                                                       ghosts=self.ghosts,
+                                                       comm=self._comm)
                 # ... and the corresponding discrete field
                 self.discreteFields[v] = v.discretize(topo)
                 v.setTopoInit(topo)
diff --git a/HySoP/hysop/operator/baroclinic.py b/HySoP/hysop/operator/baroclinic.py
index 5e8fc035e..824050d60 100644
--- a/HySoP/hysop/operator/baroclinic.py
+++ b/HySoP/hysop/operator/baroclinic.py
@@ -18,9 +18,9 @@ class Baroclinic(Operator):
 
     @debug
     def __init__(self, velocity, vorticity, density, viscosity,
-                 resolutions=None, topo=None, ghosts=None, 
+                 resolutions=None, topo=None, ghosts=None,
                  method={SpaceDiscretisation: FD_C_4},
-                 task_id=None, **other_config):
+                 task_id=None, comm=None, **other_config):
         """
         Constructor.
         Create a Pressure operator from given velocity variables.
@@ -37,7 +37,7 @@ class Baroclinic(Operator):
         Autom. computed if not set.
         """
         Operator.__init__(self, [velocity, vorticity, density], method,
-                          topo=topo, ghosts=ghosts, task_id=task_id)
+                          topo=topo, ghosts=ghosts, task_id=task_id, comm=comm)
 
         self.velocity = velocity
         self.vorticity = vorticity
@@ -72,7 +72,8 @@ class Baroclinic(Operator):
             for v in self.variables:
                 topo = self.domain.getOrCreateTopology(self.domain.dimension,
                                                        self.resolutions[v],
-                                                       ghosts=self.ghosts)
+                                                       ghosts=self.ghosts,
+                                                       comm=self._comm)
                 self.discreteFields[v] = v.discretize(topo)
 
     @debug
diff --git a/HySoP/hysop/operator/continuous.py b/HySoP/hysop/operator/continuous.py
index 1d8d99a8a..641629c7f 100644
--- a/HySoP/hysop/operator/continuous.py
+++ b/HySoP/hysop/operator/continuous.py
@@ -32,7 +32,7 @@ class Operator(object):
     @debug
     @abstractmethod
     def __init__(self, variables, method=None, topo=None, ghosts=None,
-                 name_suffix='', task_id=None):
+                 name_suffix='', task_id=None, comm=None):
         """
         Build the operator.
         The only required parameter is a list of variables.
@@ -43,6 +43,7 @@ class Operator(object):
         @param name : operator id.
         @param topo : a predefined topology to discretize variables
         @param ghosts : number of points in the ghost layer
+        @param comm : MPI communicator to build topologies
         Warning : you cannot set both topo and ghosts.
         """
         ## List of parmepy.continuous.Fields involved in the operator.
@@ -82,7 +83,10 @@ class Operator(object):
             self.ghosts = None
 
         ## A predefined topology to discretize variables.
+        self._comm = comm
         self._predefinedTopo = None
+        ## Check if only one of topo or comm is given by user
+        assert comm is None or topo is None
         if topo is not None:
             self._predefinedTopo = topo
             if self.ghosts is not None:
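
The new contract: give an operator either a ready-made topology or a
communicator from which topologies will be built, never both. A usage sketch
with the Stretching operator from this patch (field and communicator names
are illustrative):

    op = Stretching(velo, vorti, resolutions, topo=predefined_topo)  # ok
    op = Stretching(velo, vorti, resolutions, comm=task_comm)        # ok
    op = Stretching(velo, vorti, resolutions,
                    topo=predefined_topo, comm=task_comm)  # AssertionError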
diff --git a/HySoP/hysop/operator/curlAndDiffusion.py b/HySoP/hysop/operator/curlAndDiffusion.py
index a2db23ba3..55ac3e977 100644
--- a/HySoP/hysop/operator/curlAndDiffusion.py
+++ b/HySoP/hysop/operator/curlAndDiffusion.py
@@ -24,7 +24,7 @@ class Diffusion(Operator):
 
     @debug
     def __init__(self, velocity=None, vorticity=None,
-                 resolutions=None, method=None, task_id=None,
+                 resolutions=None, method=None, task_id=None, comm=None,
                  **other_config):
         """
         Constructor.
@@ -36,7 +36,7 @@ class Diffusion(Operator):
         """
         Operator.__init__(self, [velocity, vorticity], method,
                           name="Curl and Diffusion",
-                          task_id=task_id)
+                          task_id=task_id, comm=comm)
         self.velocity = velocity
         self.vorticity = vorticity
         self.resolutions = resolutions
@@ -48,9 +48,14 @@ class Diffusion(Operator):
         Diffusion operator discretization method.
         Create a discrete Diffusion operator from given specifications.
         """
+        if self._comm is None:
+            from parmepy.mpi.main_var import main_comm as comm
+        else:
+            comm = self._comm
+
         localres, localoffset = fftw2py.init_fftw_solver(
             self.resolutions[self.vorticity],
-            self.domain.length)
+            self.domain.length, comm=comm.py2f())
 
         topodims = self.resolutions[self.vorticity] / localres
         print 'topodims DIFFUSION', topodims
@@ -59,7 +64,8 @@ class Diffusion(Operator):
         for v in self.variables:
             topo = self.domain.getOrCreateTopology(self.domain.dimension,
                                                    self.resolutions[v],
-                                                   topodims)
+                                                   topodims,
+                                                   comm=comm)
             vd = v.discretize(topo)
             self.discreteFields[v] = vd
 
diff --git a/HySoP/hysop/operator/density.py b/HySoP/hysop/operator/density.py
index 4ab66b463..f82fef7e2 100644
--- a/HySoP/hysop/operator/density.py
+++ b/HySoP/hysop/operator/density.py
@@ -16,7 +16,7 @@ class DensityVisco(Operator):
     @debug
     def __init__(self, density, viscosity,
                  resolutions=None, method=None,
-                 task_id=None,
+                 task_id=None, comm=None,
                  **other_config):
         """
         Constructor.
@@ -25,7 +25,7 @@ class DensityVisco(Operator):
         @param velocity ContinuousVectorField : velocity variable.
         """
         Operator.__init__(self, [density, viscosity], method,
-                          name="DensityVisco", task_id=task_id)
+                          name="DensityVisco", task_id=task_id, comm=comm)
 
         self.density = density
         self.viscosity = viscosity
@@ -39,7 +39,8 @@ class DensityVisco(Operator):
         for v in self.variables:
             # the topology for v ...
             topo = self.domain.getOrCreateTopology(self.domain.dimension,
-                                                   self.resolutions[v])
+                                                   self.resolutions[v],
+                                                   comm=self._comm)
             # ... and the corresponding discrete field
             self.discreteFields[v] = v.discretize(topo)
 
diff --git a/HySoP/hysop/operator/differential.py b/HySoP/hysop/operator/differential.py
index 56adbf615..f9627250d 100644
--- a/HySoP/hysop/operator/differential.py
+++ b/HySoP/hysop/operator/differential.py
@@ -21,12 +21,12 @@ class Differential(Operator):
 
     @debug
     def __init__(self, invar, outvar, resolutions,
-                 method=None, topo=None, ghosts=None, task_id=None):
+                 method=None, topo=None, ghosts=None, task_id=None, comm=None):
 
         if method is None:
             method = {SpaceDiscretisation: FD_C_4, GhostUpdate: True}
         Operator.__init__(self, [invar, outvar], method, topo=topo,
-                          ghosts=ghosts, task_id=task_id)
+                          ghosts=ghosts, task_id=task_id, comm=comm)
         ## input variable
         self.invar = invar
         ## Curl of input
@@ -60,11 +60,18 @@ class Differential(Operator):
 
         else:
             if self.method[SpaceDiscretisation] is fftw2py:
+                if self._comm is not None:
+                    main_size = self._comm.Get_size()
+                    comm = self._comm
+                else:
+                    from parmepy.mpi.main_var import main_size
+                    from parmepy.mpi.main_var import main_comm as comm
+
                 # Compute local resolution/distribution of data
                 # according to fftw requirements.
                 localres, localoffset = fftw2py.init_fftw_solver(
-                    self.resolution, self.domain.length)
-                from parmepy.mpi.main_var import main_size
+                    self.resolution, self.domain.length, comm=comm.py2f())
+
                 topodims = np.ones((self.domain.dimension))
                 topodims[-1] = main_size
                 #variables discretization
@@ -73,7 +80,7 @@ class Differential(Operator):
                         self.domain.dimension,
                         self.resolution, topodims, precomputed=True,
                         offset=localoffset, localres=localres,
-                        ghosts=self.ghosts)
+                        ghosts=self.ghosts, comm=self._comm)
                     self.discreteFields[v] = v.discretize(topo)
             else:
                 # default topology constructor, with global resolution
@@ -84,7 +91,7 @@ class Differential(Operator):
                 for v in self.variables:
                     topo = self.domain.getOrCreateTopology(
                         dim, self.resolutions[v],
-                        ghosts=self.ghosts)
+                        ghosts=self.ghosts, comm=self._comm)
                     self.discreteFields[v] = v.discretize(topo)
 
         assert self.discreteFields[self.invar].topology == \
diff --git a/HySoP/hysop/operator/diffusion.py b/HySoP/hysop/operator/diffusion.py
index 127a65eb2..80f52a89c 100644
--- a/HySoP/hysop/operator/diffusion.py
+++ b/HySoP/hysop/operator/diffusion.py
@@ -25,7 +25,8 @@ class Diffusion(Operator):
 
     @debug
     def __init__(self, vorticity, resolution, viscosity, method=None,
-                 topo=None, ghosts=None, task_id=None, **other_config):
+                 topo=None, ghosts=None, task_id=None, comm=None,
+                 **other_config):
         """
         Constructor for the diffusion operator.
         @param[in,out] vorticity : field \f$ \omega \f$
@@ -42,7 +43,7 @@ class Diffusion(Operator):
         ## viscosity
         self.viscosity = viscosity
         Operator.__init__(self, [vorticity], method=method, ghosts=ghosts,
-                          topo=topo, task_id=task_id)
+                          topo=topo, task_id=task_id, comm=comm)
         self.config = other_config
         self.input = [self.vorticity]
         self.output = [self.vorticity]
@@ -55,11 +56,19 @@ class Diffusion(Operator):
 
         # Compute local resolution/distribution of data
         # according to fftw requirements.
+        if self._predefinedTopo is not None:
+            comm = self._predefinedTopo.comm
+        elif self._comm is not None:
+            comm = self._comm
+        else:
+            from parmepy.mpi.main_var import main_comm as comm
         localres, localoffset = fftw2py.init_fftw_solver(
-            self.resolution, self.domain.length)
+            self.resolution, self.domain.length, comm=comm.py2f())
 
         if self._predefinedTopo is not None:
             main_size = self._predefinedTopo.size
+        elif self._comm is not None:
+            main_size = self._comm.Get_size()
         else:
             from parmepy.mpi.main_var import main_size
         topodims = np.ones((self.domain.dimension))
@@ -82,7 +91,8 @@ class Diffusion(Operator):
                     precomputed=True,
                     offset=localoffset,
                     localres=localres,
-                    ghosts=self.ghosts)
+                    ghosts=self.ghosts,
+                    comm=self._comm)
                 self.discreteFields[v] = v.discretize(topo)
 
     @debug
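
The three-way communicator lookup above (predefined topology's comm, explicit
comm argument, global main_comm) is repeated in poisson.py and differential.py
below; a hypothetical helper consolidating it could read:

    def _resolve_comm(op):
        # hypothetical, not in this patch: mirrors the fallback chain above
        if op._predefinedTopo is not None:
            return op._predefinedTopo.comm
        if op._comm is not None:
            return op._comm
        from parmepy.mpi.main_var import main_comm
        return main_comm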
diff --git a/HySoP/hysop/operator/discrete/adapt_timestep.py b/HySoP/hysop/operator/discrete/adapt_timestep.py
index 3804b7c88..e49dfe835 100755
--- a/HySoP/hysop/operator/discrete/adapt_timestep.py
+++ b/HySoP/hysop/operator/discrete/adapt_timestep.py
@@ -190,7 +190,7 @@ class AdaptTimeStep_D(DiscreteOperator):
                 self.dt_adapt.data[0] = min(dt_advec, dt_stretch, dt_cfl)
 
             self.dt_adapt.data[0] = \
-                self.velocity.topology.topo.allreduce(self.dt_adapt.data[0],
+                self.velocity.topology.comm.allreduce(self.dt_adapt.data[0],
                                                     PARMES_MPI_REAL, op=MPI.MIN)
 
 
diff --git a/HySoP/hysop/operator/discrete/particle_advection.py b/HySoP/hysop/operator/discrete/particle_advection.py
index f79b8da1f..1e2b44fe2 100644
--- a/HySoP/hysop/operator/discrete/particle_advection.py
+++ b/HySoP/hysop/operator/discrete/particle_advection.py
@@ -15,7 +15,6 @@ from parmepy.fields.continuous import Field
 from parmepy.tools.timers import timed_function
 import parmepy.tools.numpywrappers as npw
 from parmepy.mpi import MPI
-from parmepy.numerics.updateGhosts import UpdateGhosts
 
 
 class ParticleAdvection(DiscreteOperator):
@@ -147,10 +146,6 @@ class ParticleAdvection(DiscreteOperator):
             self.dir, work=self._remesh_w,
             iwork=self._remesh_iw)
 
-        self._synchroGhosts = None
-        if self._isMultiScale:
-            self._synchroGhosts = UpdateGhosts(self.velocity.topology, self.velocity.nbComponents)
-
         self._isUpToDate = True
 
     @debug
@@ -174,8 +169,6 @@ class ParticleAdvection(DiscreteOperator):
         """
         # Calling for requirements completion
         DiscreteOperator.apply(self, simulation)
-        if self._isMultiScale:
-            self._synchroGhosts(self.velocity.data)
         ctime = MPI.Wtime()
 
         t, dt = simulation.time, simulation.timeStep * dtCoeff
diff --git a/HySoP/hysop/operator/energy_enstrophy.py b/HySoP/hysop/operator/energy_enstrophy.py
index a68fd256d..cb6819178 100644
--- a/HySoP/hysop/operator/energy_enstrophy.py
+++ b/HySoP/hysop/operator/energy_enstrophy.py
@@ -25,8 +25,8 @@ class Energy_enstrophy(Monitoring):
     """
 
     def __init__(self, velocity, vorticity,
-                 viscosity, isNormalized, topo, frequency,
-                 filename=None, safeOutput=True):
+                 viscosity, isNormalized, topo, frequency, prefix=None,
+                 filename=None, safeOutput=True, task_id=None):
         """
         Constructor.
         @param velocity field
@@ -43,7 +43,8 @@ class Energy_enstrophy(Monitoring):
         False --> open at init and close during finalize (cost less but if simu
         crashes, data are lost.)
         """
-        Monitoring.__init__(self, [velocity, vorticity], topo, frequency)
+        Monitoring.__init__(self, [velocity, vorticity], topo, frequency,
+                            task_id=task_id)
         ## velocity field
         self.velocity = velocity
         ## vorticity field
diff --git a/HySoP/hysop/operator/monitors/compute_forces.py b/HySoP/hysop/operator/monitors/compute_forces.py
index d8d239a14..c6a6dc9b8 100644
--- a/HySoP/hysop/operator/monitors/compute_forces.py
+++ b/HySoP/hysop/operator/monitors/compute_forces.py
@@ -26,7 +26,7 @@ class DragAndLift(Monitoring):
     """
     def __init__(self, velocity, vorticity, nu, coefForce, topo,
                  volumeOfControl, obstacles=None, frequency=1,
-                 filename=None, safeOutput=True):
+                 filename=None, safeOutput=True, task_id=None):
         """
         @param velocity field
         @param vorticity field
@@ -40,7 +40,8 @@ class DragAndLift(Monitoring):
         @param filename output file name. Default is None ==> no output.
         Full or relative path.
         """
-        Monitoring.__init__(self, [velocity, vorticity], topo, frequency)
+        Monitoring.__init__(self, [velocity, vorticity], topo, frequency,
+                            task_id=task_id)
         self.velocity = velocity
         self.vorticity = vorticity
         self._voc = volumeOfControl
diff --git a/HySoP/hysop/operator/monitors/printer.py b/HySoP/hysop/operator/monitors/printer.py
index cf2ade524..7d9bbc0e7 100644
--- a/HySoP/hysop/operator/monitors/printer.py
+++ b/HySoP/hysop/operator/monitors/printer.py
@@ -35,7 +35,8 @@ class Printer(Monitoring):
         @param prefix : output file name prefix.
         @param ext : output file extension, default=.vtk.
         """
-        Monitoring.__init__(self, variables, topo, frequency, task_id=task_id)
+        Monitoring.__init__(self, variables, topo, frequency,
+                            task_id=task_id)
         ## output file name prefix
         if prefix is None:
             self.prefix = './out_'
@@ -73,6 +74,11 @@ class Printer(Monitoring):
         self.topo = self._predefinedTopo
         self._xmf_data_files = []
 
+
+    def setUp(self):
+        """
+        Set up the output directory and output file names
+        """
         # Create output dir if required
         if self.topo.rank == 0:
             d = os.path.dirname(self.prefix)
@@ -91,12 +97,6 @@ class Printer(Monitoring):
             self._get_filename = lambda i: self.prefix + \
                 "{0}_iter_{1:03d}.dat".format(self.topo.rank, i)
 
-    def setUp(self):
-        """
-        Print initial state
-        """
-        self.step()
-
     @debug
     @timed_function
     def apply(self, simulation=None):
@@ -169,11 +169,8 @@ class Printer(Monitoring):
         If fails, turn to classical ascii output.
         @param iter : current iteration number
         """
-        if simu is None:
-            ite, t = 0, 0.
-        else:
-            ite = simu.currentIteration
-            t = ite * simu.timeStep
+        ite = simu.currentIteration
+        t = ite * simu.timeStep
         # Transfer from GPU to CPU if required
         for f in self.variables:
             df = f.discreteFields[self.topo]
diff --git a/HySoP/hysop/operator/penalization.py b/HySoP/hysop/operator/penalization.py
index 6af9deeac..602300176 100644
--- a/HySoP/hysop/operator/penalization.py
+++ b/HySoP/hysop/operator/penalization.py
@@ -24,7 +24,7 @@ class Penalization(Operator):
     @debug
     def __init__(self, variables, obstacles, coeff,
                  resolutions=None, method=None, topo=None,
-                 ghosts=None, task_id=None, **other_config):
+                 ghosts=None, task_id=None, comm=None, **other_config):
         """
         Constructor.
         @param[in,out] variables : list of fields to be penalized
@@ -37,7 +37,7 @@ class Penalization(Operator):
             method = {}
         Operator.__init__(self, variables, method=method,
                           topo=topo, ghosts=ghosts,
-                          task_id=task_id)
+                          task_id=task_id, comm=comm)
 
         ## domain where penalization must be applied.
         ## A parmepy.domain.obstacle .
@@ -61,7 +61,8 @@ class Penalization(Operator):
         else:
             topo = self.domain.getOrCreateTopology(self.domain.dimension,
                                                    self.resolution,
-                                                   ghosts=self.ghosts)
+                                                   ghosts=self.ghosts,
+                                                   comm=self._comm)
         for v in self.variables:
             # the discrete fields
             self.discreteFields[v] = v.discretize(topo)
diff --git a/HySoP/hysop/operator/poisson.py b/HySoP/hysop/operator/poisson.py
index 6d266d724..768d56466 100644
--- a/HySoP/hysop/operator/poisson.py
+++ b/HySoP/hysop/operator/poisson.py
@@ -26,7 +26,7 @@ class Poisson(Operator):
 
     @debug
     def __init__(self, velocity, vorticity, resolutions, ghosts=None,
-                 method=None, topo=None, task_id=None,
+                 method=None, topo=None, task_id=None, comm=None,
                  **other_config):
         """
         Constructor for the Poisson problem.
@@ -44,7 +44,7 @@ class Poisson(Operator):
         self.resolutions = resolutions
 
         Operator.__init__(self, [velocity, vorticity], method, ghosts=ghosts,
-                          topo=topo, task_id=task_id)
+                          topo=topo, task_id=task_id, comm=comm)
         self.config = other_config
         if method is None:
             self.method = 'fftw'
@@ -78,11 +78,19 @@ class Poisson(Operator):
 
         # Compute local resolution/distribution of data
         # according to fftw requirements.
+        if self._predefinedTopo is not None:
+            comm = self._predefinedTopo.comm
+        elif self._comm is not None:
+            comm = self._comm
+        else:
+            from parmepy.mpi.main_var import main_comm as comm
         localres, localoffset = fftw2py.init_fftw_solver(
-            self.resolution, self.domain.length)
+            self.resolution, self.domain.length, comm=comm.py2f())
 
         if self._predefinedTopo is not None:
             main_size = self._predefinedTopo.size
+        elif self._comm is not None:
+            main_size = self._comm.Get_size()
         else:
             from parmepy.mpi.main_var import main_size
         topodims = np.ones((self.domain.dimension))
@@ -102,7 +110,8 @@ class Poisson(Operator):
                     precomputed=True,
                     offset=localoffset,
                     localres=localres,
-                    ghosts=self.ghosts)
+                    ghosts=self.ghosts,
+                    comm=self._comm)
                 self.discreteFields[v] = v.discretize(topo)
 
     @debug
diff --git a/HySoP/hysop/operator/redistribute.py b/HySoP/hysop/operator/redistribute.py
index ce306e8a9..d1f2a8c73 100644
--- a/HySoP/hysop/operator/redistribute.py
+++ b/HySoP/hysop/operator/redistribute.py
@@ -59,7 +59,6 @@ class Redistribute(Operator):
             vars_str += S_DIR[component]
         Operator.__init__(self, variables, method, task_id=task_id,
                           name_suffix=vars_str + name_suffix)
-
         ## Source Operator
         self.opFrom = opFrom
         ## Targeted operator
@@ -288,7 +287,7 @@ class Redistribute(Operator):
                 if __VERBOSE__:
                     print "{0} APPLY HOST".format(self._main_rank), \
                         self.opFrom.discreteFields[v].name, '->', \
-                        self.opTo.discreteFields[v].name
+                        self.opTo.discreteFields[v].name, S_DIR[d]
                 vTo = self.opTo.discreteFields[v].data[d]
                 vFrom = self.opFrom.discreteFields[v].data[d]
                 v_name = self.opFrom.discreteFields[v].name + S_DIR[d]
diff --git a/HySoP/hysop/operator/redistribute_intercomm.py b/HySoP/hysop/operator/redistribute_intercomm.py
index ebe3927f5..9b68569a0 100644
--- a/HySoP/hysop/operator/redistribute_intercomm.py
+++ b/HySoP/hysop/operator/redistribute_intercomm.py
@@ -11,6 +11,7 @@ from parmepy.constants import debug, PARMES_MPI_REAL, ORDERMPI, S_DIR, np
 from parmepy import __VERBOSE__
 from parmepy.operator.continuous import Operator
 from parmepy.mpi.topology import Bridge_intercomm
+from parmepy.operator.redistribute import Redistribute
 from parmepy.methods_keys import Support
 
 
@@ -21,9 +22,9 @@ class RedistributeIntercomm(Operator):
     Transfers data from topology of id_from to the id_to.
     """
     @debug
-    def __init__(self, variables, topo, op_from, op_to,
-                 proc_tasks, parent_comm,
-                 method=None, component=None):
+    def __init__(self, variables, op_from, op_to,
+                 proc_tasks, parent_comm, topo=None,
+                 method=None, component=None, name_suffix=''):
         """
         Create an operator to distribute data between two mpi topologies for a
         list of variables.
@@ -49,7 +50,8 @@ class RedistributeIntercomm(Operator):
         vars_str = vars_str[:-1] + ')'
         if not component is None:
             vars_str += S_DIR[component]
-        Operator.__init__(self, variables, method, name_suffix=vars_str)
+        Operator.__init__(self, variables, method,
+                          name_suffix=vars_str + name_suffix)
         assert parent_comm.Get_size() == len(proc_tasks), \
             "Parent communicator ({0})".format(parent_comm.Get_size()) + \
             " and size of the task id array " + \
@@ -60,7 +62,7 @@ class RedistributeIntercomm(Operator):
         self.id_from = self.opFrom.task_id
         self.id_to = self.opTo.task_id
         self.parent_comm = parent_comm
-        self._dim = topo.domain.dimension
+        self._dim = variables[0].domain.dimension
         self.proc_tasks = proc_tasks
         self.input = self.output = self.variables
         self.component = component
@@ -71,10 +73,6 @@ class RedistributeIntercomm(Operator):
             # Only the given component is considered
             self._range_components = lambda v: [component]
 
-        self._parent_rank = self.parent_comm.Get_rank()
-        self._my_rank = self.topo.comm.Get_rank()
-        self._dim = self.topo.domain.dimension
-
         self.bridges = {}
         self.r_request = {}
         self.s_request = {}
@@ -85,11 +83,22 @@ class RedistributeIntercomm(Operator):
             self._s_types[v] = {}
         self._toHost_fields = []
         self._toDevice_fields = []
-        for v in self.variables:
-            self.discreteFields[v] = v.discretize(self.topo)
 
     def discretize(self):
-        pass
+        self._parent_rank = self.parent_comm.Get_rank()
+
+        for v in self.variables:
+            if self.topo is None:
+                if self.proc_tasks[self._parent_rank] == self.id_from:
+                    self.topo = self.opFrom.discreteFields[v].topology
+                else:
+                    self.topo = self.opTo.discreteFields[v].topology
+
+        self._my_rank = self.topo.comm.Get_rank()
+        self._dim = self.topo.domain.dimension
+
+        for v in self.variables:
+            self.discreteFields[v] = v.discretize(self.topo)
 
     @debug
     def setUp(self):
diff --git a/HySoP/hysop/operator/reprojection.py b/HySoP/hysop/operator/reprojection.py
index 51f6085a3..eb3c89f95 100644
--- a/HySoP/hysop/operator/reprojection.py
+++ b/HySoP/hysop/operator/reprojection.py
@@ -11,7 +11,7 @@ from parmepy.numerics.finite_differences import FD_C_4, FD_C_2
 from parmepy.numerics.differential_operations import GradV
 import parmepy.tools.numpywrappers as npw
 from parmepy.numerics.updateGhosts import UpdateGhosts
-from parmepy.mpi import main_rank, main_comm, main_size, MPI
+from parmepy.mpi import MPI
 from parmepy.tools.timers import timed_function
 
 
@@ -24,9 +24,9 @@ class Reprojection_criterion(Monitoring):
     def __init__(self, vorticity,
                  need_reproj,
                  reproj_cst,
-                 topo, frequency, 
+                 topo, frequency,
                  method={SpaceDiscretisation: FD_C_4},
-                 prefix=None):
+                 prefix=None, task_id=None):
         """
         Constructor.
         @param vorticity field
@@ -35,7 +35,8 @@ class Reprojection_criterion(Monitoring):
         @param frequency : output file producing frequency.
         @param prefix : output file name.
         """
-        Monitoring.__init__(self, [vorticity], topo, frequency)
+        Monitoring.__init__(self, [vorticity], topo, frequency,
+                            task_id=task_id)
         if prefix is None:
             self.prefix = './res/reproj_crit.dat'
         else:
@@ -54,11 +55,11 @@ class Reprojection_criterion(Monitoring):
         # Note FP : for multiresolution case, it would probably be
         # better to use two different operators for energy and enstrophy.
 
-        # Boolean : is the reprojection of vorticity needed for 
+        # Boolean : is the reprojection of vorticity needed for
         # current timestep ?
         self.need_reproj = need_reproj
 
-        # constant defining the reprojection criterion : 
+        # constant defining the reprojection criterion :
         # if the latter is greater than this constant, then a reprojection is needed
         self.reproj_cst = reproj_cst
         self.default_reproj_frequency =  need_reproj.data[1]
@@ -82,7 +83,7 @@ class Reprojection_criterion(Monitoring):
                                          self.vorticity.nbComponents)
 
         # grad function
-        self._function = GradV(self._topo, 
+        self._function = GradV(self._topo,
                                method=self.method)
 
         self.vort_d = self.discreteFields[self.vorticity]
@@ -137,7 +138,7 @@ class Reprojection_criterion(Monitoring):
         # computation of the reprojection criterion and reduction among all processes
         projCriterion = diagnostics[0] / diagnostics[1]
         projCriterion = \
-            self._topo.topo.allreduce(projCriterion,
+            self._topo.comm.allreduce(projCriterion,
                                       PARMES_MPI_REAL,
                                       op=MPI.MAX)
 
@@ -150,7 +151,7 @@ class Reprojection_criterion(Monitoring):
                 self.proj_counter += 1
 
         # printer
-        if ((main_rank == 0) and (ite % self.freq == 0)):
+        if ((self._topo.comm.Get_rank() == 0) and (ite % self.freq == 0)):
             self.f = open(self.prefix, 'a')
             self.f.write("%s   %s   %s   %s    %s\n" % (t, projCriterion,
                                                         diagnostics[0],
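The two fixes above make the reprojection criterion fully topology-local: the max-reduction runs on the topology's communicator (self._topo.comm, not the misnamed self._topo.topo), and the diagnostics file is written by rank 0 of that communicator rather than by the global main_rank, which differs from it once tasks run on separate communicators. A minimal mpi4py sketch of the same logic (function and argument names are illustrative):

    from mpi4py import MPI

    def reduce_and_log(comm, local_criterion, ite, freq, path):
        # Maximum of the local criteria over the topology communicator
        crit = comm.allreduce(local_criterion, op=MPI.MAX)
        # Only rank 0 of this communicator owns the output file
        if comm.Get_rank() == 0 and ite % freq == 0:
            with open(path, 'a') as f:
                f.write("%d %g\n" % (ite, crit))
        return crit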
diff --git a/HySoP/hysop/operator/stretching.py b/HySoP/hysop/operator/stretching.py
index 374ac299c..0725416b7 100755
--- a/HySoP/hysop/operator/stretching.py
+++ b/HySoP/hysop/operator/stretching.py
@@ -22,7 +22,8 @@ class Stretching(Operator):
 
     @debug
     def __init__(self, velocity, vorticity, resolutions,
-                 method=None, topo=None, ghosts=None, task_id=None):
+                 method=None, topo=None, ghosts=None,
+                 comm=None, task_id=None):
         """
         Create a Stretching operator from given
         velocity and vorticity variables.
@@ -39,7 +40,7 @@ class Stretching(Operator):
         Autom. computed if not set.
         """
         Operator.__init__(self, [velocity, vorticity], method, topo=topo,
-                          ghosts=ghosts, task_id=task_id)
+                          ghosts=ghosts, task_id=task_id, comm=comm)
         ## velocity variable (vector)
         self.velocity = velocity
         ## vorticity variable (vector)
@@ -85,7 +86,8 @@ class Stretching(Operator):
             for v in self.variables:
                 topo = self.domain.getOrCreateTopology(self.domain.dimension,
                                                        self.resolutions[v],
-                                                       ghosts=self.ghosts)
+                                                       ghosts=self.ghosts,
+                                                       comm=self._comm)
                 self.discreteFields[v] = v.discretize(topo)
 
     @staticmethod
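The comm keyword added here follows the same route in every operator touched by this patch: it is stored by the Operator base class and handed down to getOrCreateTopology, so the topology (and everything discretized on it) lives on a task-local communicator instead of the global one. A condensed sketch of the pattern (class and attribute names are illustrative; Domain details elided):

    class CommAwareOperator(object):
        def __init__(self, variables, resolutions, topo=None, comm=None):
            self._comm = comm          # None falls back to the default comm
            self.variables = variables
            self.resolutions = resolutions
            self.topo = topo
            self.discreteFields = {}

        def setUp(self, domain):
            for v in self.variables:
                topo = domain.getOrCreateTopology(domain.dimension,
                                                  self.resolutions[v],
                                                  comm=self._comm)
                self.discreteFields[v] = v.discretize(topo)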
diff --git a/HySoP/hysop/operator/velocity_correction.py b/HySoP/hysop/operator/velocity_correction.py
index 45bbada89..72de985cc 100755
--- a/HySoP/hysop/operator/velocity_correction.py
+++ b/HySoP/hysop/operator/velocity_correction.py
@@ -21,7 +21,7 @@ class VelocityCorrection(Operator):
 
     @debug
     def __init__(self, velocity, vorticity, resolutions,
-                 topo=None, task_id=None, **other_config):
+                 topo=None, task_id=None, comm=None, **other_config):
         """
         Corrects the values of the velocity field after
         solving Poisson equation in order to prescribe proper
@@ -32,7 +32,7 @@ class VelocityCorrection(Operator):
         @param topo : a predefined topology to discretize velocity
         """
         Operator.__init__(self, [velocity, vorticity], topo=topo,
-                          task_id=task_id)
+                          task_id=task_id, comm=comm)
         self.config = other_config
         ## velocity variable (vector)
         self.velocity = velocity
@@ -53,7 +53,8 @@ class VelocityCorrection(Operator):
             # Variables discretization
             for v in self.variables:
                 topo = self.domain.getOrCreateTopology(self.domain.dimension,
-                                                       self.resolutions[v])
+                                                       self.resolutions[v],
+                                                       comm=self._comm)
                 self.discreteFields[v] = v.discretize(topo)
 
     @debug
diff --git a/HySoP/hysop/problem/problem.py b/HySoP/hysop/problem/problem.py
index c6d1fbee1..37227c629 100644
--- a/HySoP/hysop/problem/problem.py
+++ b/HySoP/hysop/problem/problem.py
@@ -177,6 +177,7 @@ class Problem(object):
         At end of time step, call an io step.\n
         Displays timings at simulation end.
         """
+        self.simulation.init()
         if main_rank == 0:
             print "\n\n Start solving ..."
         while not self.simulation.isOver:
@@ -184,8 +185,7 @@ class Problem(object):
 
             for op in self.operators:
                 if __VERBOSE__:
-                    print main_rank, op.__class__.__name__
-
+                    print main_rank, op.name
                 op.apply(self.simulation)
 
             testdump = \
diff --git a/HySoP/hysop/problem/problem_tasks.py b/HySoP/hysop/problem/problem_tasks.py
index ba67f1484..9749aa4e1 100644
--- a/HySoP/hysop/problem/problem_tasks.py
+++ b/HySoP/hysop/problem/problem_tasks.py
@@ -81,12 +81,12 @@ class ProblemTasks(Problem):
                         op.id_to != self.tasks_list[self._main_rank]:
                     self.operators.remove(op)
 
+        # Discretize and set up the computational operators
         for op in self.operators:
             if not isinstance(op, Monitoring):
                 if not isinstance(op, Redistribute):
                     if not isinstance(op, RedistributeIntercomm):
                         op.discretize()
-
         for op in self.operators:
             if not isinstance(op, Monitoring):
                 if not isinstance(op, Redistribute):
@@ -105,7 +105,11 @@ class ProblemTasks(Problem):
                     self.input.remove(v)
             for v in op.input:
                 if not v in self.input:
-                    self.input.append(v)
+                    if isinstance(op, RedistributeIntercomm):
+                        if op.id_from == self.tasks_list[self._main_rank]:
+                            self.input.append(v)
+                    else:
+                        self.input.append(v)
                     if not isinstance(op, Monitoring):
                         if not isinstance(op, Redistribute):
                             if not isinstance(op, RedistributeIntercomm):
@@ -113,6 +117,21 @@ class ProblemTasks(Problem):
 
         self._isReady = True
 
+    def addMonitors(self, monitors, position=None):
+        """
+        Set monitors for the problem.
+        If position is given, insert monitors[i] at position[i];
+        otherwise, monitors are appended at the end of the operator list.
+        """
+        if position is None:
+            for m in monitors:
+                if m.task_id == self.tasks_list[self._main_rank]:
+                    self.operators.append(m)
+        else:
+            for m, pos in zip(monitors, position):
+                if m.task_id == self.tasks_list[self._main_rank]:
+                    self.operators.insert(pos, m)
+
     @debug
     @timed_function
     def setUp(self):
@@ -133,6 +152,7 @@ class ProblemTasks(Problem):
 
         for op in self.operators:
             if isinstance(op, RedistributeIntercomm):
+                op.discretize()
                 op.setUp()
 
         for op in self.operators:
@@ -153,6 +173,7 @@ class ProblemTasks(Problem):
         At end of time step, call an io step.\n
         Displays timings at simulation end.
         """
+        self.simulation.init()
         self.main_comm.Barrier()
         if self._main_rank == 0:
             print "\n\n Start solving ..."
diff --git a/HySoP/hysop/problem/simulation.py b/HySoP/hysop/problem/simulation.py
index 9d2a3082c..df6b3850f 100644
--- a/HySoP/hysop/problem/simulation.py
+++ b/HySoP/hysop/problem/simulation.py
@@ -40,7 +40,7 @@ class Simulation(object):
         ## Is the simulation terminated
         self.isOver = False
         ## Iteration counter
-        self.currentIteration = 1
+        self.currentIteration = 0
         ## Simulation time step variable
         if (isinstance(timeStep, Variables)):
             ## Set isAdaptative to True
@@ -61,6 +61,9 @@ class Simulation(object):
         self.iterMax = iterMax
         self._lastStep = False
 
+    def init(self):
+        self.currentIteration = 1
+
     def advance(self):
         """
         Proceed to next time.
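With this change the iteration counter starts at 0 in the constructor and init() moves it to 1, so a freshly built (or reused) Simulation reports that nothing has run until solve() calls init() at the top of the time loop. Sketch of the intended lifecycle (constructor arguments other than timeStep and iterMax are assumptions):

    simu = Simulation(tinit=0.0, tend=1.0, timeStep=0.01, iterMax=100)
    assert simu.currentIteration == 0   # nothing computed yet
    simu.init()                         # called by Problem.solve()
    while not simu.isOver:
        # ... apply the operators for iteration simu.currentIteration ...
        simu.advance()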
diff --git a/HySoP/src/client_data.f90 b/HySoP/src/client_data.f90
index ba2280bfb..46b526867 100755
--- a/HySoP/src/client_data.f90
+++ b/HySoP/src/client_data.f90
@@ -1,22 +1,22 @@
 !> Some global parameters and variables
 module client_data
 
-  use MPI, only : MPI_DOUBLE_PRECISION, MPI_REAL,MPI_COMM_WORLD
+  use MPI, only : MPI_DOUBLE_PRECISION
   use, intrinsic :: iso_c_binding ! required for fftw
   implicit none
 
   !> kind for real variables (simple or double precision)
   integer, parameter :: mk = kind(1.0d0) ! double precision
-  !integer, parameter :: mk = kind(1.0) ! simple precision
   !> kind for real variables in mpi routines
   integer, parameter :: mpi_mk = MPI_DOUBLE_PRECISION
-  !integer, parameter :: mpi_mk = MPI_REAL
   !> Problem dimension (model, required for ppm to work properly)
   integer, parameter :: dime = 2
   !> Real dimension
   integer, parameter :: dim3 = 3
   !> Pi constant
   real(mk), parameter :: pi = 4.0*atan(1.0_mk)
+  !> MPI main communicator
+  integer :: main_comm
   !> Rank of the current mpi process
   integer :: rank ! current mpi process rank
   !> Total number of mpi processes
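The module-level main_comm declared above replaces the hard-wired MPI_COMM_WORLD, so the Fortran solvers run on whatever communicator Python hands them. On the Python side, an mpi4py communicator is turned into the integer handle Fortran expects with py2f(); that integer is what f2py passes to the parent_comm arguments introduced below. A minimal sketch (fortran_init is a placeholder for an f2py-wrapped init routine):

    from mpi4py import MPI

    comm = MPI.COMM_WORLD        # or any task sub-communicator
    fcomm = comm.py2f()          # integer Fortran handle for the same group
    # fortran_init(resolution, lengths, fcomm)   # hypothetical f2py call

    # The reverse conversion exists too, if Fortran returns a handle:
    same_comm = MPI.Comm.f2py(fcomm)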
diff --git a/HySoP/src/fftw/Diffusion.f90 b/HySoP/src/fftw/Diffusion.f90
index 3e5802df2..fe5cc9316 100755
--- a/HySoP/src/fftw/Diffusion.f90
+++ b/HySoP/src/fftw/Diffusion.f90
@@ -1,11 +1,11 @@
-!> Diffusion solvers based on fftw . 
+!> Diffusion solvers based on fftw.
 !! These routines are just used for tests, since their equivalent is implemented in python interface,
 !! with direct call to functions from fft2d and fft3d modules.
 module diffusion
 
   use fft2d
   use fft3d
-  
+
   use client_data
 
   implicit none
@@ -13,19 +13,19 @@ module diffusion
 contains
 
   subroutine solveDiffusion2D(nudt,velocity_x,velocity_y,omega)
-    
+
     real(mk), intent(in) :: nudt
     real(mk),dimension(:,:),intent(in) :: velocity_x,velocity_y
     real(mk),dimension(:,:),intent(inout) :: omega
 
     !! Compute fftw forward transform
-    !! Omega is used to initialize the fftw buffer for input field. 
+    !! Omega is used to initialize the fftw buffer for input field.
     call r2c_diffusion_2d(velocity_x,velocity_y)
-    
+
     call filter_diffusion_2d(nudt)
 
     call c2r_diffusion_2d(omega)
-  
+
   end subroutine solveDiffusion2D
 
   subroutine solveDiffusion3D(nudt,velocity_x,velocity_y,velocity_z,omega_x,omega_y,omega_z, ghosts_v, ghosts_w)
@@ -35,29 +35,35 @@ contains
     integer, dimension(3), intent(in) :: ghosts_w, ghosts_v
 
     !! Compute fftw forward transform
-    !! Omega is used to initialize the fftw buffer for input field. 
+    !! Omega is used to initialize the fftw buffer for input field.
 
     call r2c_3d(velocity_x,velocity_y,velocity_z, ghosts_v)
-    
+
     call filter_curl_diffusion_3d(nudt)
 
     call c2r_3d(omega_x,omega_y,omega_z, ghosts_w)
-  
+
   end subroutine solveDiffusion3D
 
 
-  subroutine initDiffusionSolverC(pbdim,resolution,lengths)
+  subroutine initDiffusionSolverC(pbdim,resolution,lengths,parent_comm)
 
     integer, intent(in) :: pbdim
     integer, dimension(pbdim),intent(in) :: resolution
     real(mk),dimension(pbdim), intent(in) :: lengths
+    integer, intent(in)                 :: parent_comm
+
+    integer :: ierr
+
+    ! Duplicate parent_comm
+    call mpi_comm_dup(parent_comm, main_comm, ierr)
 
     if(pbdim == 2) then
        call init_c2c_diffusion_2d(resolution,lengths)
     elseif(pbdim == 3) then
        call init_c2c_3d(resolution,lengths)
     endif
-    
+
   end subroutine initDiffusionSolverC
 
 !!$  subroutine solveDiffusion2DC(nudt,velocity_x,velocity_y,omega)
@@ -66,9 +72,9 @@ contains
 !!$    complex(mk),dimension(:,:) :: omega,velocity_x,velocity_y
 !!$
 !!$    !! Compute fftw forward transform
-!!$    !! Omega is used to initialize the fftw buffer for input field. 
+!!$    !! Omega is used to initialize the fftw buffer for input field.
 !!$    call c2c_2d(omega,velocity_x,velocity_y)
-!!$  
+!!$
 !!$  end subroutine solveDiffusion2DC
 !!$
 !!$  subroutine solveDiffusion3DC(nudt, omega_x,omega_y,omega_z,velocity_x,velocity_y,velocity_z)
@@ -77,22 +83,22 @@ contains
 !!$    complex(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z
 !!$
 !!$    !! Compute fftw forward transform
-!!$    !! Omega is used to initialize the fftw buffer for input field. 
+!!$    !! Omega is used to initialize the fftw buffer for input field.
 !!$    call c2c_3d(omega_x,omega_y,omega_z,velocity_x,velocity_y,velocity_z)
-!!$      
+!!$
 !!$  end subroutine solveDiffusion3DC
 
   subroutine cleanDiffusionSolver3D()
     integer ::info
-    call MPI_BARRIER(MPI_COMM_WORLD,info)
+    call MPI_BARRIER(main_comm,info)
     call cleanFFTW_3d()
-    
+
   end subroutine cleanDiffusionSolver3D
   subroutine cleanDiffusionSolver2D()
     integer ::info
-    call MPI_BARRIER(MPI_COMM_WORLD,info)
+    call MPI_BARRIER(main_comm,info)
     call cleanFFTW_2d()
-    
+
   end subroutine cleanDiffusionSolver2D
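Note that each init routine duplicates the communicator it receives rather than using it directly: mpi_comm_dup gives the FFTW plans and the cleanup barriers a private communication context, so library traffic can never match messages posted by the caller on the parent communicator. The mpi4py equivalent of the pattern, for reference:

    from mpi4py import MPI

    parent = MPI.COMM_WORLD
    lib_comm = parent.Dup()   # same ranks, isolated message context
    # ... hand lib_comm to the library, keep parent for the application ...
    lib_comm.Free()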
 
 
diff --git a/HySoP/src/fftw/Poisson.f90 b/HySoP/src/fftw/Poisson.f90
index f9900cf80..78b355c79 100755
--- a/HySoP/src/fftw/Poisson.f90
+++ b/HySoP/src/fftw/Poisson.f90
@@ -1,4 +1,4 @@
-!> Poisson solvers based on fftw . 
+!> Poisson solvers based on fftw.
 !
 !! These routines are just used for tests, since their equivalent is implemented in python interface,
 !! with direct call to functions from fft2d and fft3d modules.
@@ -13,18 +13,24 @@ module poisson
 
 contains
 
-  subroutine initPoissonSolver(pbdim,resolution,lengths)
+  subroutine initPoissonSolver(pbdim,resolution,lengths,parent_comm)
 
     integer, intent(in) :: pbdim
     integer, dimension(pbdim),intent(in) :: resolution
     real(mk),dimension(pbdim), intent(in) :: lengths
-    
+    integer, intent(in)                 :: parent_comm
+
+    integer :: ierr
+
+    ! Duplicate parent_comm
+    call mpi_comm_dup(parent_comm, main_comm, ierr)
+
     if(pbdim == 2) then
        call init_r2c_2d(resolution,lengths)
     else if(pbdim == 3) then
        call init_r2c_3d(resolution,lengths)
     end if
-    
+
   end subroutine initPoissonSolver
 
   subroutine solvePoisson2D(omega,velocity_x,velocity_y)
@@ -33,13 +39,13 @@ contains
     real(mk),dimension(:,:),intent(inout) :: velocity_x,velocity_y
 
     !! Compute fftw forward transform
-    !! Omega is used to initialize the fftw buffer for input field. 
+    !! Omega is used to initialize the fftw buffer for input field.
     call r2c_2d(omega)
-    
+
     call filter_poisson_2d()
 
     call c2r_2d(velocity_x,velocity_y)
-  
+
   end subroutine solvePoisson2D
 
   subroutine solvePoisson3D(omega_x,omega_y,omega_z,velocity_x,velocity_y,velocity_z, ghosts_w, ghosts_v)
@@ -49,7 +55,7 @@ contains
     integer, dimension(3), intent(in) :: ghosts_w, ghosts_v
     real(mk) :: start
     !! Compute fftw forward transform
-    !! Omega is used to initialize the fftw buffer for input field. 
+    !! Omega is used to initialize the fftw buffer for input field.
 
     start = MPI_WTIME()
     call r2c_3d(omega_x,omega_y,omega_z, ghosts_w)
@@ -70,9 +76,9 @@ contains
     real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z
     real(mk) :: start
     !! Compute fftw forward transform
-    !! Omega is used to initialize the fftw buffer for input field. 
+    !! Omega is used to initialize the fftw buffer for input field.
 
-    print *, "--------------------------- Start solve 3d many case ..." 
+    print *, "--------------------------- Start solve 3d many case ..."
 
     start = MPI_WTIME()
     call r2c_3d_many(omega_x,omega_y,omega_z)
@@ -81,20 +87,26 @@ contains
     start = MPI_WTIME()
     call filter_poisson_3d_many()
     print *, "time for filtermany : ", MPI_WTIME() - start
-    
+
     start = MPI_WTIME()
     call c2r_3d_many(velocity_x,velocity_y,velocity_z)
     print *, "time for c2rmany : ", MPI_WTIME() - start
-  
+
   end subroutine solvePoisson3D_many
 
 
-  subroutine initPoissonSolverC(pbdim,resolution,lengths)
+  subroutine initPoissonSolverC(pbdim,resolution,lengths,parent_comm)
 
     integer, intent(in) :: pbdim
     integer, dimension(pbdim),intent(in) :: resolution
     real(mk),dimension(pbdim), intent(in) :: lengths
-    
+    integer, intent(in)                 :: parent_comm
+
+    integer :: ierr
+
+    ! Duplicate parent_comm
+    call mpi_comm_dup(parent_comm, main_comm, ierr)
+
     if(pbdim == 2) then
        call init_c2c_2d(resolution,lengths)
     elseif(pbdim == 3) then
@@ -108,9 +120,9 @@ contains
     complex(mk),dimension(:,:) :: omega,velocity_x,velocity_y
 
     !! Compute fftw forward transform
-    !! Omega is used to initialize the fftw buffer for input field. 
+    !! Omega is used to initialize the fftw buffer for input field.
     call c2c_2d(omega,velocity_x,velocity_y)
-  
+
   end subroutine solvePoisson2DC
 
   subroutine solvePoisson3DC(omega_x,omega_y,omega_z,velocity_x,velocity_y,velocity_z)
@@ -119,22 +131,22 @@ contains
     complex(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z
 
     !! Compute fftw forward transform
-    !! Omega is used to initialize the fftw buffer for input field. 
+    !! Omega is used to initialize the fftw buffer for input field.
     call c2c_3d(omega_x,omega_y,omega_z,velocity_x,velocity_y,velocity_z)
-      
+
   end subroutine solvePoisson3DC
 
   subroutine cleanPoissonSolver3D()
     integer ::info
-    call MPI_BARRIER(MPI_COMM_WORLD,info)
+    call MPI_BARRIER(main_comm,info)
     call cleanFFTW_3d()
-    
+
   end subroutine cleanPoissonSolver3D
   subroutine cleanPoissonSolver2D()
     integer ::info
-    call MPI_BARRIER(MPI_COMM_WORLD,info)
+    call MPI_BARRIER(main_comm,info)
     call cleanFFTW_2d()
-    
+
   end subroutine cleanPoissonSolver2D
 
 
diff --git a/HySoP/src/fftw/fft2d.f90 b/HySoP/src/fftw/fft2d.f90
index d4a5bf26e..c56412ff3 100755
--- a/HySoP/src/fftw/fft2d.f90
+++ b/HySoP/src/fftw/fft2d.f90
@@ -90,7 +90,7 @@ contains
     fft_resolution = resolution-1
 
     ! compute "optimal" size (according to fftw) for local date (warning : dimension reversal)
-    alloc_local = fftw_mpi_local_size_2d_transposed(fft_resolution(c_Y),fft_resolution(c_X),MPI_COMM_WORLD,&
+    alloc_local = fftw_mpi_local_size_2d_transposed(fft_resolution(c_Y),fft_resolution(c_X),main_comm,&
          local_resolution(c_Y),local_offset(c_Y),local_resolution(c_X),local_offset(c_X));
 
     ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!)
@@ -107,11 +107,11 @@ contains
 
     !   create MPI plan for in-place forward/backward DFT (note dimension reversal)
     plan_forward1 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,&
-         MPI_COMM_WORLD,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+         main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
     plan_backward1 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,&
-         MPI_COMM_WORLD,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+         main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
     plan_backward2 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,&
-         MPI_COMM_WORLD,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+         main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
 
     call computeKxC(lengths(c_X))
     call computeKy(lengths(c_Y))
@@ -191,7 +191,7 @@ contains
     fft_resolution(:) = resolution(:)-1
     halfLength = fft_resolution(c_X)/2+1
     ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!)
-    alloc_local = fftw_mpi_local_size_2d_transposed(fft_resolution(c_Y),halfLength,MPI_COMM_WORLD,local_resolution(c_Y),&
+    alloc_local = fftw_mpi_local_size_2d_transposed(fft_resolution(c_Y),halfLength,main_comm,local_resolution(c_Y),&
          local_offset(c_Y),local_resolution(c_X),local_offset(c_X));
 
     ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!)
@@ -210,11 +210,11 @@ contains
 
     !   create MPI plans for in-place forward/backward DFT (note dimension reversal)
     plan_forward1 = fftw_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, &
-         MPI_COMM_WORLD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+         main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
     plan_backward1 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, &
-         MPI_COMM_WORLD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+         main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
     plan_backward2 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, &
-         MPI_COMM_WORLD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+         main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
 
     call computeKx(lengths(c_X))
     call computeKy(lengths(c_Y))
@@ -515,7 +515,7 @@ contains
     n(2) = halfLength
     ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!)
     alloc_local = fftw_mpi_local_size_many_transposed(2,n,howmany,FFTW_MPI_DEFAULT_BLOCK,&
-         FFTW_MPI_DEFAULT_BLOCK,MPI_COMM_WORLD,local_resolution(c_Y),&
+         FFTW_MPI_DEFAULT_BLOCK,main_comm,local_resolution(c_Y),&
          local_offset(c_Y),local_resolution(c_X),local_offset(c_X));
 
     ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!)
@@ -534,11 +534,11 @@ contains
 
     !   create MPI plans for in-place forward/backward DFT (note dimension reversal)
     plan_forward1 = fftw_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1Many, dataout1, &
-         MPI_COMM_WORLD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+         main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
     plan_backward1 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, &
-         MPI_COMM_WORLD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+         main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
     plan_backward2 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, &
-         MPI_COMM_WORLD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+         main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
 
     call computeKx(lengths(c_X))
     call computeKy(lengths(c_Y))
diff --git a/HySoP/src/fftw/fft3d.f90 b/HySoP/src/fftw/fft3d.f90
index ea581db0e..1b064574e 100755
--- a/HySoP/src/fftw/fft3d.f90
+++ b/HySoP/src/fftw/fft3d.f90
@@ -98,7 +98,7 @@ contains
     fft_resolution(:) = resolution(:)-1
 
     ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal)
-    alloc_local = fftw_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),MPI_COMM_WORLD,&
+    alloc_local = fftw_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),main_comm,&
          local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y));
 
     ! Set a default value for c_X components.
@@ -125,17 +125,17 @@ contains
 
     !   create MPI plan for in-place forward/backward DFT (note dimension reversal)
     plan_forward1 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,&
-         MPI_COMM_WORLD,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+         main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
     plan_backward1 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,&
-         MPI_COMM_WORLD,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+         main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
     plan_forward2 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain2,dataout2,&
-         MPI_COMM_WORLD,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+         main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
     plan_backward2 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,&
-         MPI_COMM_WORLD,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+         main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
     plan_forward3 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain3,dataout3,&
-         MPI_COMM_WORLD,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+         main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
     plan_backward3 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout3,datain3,&
-         MPI_COMM_WORLD,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+         main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
 
     call computeKx(lengths(c_X))
     call computeKy(lengths(c_Y))
@@ -222,7 +222,7 @@ contains
 
     ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal)
     alloc_local = fftw_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),halfLength,&
-         MPI_COMM_WORLD,local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y));
+         main_comm,local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y));
 
     ! init c_X part. This is required to compute kx with the same function in 2d and 3d cases.
     local_offset(c_X) = 0
@@ -247,17 +247,17 @@ contains
 
     !   create MPI plans for in-place forward/backward DFT (note dimension reversal)
    plan_forward1 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, &
-         MPI_COMM_WORLD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+         main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
     plan_backward1 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, &
-         MPI_COMM_WORLD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+         main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
     plan_forward2 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain2, dataout2, &
-         MPI_COMM_WORLD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+         main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
     plan_backward2 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, &
-         MPI_COMM_WORLD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+         main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
     plan_forward3 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain3, dataout3, &
-         MPI_COMM_WORLD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+         main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
     plan_backward3 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout3, rdatain3, &
-         MPI_COMM_WORLD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+         main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
 
     call computeKx(lengths(c_X))
     call computeKy(lengths(c_Y))
@@ -424,7 +424,7 @@ contains
     howmany = 3
     ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal)
     alloc_local = fftw_mpi_local_size_many_transposed(3,n,howmany,blocksize,blocksize,&
-         MPI_COMM_WORLD,local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y));
+         main_comm,local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y));
 
     ! init c_X part. This is required to compute kx with the same function in 2d and 3d cases.
     local_offset(c_X) = 0
@@ -441,9 +441,9 @@ contains
     n(3) = fft_resolution(c_X)
 
     plan_forward1 = fftw_mpi_plan_many_dft_r2c(3,n,howmany,blocksize,blocksize, rdatain_many, dataout_many, &
-         MPI_COMM_WORLD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+         main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
     plan_backward1 = fftw_mpi_plan_many_dft_c2r(3,n,howmany,blocksize,blocksize, dataout_many, rdatain_many, &
-         MPI_COMM_WORLD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+         main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
     call computeKx(lengths(c_X))
     call computeKy(lengths(c_Y))
     call computeKz(lengths(c_Z))
diff --git a/HySoP/src/main/main.f90 b/HySoP/src/main/main.f90
index 4102d726a..38cc6a56a 100755
--- a/HySoP/src/main/main.f90
+++ b/HySoP/src/main/main.f90
@@ -44,27 +44,27 @@ contains
     logical :: ok
     !    call MPI_BARRIER(MPI_COMM_WORLD)
     if (rank==0) print *, " ======= Test 2D Poisson solver (r2c) for resolution ======= ", resolution
-    
+
     lengths(:)=2*pi!!1.0
     step(:)=lengths(:)/(resolution(:)-1)
 
     !call init_r2c_2dBIS(resolution,lengths)
-    
-    call initPoissonSolver(2,resolution,lengths)
-    
+
+    call initPoissonSolver(2,resolution,lengths, MPI_COMM_WORLD)
+
     call getParamatersTopologyFFTW2d(nfft,offset)
     allocate(omega(nfft(c_X),nfft(c_Y)),velocity_x(nfft(c_X),nfft(c_Y)),velocity_y(nfft(c_X),nfft(c_Y)))
     allocate(refx(nfft(c_X),nfft(c_Y)),refy(nfft(c_X),nfft(c_Y)))
     call computeOmega2D(omega,step,refx,refy,offset,lengths)
-    
+
     call solvePoisson2D(omega,velocity_x,velocity_y)
     call cleanPoissonSolver2D()
-    
+
     ok = iscloseto_2d(refx,velocity_x,tolerance,step,error)
     if(rank==0) write(*,'(a,L2,3f10.4)') 'Solver convergence (x): ', ok, error
     ok = iscloseto_2d(refy,velocity_y,tolerance,step,error)
     if(rank==0)  write(*,'(a,L2,3f10.4)') 'Solver convergence (y): ', ok, error
-    
+
     if (rank==0) print *, " ======= End of Test 2D Poisson ======= "
 
   end subroutine test2D_r2c
@@ -79,21 +79,21 @@ contains
     logical :: ok
 
     if (rank==0) print *, " ======= Test 2D Poisson solver (c2c) for resolution ======= ", resolution
-    
+
     lengths(:)=2*pi!!1.0
     step(:)=lengths(:)/(resolution(:)-1)
-    
+
 !    call MPI_BARRIER(MPI_COMM_WORLD)
-    
-    call initPoissonSolverC(2,resolution,lengths)
+
+    call initPoissonSolverC(2,resolution,lengths,MPI_COMM_WORLD)
 
     call getParamatersTopologyFFTW2d(nfft,offset)
 
     allocate(omega(nfft(c_X),nfft(c_Y)),velocity_x(nfft(c_X),nfft(c_Y)),velocity_y(nfft(c_X),nfft(c_Y)))
     allocate(refx(nfft(c_X),nfft(c_Y)),refy(nfft(c_X),nfft(c_Y)))
-    
+
     call computeOmega2DC(omega,step,refx,refy,offset,lengths)
-    
+
     call solvePoisson2DC(omega,velocity_x,velocity_y)
     call cleanPoissonSolver2D()
 
@@ -118,17 +118,17 @@ contains
     integer(C_INTPTR_T),dimension(3) :: nfft,offset
     real(mk) :: error,start
     logical :: ok
-    
+
     if (rank==0) print *, " ======= Test 3D Poisson (r2c) solver for resolution  ", resolution
-    
+
     lengths(:)=2*pi!!1.0
     step(:)=lengths(:)/(resolution(:)-1)
     ghosts_v(:) = 0
     ghosts_w(:) = 0
     !    call MPI_BARRIER(MPI_COMM_WORLD)
-    
-    call initPoissonSolver(3,resolution,lengths)
-    
+
+    call initPoissonSolver(3,resolution,lengths,MPI_COMM_WORLD)
+
     call getParamatersTopologyFFTW3d(nfft,offset)
 
     allocate(omega_x(nfft(c_X),nfft(c_Y),nfft(c_Z)),omega_y(nfft(c_X),nfft(c_Y),nfft(c_Z)),omega_z(nfft(c_X),nfft(c_Y),nfft(c_Z)))
@@ -137,12 +137,12 @@ contains
     allocate(refx(nfft(c_X),nfft(c_Y),nfft(c_Z)),refy(nfft(c_X),nfft(c_Y),nfft(c_Z)),refz(nfft(c_X),nfft(c_Y),nfft(c_Z)))
 
     call computeOmega3D(omega_x,omega_y,omega_z,step,refx,refy,refz,offset,lengths)
-    
+
     start = MPI_Wtime()
     call solvePoisson3D(omega_x,omega_y,omega_z,velocity_x,velocity_y,velocity_z, ghosts_w, ghosts_v)
     print *, "resolution time : ", MPI_Wtime()-start
     call cleanPoissonSolver3D()
-    
+
     ok = iscloseto_3d(refx,velocity_x,tolerance,step,error)
     if(rank==0) write(*,'(a,L2,3f10.4)') 'Solver convergence (x): ',ok, error
     ok = iscloseto_3d(refy,velocity_y,tolerance,step,error)
@@ -165,16 +165,16 @@ contains
     integer(C_INTPTR_T),dimension(3) :: nfft,offset
     real(mk) :: error
     logical :: ok
-    
+
     if (rank==0) print *, " ======= Test 3D Poisson (r2c many) solver for resolution ======= ", resolution
-    
+
     lengths(:)=2*pi!!1.0
     step(:)=lengths(:)/(resolution(:)-1)
-    
+
     !    call MPI_BARRIER(MPI_COMM_WORLD)
-    
+
     call init_r2c_3d_many(resolution,lengths)
-    
+
     call getParamatersTopologyFFTW3d(nfft,offset)
 
     allocate(omega_x(nfft(c_X),nfft(c_Y),nfft(c_Z)),omega_y(nfft(c_X),nfft(c_Y),nfft(c_Z)),omega_z(nfft(c_X),nfft(c_Y),nfft(c_Z)))
@@ -186,7 +186,7 @@ contains
 
     call solvePoisson3D_many(omega_x,omega_y,omega_z,velocity_x,velocity_y,velocity_z)
     call cleanFFTW_3d()
-    
+
     ok = iscloseto_3d(refx,velocity_x,tolerance,step,error)
     if(rank==0) write(*,'(a,L2,3f10.4)') 'Solver convergence (x): ',ok, error
     ok = iscloseto_3d(refy,velocity_y,tolerance,step,error)
@@ -199,7 +199,7 @@ contains
     if (rank==0) print *, " ======= End of Test 3D Poisson ======= "
 
   end subroutine test3D_r2c_many
-  
+
   subroutine test3D_c2c()
 
     integer, dimension(3),parameter :: resolution =(/65,65,65/)
@@ -210,23 +210,23 @@ contains
     logical :: ok
 
     if (rank==0) print *, " ======= Test 3D Poisson (c2c) solver for resolution ======= ", resolution
-    
+
     lengths(:)=2*pi!!1.0
     step(:)=lengths(:)/(resolution(:)-1)
-    
+
 !    call MPI_BARRIER(MPI_COMM_WORLD)
-    
-    call initPoissonSolverC(3,resolution,lengths)
-    
+
+    call initPoissonSolverC(3,resolution,lengths,MPI_COMM_WORLD)
+
     call getParamatersTopologyFFTW3d(nfft,offset)
 
     allocate(omega_x(nfft(c_X),nfft(c_Y),nfft(c_Z)),omega_y(nfft(c_X),nfft(c_Y),nfft(c_Z)),omega_z(nfft(c_X),nfft(c_Y),nfft(c_Z)))
     allocate(velocity_x(nfft(c_X),nfft(c_Y),nfft(c_Z)),velocity_y(nfft(c_X),nfft(c_Y),nfft(c_Z)),&
          velocity_z(nfft(c_X),nfft(c_Y),nfft(c_Z)))
     allocate(refx(nfft(c_X),nfft(c_Y),nfft(c_Z)),refy(nfft(c_X),nfft(c_Y),nfft(c_Z)),refz(nfft(c_X),nfft(c_Y),nfft(c_Z)))
-    
+
     call computeOmega3DC(omega_x,omega_y,omega_z,step,refx,refy,refz,offset,lengths)
-    
+
     call solvePoisson3DC(omega_x,omega_y,omega_z,velocity_x,velocity_y,velocity_z)
     call cleanPoissonSolver3D()
 
@@ -236,14 +236,14 @@ contains
     if(rank==0)  write(*,'(a,L2,3f10.4)') 'Solver convergence (y): ', ok,error
     ok = iscloseto_3dc(refz,velocity_z,tolerance,step,error)
     if(rank==0)  write(*,'(a,L2,3f10.4)') 'Solver convergence (z): ', ok,error
-    
+
     deallocate(omega_x,omega_y,omega_z,velocity_x,velocity_y,velocity_z,refx,refy,refz)
 
     if (rank==0) print *, " ======= End of Test 3D Poisson ======= "
 
-    
+
   end subroutine test3D_c2c
 
-  
+
 
 end program mainParmes
diff --git a/HySoP/src/scalesInterface/layout/cart_topology.f90 b/HySoP/src/scalesInterface/layout/cart_topology.f90
index 252efad61..803710c40 100644
--- a/HySoP/src/scalesInterface/layout/cart_topology.f90
+++ b/HySoP/src/scalesInterface/layout/cart_topology.f90
@@ -68,6 +68,8 @@ module cart_topology
 
     ! ----- Communicators -----
+    !> Main communicator, duplicated from the caller's parent communicator
+    integer, protected                  :: main_comm
     !> Communicator associated with the cartesian topology
     integer, protected                  :: cart_comm
     !> Communicators devoted to 1-dimensional subgrids (along Y and Z)
     integer, protected                  :: X_comm, Y_comm, Z_comm
@@ -159,6 +161,7 @@ contains
 !> Creation of the cartesian mpi topology and (if needed) of all communicators
 !! used for particles method.
 !!    @param[in]    dims        = array specifying the number of processes in each dimension
+!!    @param[in]    parent_comm = parent mpi communicator (duplicated into the module's main_comm)
 !!    @param[out]   ierr        = error code
 !!    @param[out]   spec_comm   = mpi communicator used by the spectral part of the code (optional).
 !!    @param[in]    topology    = to choose the dimension of the mpi topology (if 0 then none) (optional).
@@ -167,7 +170,7 @@ contains
 !!    that will be used for all the spectral part of the code (ie everything except
 !!    the particles part). If needed, it also initializes all the mpi context
 !!    used by the particles solver.
-subroutine cart_create(dims, ierr, spec_comm, topology)
+subroutine cart_create(dims, ierr, parent_comm, spec_comm, topology)
 
     use mpi
     implicit none
@@ -175,6 +178,7 @@ subroutine cart_create(dims, ierr, spec_comm, topology)
     ! Input/Output
     integer, dimension(:), intent(in)   :: dims
     integer, intent(out)                :: ierr
+    integer, intent(in)                 :: parent_comm
     integer, optional, intent(out)      :: spec_comm
     integer, optional, intent(in)       :: topology
     ! Other local variables
@@ -187,6 +191,9 @@ subroutine cart_create(dims, ierr, spec_comm, topology)
     integer, dimension(1)   :: nb_proc                          ! total number of processes
     logical, dimension(1)   :: period_1D = .false.              ! periodicity in case of 1D mpi topology.
 
+    ! Duplicate parent_comm
+    call mpi_comm_dup(parent_comm, main_comm, ierr)
+
     ! If there is a scalar to advect with the particles method, then initialize
     ! the 2D mpi topology
     if (present(topology))  then
@@ -214,11 +221,11 @@ subroutine cart_create(dims, ierr, spec_comm, topology)
         else if (size(dims)==3) then
             nb_proc_dim = dims
             if (nb_proc_dim(1)/=1) then
-                call mpi_comm_rank(MPI_COMM_WORLD, cart_rank, ierr)
+                call mpi_comm_rank(main_comm, cart_rank, ierr)
                 if (cart_rank==0) write(*,'(a)') ' XXXXXXXXXX Warning: subdivision along X XXXXXXXXXX'
             end if
         else
-            call mpi_comm_rank(MPI_COMM_WORLD, cart_rank, ierr)
+            call mpi_comm_rank(main_comm, cart_rank, ierr)
             if (cart_rank==0) then
                 write(*,'(a)') ' XXXXXXXXXX Error - wrong number of processes XXXXXXXXXX'
                 write(*,'(a,10(x,i0))') ' input argument dims =', dims
@@ -226,7 +233,7 @@ subroutine cart_create(dims, ierr, spec_comm, topology)
             stop
         end if
 
-        call mpi_cart_create(MPI_COMM_WORLD, 3, nb_proc_dim, periods, reorganisation, &
+        call mpi_cart_create(main_comm, 3, nb_proc_dim, periods, reorganisation, &
                 & cart_comm, ierr)
 
         ! --- Create 1D communicator ---
@@ -285,7 +292,7 @@ subroutine cart_create(dims, ierr, spec_comm, topology)
     case (1)
             ! Construct 1D non-periodic mpi topology
             nb_proc = product(nb_proc_dim)
-            call mpi_cart_create(MPI_COMM_WORLD, 1, nb_proc, period_1D, reorganisation, &
+            call mpi_cart_create(main_comm, 1, nb_proc, period_1D, reorganisation, &
                 & cart_comm, ierr)
             ! Use it as spectral communicator.
             spec_comm = cart_comm
@@ -293,9 +300,9 @@ subroutine cart_create(dims, ierr, spec_comm, topology)
     case default
         ! ===== Do not use mpi topology =====
         if (present(spec_comm)) then
-            spec_comm = MPI_COMM_WORLD
+            spec_comm = main_comm
         end if
-        call mpi_comm_rank(MPI_COMM_WORLD,cart_rank,ierr)
+        call mpi_comm_rank(main_comm,cart_rank,ierr)
     end select
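cart_create now builds every topology on a duplicate of parent_comm, so the Cartesian and 1D sub-communicators of the particle solver descend from the caller's communicator instead of MPI_COMM_WORLD. The same construction in mpi4py, as a sketch:

    from mpi4py import MPI

    parent = MPI.COMM_WORLD                    # whatever the caller passes
    main_comm = parent.Dup()                   # mpi_comm_dup(...) above
    dims = MPI.Compute_dims(main_comm.Get_size(), 3)
    # Periodic 3D Cartesian topology, as in the mpi_cart_create call above
    cart_comm = main_comm.Create_cart(dims, periods=[True] * 3, reorder=True)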
 
 
diff --git a/HySoP/src/scalesInterface/particles/advecX.f90 b/HySoP/src/scalesInterface/particles/advecX.f90
index 9cb350e43..55093d016 100644
--- a/HySoP/src/scalesInterface/particles/advecX.f90
+++ b/HySoP/src/scalesInterface/particles/advecX.f90
@@ -674,11 +674,11 @@ subroutine advecX_limitator_group(gp_s, ind_group, j, k, p_pos, &
     ! ===== Exchange ghost =====
     ! Receive ghost value, ie value from neighbors boundaries.
     tag_table = compute_tag(ind_group, tag_part_slope, direction)
-    call mpi_Irecv(Rbuffer(1,1,1), com_size, MPI_DOUBLE_PRECISION, &
+    call mpi_Irecv(Rbuffer(1,1,1), com_size, MPI_REAL_WP, &
             & neighbors(direction,1), tag_table(1), D_comm(direction), rece_request, ierr)
     ! Send ghost for the two first scalar values of each line
     Sbuffer = scalar(1:2,j:j+gp_s(1)-1,k:k+gp_s(2)-1)
-    call mpi_ISsend(Sbuffer(1,1,1), com_size, MPI_DOUBLE_PRECISION, &
+    call mpi_ISsend(Sbuffer(1,1,1), com_size, MPI_REAL_WP, &
             & neighbors(direction,-1), tag_table(1), D_comm(direction), send_request, ierr)
 
     ! ===== Compute scalar variation =====
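This hunk (and the matching ones in the other scales files below) replaces the hard-coded MPI_DOUBLE_PRECISION with MPI_REAL_WP, the datatype tied to the working-precision kind in precision_tools.f90, so a single-precision build no longer describes its single-precision buffers as double to MPI. The same coupling expressed in Python, as a sketch:

    import numpy as np
    from mpi4py import MPI

    # Working precision chosen once per build
    # (assumption: the double-precision case, mk = kind(1.0d0)).
    WP = np.float64

    # Derive the matching MPI datatype from the working precision,
    # which is exactly what MPI_REAL_WP provides on the Fortran side.
    MPI_REAL_WP = MPI.DOUBLE if WP == np.float64 else MPI.FLOAT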
diff --git a/HySoP/src/scalesInterface/particles/advecY.f90 b/HySoP/src/scalesInterface/particles/advecY.f90
index c7c72bbcd..e0330f2e2 100644
--- a/HySoP/src/scalesInterface/particles/advecY.f90
+++ b/HySoP/src/scalesInterface/particles/advecY.f90
@@ -440,7 +440,7 @@ subroutine advecY_limitator_group(gp_s, ind_group, i, k, p_pos, &
     ! ===== Exchange ghost =====
     ! Receive ghost value, ie value from neighbors boundaries.
     tag_table = compute_tag(ind_group, tag_part_slope, direction)
-    call mpi_Irecv(Rbuffer(1,1,1), com_size, MPI_DOUBLE_PRECISION, &
+    call mpi_Irecv(Rbuffer(1,1,1), com_size, MPI_REAL_WP, &
             & neighbors(direction,1), tag_table(1), D_comm(direction), rece_request, ierr)
     ! Send ghost for the two first scalar values of each line
     do i1 = 1, gp_s(1)
@@ -449,7 +449,7 @@ subroutine advecY_limitator_group(gp_s, ind_group, i, k, p_pos, &
             Sbuffer(i1,i2,2) = scalar(i+i1-1,2,k+i2-1)
         end do
     end do
-    call mpi_ISsend(Sbuffer(1,1,1), com_size, MPI_DOUBLE_PRECISION, &
+    call mpi_ISsend(Sbuffer(1,1,1), com_size, MPI_REAL_WP, &
             & neighbors(direction,-1), tag_table(1), D_comm(direction), send_request, ierr)
 
     ! ===== Compute scalar variation =====
diff --git a/HySoP/src/scalesInterface/particles/advecZ.f90 b/HySoP/src/scalesInterface/particles/advecZ.f90
index e5c5d6f17..f0fad55be 100644
--- a/HySoP/src/scalesInterface/particles/advecZ.f90
+++ b/HySoP/src/scalesInterface/particles/advecZ.f90
@@ -440,11 +440,11 @@ subroutine advecZ_limitator_group(gp_s, ind_group, i, j, p_pos, &
     ! ===== Exchange ghost =====
     ! Receive ghost value, ie value from neighbors boundaries.
     tag_table = compute_tag(ind_group, tag_part_slope, direction)
-    call mpi_Irecv(Rbuffer(1,1,1), com_size, MPI_DOUBLE_PRECISION, &
+    call mpi_Irecv(Rbuffer(1,1,1), com_size, MPI_REAL_WP, &
             & neighbors(direction,1), tag_table(1), D_comm(direction), rece_request, ierr)
     ! Send ghost for the two first scalar values of each line
     Sbuffer = scalar(i:i+gp_s(1)-1,j:j+gp_s(2)-1,1:2)
-    call mpi_ISsend(Sbuffer(1,1,1), com_size, MPI_DOUBLE_PRECISION, &
+    call mpi_ISsend(Sbuffer(1,1,1), com_size, MPI_REAL_WP, &
             & neighbors(direction,-1), tag_table(1), D_comm(direction), send_request, ierr)
 
     ! ===== Compute scalar variation =====
diff --git a/HySoP/src/scalesInterface/particles/advec_common_interpol.F90 b/HySoP/src/scalesInterface/particles/advec_common_interpol.F90
index defedc610..915671f9e 100644
--- a/HySoP/src/scalesInterface/particles/advec_common_interpol.F90
+++ b/HySoP/src/scalesInterface/particles/advec_common_interpol.F90
@@ -228,7 +228,7 @@ subroutine AC_interpol_lin(direction, gs, ind_group, V_comp, p_inter)
             ! IIa - Compute reception tag
             tag = compute_tag(ind_group, tag_velo_V, direction, -proc_gap)
             ! IIb - Receive message
-            call mpi_Irecv(V_buffer(pos_in_buffer(proc_gap)), cartography(1,proc_gap), MPI_DOUBLE_PRECISION, &
+            call mpi_Irecv(V_buffer(pos_in_buffer(proc_gap)), cartography(1,proc_gap), MPI_REAL_WP, &
                     & neighbors(direction,proc_gap), tag, D_comm(direction), rece_request(proc_gap), ierr)
         end if
     end do
@@ -283,11 +283,11 @@ subroutine AC_interpol_lin(direction, gs, ind_group, V_comp, p_inter)
             tag = compute_tag(ind_group, tag_velo_V, direction, proc_gap)
             ! IIc - Send message
 #ifdef BLOCKING_SEND
-            call mpi_Send(send_buffer(1), com_size, MPI_DOUBLE_PRECISION,  &
+            call mpi_Send(send_buffer(1), com_size, MPI_REAL_WP,  &
                     & neighbors(direction,proc_gap), tag, D_comm(direction),&
                     & ierr)
 #else
-           call mpi_ISend(send_buffer(1), com_size, MPI_DOUBLE_PRECISION,  &
+           call mpi_ISend(send_buffer(1), com_size, MPI_REAL_WP,  &
                    & neighbors(direction,proc_gap), tag, D_comm(direction),&
                    & s_request(proc_gap), ierr)
 #endif
@@ -1036,7 +1036,7 @@ subroutine AC_interpol_plus(direction, gs, ind_group, id1, id2, V_coarse, p_V)
             ! IIa - Compute reception tag
             tag = compute_tag(ind_group, tag_velo_V, direction, -proc_gap)
             ! IIb - Receive message
-            call mpi_Irecv(V_buffer(pos_in_buffer(proc_gap)), cartography(1,proc_gap), MPI_DOUBLE_PRECISION, &
+            call mpi_Irecv(V_buffer(pos_in_buffer(proc_gap)), cartography(1,proc_gap), MPI_REAL_WP, &
                     & neighbors(direction,proc_gap), tag, D_comm(direction), rece_request(proc_gap), ierr)
         end if
     end do
@@ -1082,7 +1082,7 @@ subroutine AC_interpol_plus(direction, gs, ind_group, id1, id2, V_coarse, p_V)
             ! IIb - Compute send tag
             tag = compute_tag(ind_group, tag_velo_V, direction, proc_gap)
             ! IIc - Send message
-            call mpi_Send(send_buffer(1), com_size, MPI_DOUBLE_PRECISION,  &
+            call mpi_Send(send_buffer(1), com_size, MPI_REAL_WP,  &
                     & neighbors(direction,proc_gap), tag, D_comm(direction),&
                     & ierr)
                     !& ierr)
diff --git a/HySoP/src/scalesInterface/particles/advec_common_remesh.F90 b/HySoP/src/scalesInterface/particles/advec_common_remesh.F90
index 83316eec6..2fafde394 100644
--- a/HySoP/src/scalesInterface/particles/advec_common_remesh.F90
+++ b/HySoP/src/scalesInterface/particles/advec_common_remesh.F90
@@ -1480,7 +1480,7 @@ subroutine AC_remesh_finalize(direction, ind_group, gs, j, k, scal, send_gap_abs
         if (neighbors(direction,proc_gap)/= D_rank(direction)) then
             tag = compute_tag(ind_group, tag_bufToScal_buffer, direction, -proc_gap)
             call mpi_Irecv(rece_buffer(rece_pos(proc_gap)), rece_carto(1,ind_gap),  &
-                & MPI_DOUBLE_PRECISION, neighbors(direction,proc_gap), tag,         &
+                & MPI_REAL_WP, neighbors(direction,proc_gap), tag,         &
                 & D_COMM(direction), r_request_sca(ind_gap), ierr)
         end if
     end do
@@ -1498,10 +1498,10 @@ subroutine AC_remesh_finalize(direction, ind_group, gs, j, k, scal, send_gap_abs
             ! Send buffer
             tag = compute_tag(ind_group, tag_bufToScal_buffer, direction, ind_gap-1+send_gap_abs(1))
 #ifdef BLOCKING_SEND
-            call mpi_Send(send_buffer(pos_in_buffer(ind_gap-1)), cartography(1,ind_gap), MPI_DOUBLE_PRECISION, &
+            call mpi_Send(send_buffer(pos_in_buffer(ind_gap-1)), cartography(1,ind_gap), MPI_REAL_WP, &
                 & neighbors(direction,proc_gap), tag, D_comm(direction), r_status, ierr)
 #else
-            call mpi_ISsend(send_buffer(pos_in_buffer(ind_gap-1)), cartography(1,ind_gap), MPI_DOUBLE_PRECISION, &
+            call mpi_ISsend(send_buffer(pos_in_buffer(ind_gap-1)), cartography(1,ind_gap), MPI_REAL_WP, &
                 & neighbors(direction,proc_gap), tag, D_comm(direction), s_request_sca(ind_gap),ierr)
 #endif
         else
diff --git a/HySoP/src/scalesInterface/particles/advec_correction.f90 b/HySoP/src/scalesInterface/particles/advec_correction.f90
index 6549ff5b1..3efad781f 100644
--- a/HySoP/src/scalesInterface/particles/advec_correction.f90
+++ b/HySoP/src/scalesInterface/particles/advec_correction.f90
@@ -101,9 +101,9 @@ subroutine AC_type_and_block_group(dt, dir, gp_s, ind_group, p_V, &
 
     ! Receive ghost value, ie value from neighbors boundaries.
     tag_table = compute_tag(ind_group, tag_part_tag_NP, dir)
-    call mpi_Irecv(lambN(1,1), com_size, MPI_DOUBLE_PRECISION, &
+    call mpi_Irecv(lambN(1,1), com_size, MPI_REAL_WP, &
             & neighbors(dir,1), tag_table(1), D_comm(dir), rece_request(1), ierr)
-    call mpi_Irecv(lambP(1,1), com_size, MPI_DOUBLE_PRECISION, &
+    call mpi_Irecv(lambP(1,1), com_size, MPI_REAL_WP, &
             &  neighbors(dir,-1), tag_table(2), D_comm(dir), rece_request(2), ierr)
 
     ! -- For the first block (1/2) --
@@ -111,7 +111,7 @@ subroutine AC_type_and_block_group(dt, dir, gp_s, ind_group, p_V, &
     lambB = minval(p_V(1:(bl_size/2)+1,:,:),1)*cfl
     !tag_table = compute_tag(ind_group, tag_part_tag_NP, dir)   ! Tag table is already equals to this.
     ! Send message
-    call mpi_ISsend(lambB(1,1), com_size, MPI_DOUBLE_PRECISION, &
+    call mpi_ISsend(lambB(1,1), com_size, MPI_REAL_WP, &
             & neighbors(dir,-1), tag_table(1), D_comm(dir), send_request(1), ierr)
 
     ! -- For the last block (1/2) --
@@ -119,7 +119,7 @@ subroutine AC_type_and_block_group(dt, dir, gp_s, ind_group, p_V, &
     ind = bl_nb(dir) + 1
     lambE = minval(p_V(mesh_sc%N_proc(dir) - (bl_size/2)+1 :mesh_sc%N_proc(dir),:,:),1)*cfl
     ! Send message
-    call mpi_ISsend(lambE(1,1), com_size, MPI_DOUBLE_PRECISION, &
+    call mpi_ISsend(lambE(1,1), com_size, MPI_REAL_WP, &
             & neighbors(dir,1), tag_table(2), D_comm(dir), send_request(2), ierr)
 
     ! -- For the "middle" block --
@@ -227,10 +227,10 @@ subroutine AC_limitator_from_slopes(direction, gp_s, p_pos, &
     ! Send these values
     Sbuffer(1,:,:) = limit(mesh_sc%N_proc(direction)+1,:,:)
     Sbuffer(2,:,:) = deltaS(:,:,mesh_sc%N_proc(direction))
-    call mpi_ISsend(Sbuffer(1,1,1), com_size, MPI_DOUBLE_PRECISION, &
+    call mpi_ISsend(Sbuffer(1,1,1), com_size, MPI_REAL_WP, &
             & neighbors(direction,1), tag_mpi, D_comm(direction), send_request, ierr)
     ! Receive it !
-    call mpi_recv(Rbuffer(1,1,1), com_size, MPI_DOUBLE_PRECISION, &
+    call mpi_recv(Rbuffer(1,1,1), com_size, MPI_REAL_WP, &
             &  neighbors(direction,-1), tag_mpi, D_comm(direction),rece_status, ierr)
     ! Get limit(1) = limitator at 1/2
     limit(1,:,:) = Rbuffer(1,:,:)
diff --git a/HySoP/src/scalesInterface/particles/advec_line/advec_common_line.f90 b/HySoP/src/scalesInterface/particles/advec_line/advec_common_line.f90
index f4757f4a4..3414804be 100644
--- a/HySoP/src/scalesInterface/particles/advec_line/advec_common_line.f90
+++ b/HySoP/src/scalesInterface/particles/advec_line/advec_common_line.f90
@@ -286,7 +286,7 @@ subroutine AC_particle_velocity_line(dt, direction, ind_group, p_pos_adim, p_V)
             ! IIa - Compute send tag
             tag = compute_tag(ind_group, tag_velo_V, direction, proc_gap)
             ! IIb - Send message
-            call mpi_Isend(p_V(send_range(1)), send_range(2)-send_range(1)+1, MPI_DOUBLE_PRECISION, &
+            call mpi_Isend(p_V(send_range(1)), send_range(2)-send_range(1)+1, MPI_REAL_WP, &
                     & send_rank, tag, D_comm(direction), s_request(proc_gap), ierr)
         end if
     end do
@@ -303,7 +303,7 @@ subroutine AC_particle_velocity_line(dt, direction, ind_group, p_pos_adim, p_V)
             rece_range(1) = max(rece_ind_min, gap+1) ! fortran => indice start from 0
             rece_range(2) = min(rece_ind_max, gap+mesh_sc%N_proc(direction))
             msg_size = rece_range(2)-rece_range(1)+1
-            call mpi_Irecv(V_buffer(ind), msg_size, MPI_DOUBLE_PRECISION, rece_rank(proc_gap), tag, D_comm(direction), &
+            call mpi_Irecv(V_buffer(ind), msg_size, MPI_REAL_WP, rece_rank(proc_gap), tag, D_comm(direction), &
                         & rece_request(proc_gap), ierr)
             ind = ind + msg_size
         end if
@@ -485,18 +485,18 @@ subroutine AC_type_and_block_line(dt, direction, ind_group, p_V, &
     bl_lambdaMin(1) = minval(p_V(1:(bl_size/2)+1))*cfl
     tag_table = compute_tag(ind_group, tag_part_tag_NP, direction)
     ! Send message
-    call mpi_Isend(bl_lambdaMin(1), 1, MPI_DOUBLE_PRECISION, rankP, tag_table(1), D_comm(direction), send_request(1), ierr)
+    call mpi_Isend(bl_lambdaMin(1), 1, MPI_REAL_WP, rankP, tag_table(1), D_comm(direction), send_request(1), ierr)
     ! Receive it
-    call mpi_Irecv(lambN, 1, MPI_DOUBLE_PRECISION, rankN, tag_table(1), D_comm(direction), rece_request(1), ierr)
+    call mpi_Irecv(lambN, 1, MPI_REAL_WP, rankN, tag_table(1), D_comm(direction), rece_request(1), ierr)
 
     ! -- For the last block (1/2) --
     ! The processus contains only its first half => exchange ghost with the next processus
     ind = bl_nb(direction) + 1
     bl_lambdaMin(ind) = minval(p_V(mesh_sc%N_proc(direction)-(bl_size/2)+1:mesh_sc%N_proc(direction)))*cfl
     ! Send message
-    call mpi_Isend(bl_lambdaMin(ind), 1, MPI_DOUBLE_PRECISION, rankN, tag_table(2), D_comm(direction), send_request(2), ierr)
+    call mpi_Isend(bl_lambdaMin(ind), 1, MPI_REAL_WP, rankN, tag_table(2), D_comm(direction), send_request(2), ierr)
     ! Receive it
-    call mpi_Irecv(lambP, 1, MPI_DOUBLE_PRECISION, rankP, tag_table(2), D_comm(direction), rece_request(2), ierr)
+    call mpi_Irecv(lambP, 1, MPI_REAL_WP, rankP, tag_table(2), D_comm(direction), rece_request(2), ierr)
 
     ! -- For the "middle" block --
     do ind = 2, bl_nb(direction)
@@ -743,7 +743,7 @@ subroutine AC_bufferToScalar_line(direction, ind_group, send_i_min, send_i_max,
                     & , ierr)
             ! And send the buffer
             tag = compute_tag(ind_group, tag_bufToScal_buffer, direction, proc_gap)
-            call mpi_ISsend(send_buffer(send_range(1,proc_gap)),comm_size, MPI_DOUBLE_PRECISION, send_rank, &
+            call mpi_ISsend(send_buffer(send_range(1,proc_gap)),comm_size, MPI_REAL_WP, send_rank, &
                         & tag, D_comm(direction), send_request(proc_gap,2), ierr)
             send_request(proc_gap,3) = 1
         else
@@ -777,7 +777,7 @@ subroutine AC_bufferToScalar_line(direction, ind_group, send_i_min, send_i_max,
                 ! by allocating one time to the max size, note that the range use in
                 ! this allocation instruction is include in (1, mesh_sc%N_proc(2))
             tag = compute_tag(ind_group, tag_bufToScal_buffer, direction, -proc_gap)
-            call mpi_recv(rece_buffer(rece_i_min), comm_size, MPI_DOUBLE_PRECISION, &
+            call mpi_recv(rece_buffer(rece_i_min), comm_size, MPI_REAL_WP, &
                     & rece_rank(proc_gap), tag, D_comm(direction), rece_status, ierr)
             ! Update the scalar field
             send_gap = proc_gap*mesh_sc%N_proc(direction)
diff --git a/HySoP/src/scalesInterface/particles/interpolation_velo.f90 b/HySoP/src/scalesInterface/particles/interpolation_velo.f90
index 3acada44e..9fa45338e 100644
--- a/HySoP/src/scalesInterface/particles/interpolation_velo.f90
+++ b/HySoP/src/scalesInterface/particles/interpolation_velo.f90
@@ -463,17 +463,17 @@ subroutine Inter_LastDir_com(dir, V_coarse, dx_c, V_fine, dx_f)
     do ind = 1, com_nb(1)-1
       com_pos = com_pos - N_coarse  ! = 1 + missing ghost lines after this step
       ! Communication
-      call Mpi_Irecv(V_beg(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+      call Mpi_Irecv(V_beg(1,1,com_pos),com_size(2),MPI_REAL_WP, &
         & neighbors(dir,-ind), 100+ind, D_comm(dir), beg_request(ind), ierr)
-      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_REAL_WP, &
         & neighbors(dir,ind), 100+ind, D_comm(dir), ierr)
     end do
     ! Last communication to complete the "right" ghost (beginning points)
     ! We use the fact that missing ghost lines = com_pos - 1
     com_size(2) = com_size(1)*(com_pos-1)
-    call Mpi_Irecv(V_beg(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+    call Mpi_Irecv(V_beg(1,1,1),com_size(2),MPI_REAL_WP, &
       & neighbors(dir,-com_nb(1)), 1, D_comm(dir), beg_request(com_nb(1)), ierr)
-    call Mpi_Send(V_coarse(1,1,N_coarse-com_pos+2),com_size(2),MPI_DOUBLE_PRECISION, &
+    call Mpi_Send(V_coarse(1,1,N_coarse-com_pos+2),com_size(2),MPI_REAL_WP, &
       & neighbors(dir,com_nb(1)), 1, D_comm(dir), ierr)
   end if
 
@@ -488,9 +488,9 @@ subroutine Inter_LastDir_com(dir, V_coarse, dx_c, V_fine, dx_f)
       ! points along current direction (ie if coarse grid is very coarse)
     do ind = 1, com_nb(2)-1
       ! Communication
-      call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+      call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_REAL_WP, &
         & neighbors(dir,ind), 200+ind, D_comm(dir), end_request(ind), ierr)
-      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_REAL_WP, &
         & neighbors(dir,-ind), 200+ind, D_comm(dir), ierr)
       ! next com_pos = (ind*N_coarse)+1 = com_pos + N_coarse
       com_pos = com_pos + N_coarse
@@ -499,10 +499,10 @@ subroutine Inter_LastDir_com(dir, V_coarse, dx_c, V_fine, dx_f)
     ! Note that missing ghost lines = stencil_d - (com_nb-1)*N_coarse
     com_size(2) = com_size(1)*(stencil_d-((com_nb(2)-1)*N_coarse))
     ! Perform communication
-    call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+    call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_REAL_WP, &
       & neighbors(dir,com_nb(2)), 2, D_comm(dir), end_request(com_nb(2)), ierr)
     ! Send data
-    call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+    call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_REAL_WP, &
       & neighbors(dir,-com_nb(2)), 2, D_comm(dir), ierr)
   end if
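
For concreteness, the size computed above, com_size(2) = com_size(1)*(stencil_d-((com_nb(2)-1)*N_coarse)), follows from splitting stencil_d ghost lines into messages of N_coarse lines each: assuming com_nb(2) = ceiling(stencil_d/N_coarse), as the loop structure suggests, then with e.g. stencil_d = 5 and N_coarse = 2 one gets com_nb(2) = 3; the first two messages carry 2 lines each and the last carries 5 - (3-1)*2 = 1 line, i.e. com_size(2) = com_size(1)*1.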
 
@@ -635,17 +635,17 @@ subroutine Inter_LastDir_Permut_com(dir, V_coarse, dx_c, V_fine, dx_f)
     do ind = 1, com_nb(1)-1
       com_pos = com_pos - N_coarse  ! = 1 + missing ghost lines after this step
       ! Communication
-      call Mpi_Irecv(V_beg(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+      call Mpi_Irecv(V_beg(1,1,com_pos),com_size(2),MPI_REAL_WP, &
         & neighbors(dir,-ind), 100+ind, D_comm(dir), beg_request(ind), ierr)
-      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_REAL_WP, &
         & neighbors(dir,ind), 100+ind, D_comm(dir), ierr)
     end do
     ! Last communication to complete "right" ghost (beginning points)
     ! We use the fact that missing ghost lines = com_pos - 1
     com_size(2) = com_size(1)*(com_pos-1)
-    call Mpi_Irecv(V_beg(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+    call Mpi_Irecv(V_beg(1,1,1),com_size(2),MPI_REAL_WP, &
       & neighbors(dir,-com_nb(1)), 1, D_comm(dir), beg_request(com_nb(1)), ierr)
-    call Mpi_Send(V_coarse(1,1,N_coarse-com_pos+2),com_size(2),MPI_DOUBLE_PRECISION, &
+    call Mpi_Send(V_coarse(1,1,N_coarse-com_pos+2),com_size(2),MPI_REAL_WP, &
       & neighbors(dir,com_nb(1)), 1, D_comm(dir), ierr)
   end if
 
@@ -660,9 +660,9 @@ subroutine Inter_LastDir_Permut_com(dir, V_coarse, dx_c, V_fine, dx_f)
      ! points along the current direction (i.e. if the coarse grid is very coarse)
     do ind = 1, com_nb(2)-1
       ! Communication
-      call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+      call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_REAL_WP, &
         & neighbors(dir,ind), 200+ind, D_comm(dir), end_request(ind), ierr)
-      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_REAL_WP, &
         & neighbors(dir,-ind), 200+ind, D_comm(dir), ierr)
       ! next com_pos = (ind*N_coarse)+1 = com_pos + N_coarse
       com_pos = com_pos + N_coarse
@@ -671,10 +671,10 @@ subroutine Inter_LastDir_Permut_com(dir, V_coarse, dx_c, V_fine, dx_f)
     ! Note that missing ghost lines = stencil_d - (com_nb-1)*N_coarse
     com_size(2) = com_size(1)*(stencil_d-((com_nb(2)-1)*N_coarse))
     ! Perform communication
-    call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+    call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_REAL_WP, &
       & neighbors(dir,com_nb(2)), 2, D_comm(dir), end_request(com_nb(2)), ierr)
     ! Send data
-    call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+    call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_REAL_WP, &
       & neighbors(dir,-com_nb(2)), 2, D_comm(dir), ierr)
   end if
 
@@ -881,24 +881,24 @@ subroutine Inter_FirstDir_com(dir, V_coarse, dx_c, V_fine, dx_f)
   if(stencil_g>0) then
     allocate(V_beg(stencil_g,size(V_coarse,2),size(V_coarse,3)))
     ! Initiate non blocking receive
-    call Mpi_Irecv(V_beg(1,1,1),com_size(1),MPI_DOUBLE_PRECISION, &
+    call Mpi_Irecv(V_beg(1,1,1),com_size(1),MPI_REAL_WP, &
       & neighbors(dir,-1), 1, D_comm(dir), rece_request(1), ierr)
     ! Send data
     allocate(V_s1(stencil_g,size(V_coarse,2),size(V_coarse,3)))
     V_s1 = V_coarse(N_coarse-stencil_g+1:N_coarse,:,:)
-    call Mpi_Send(V_s1(1,1,1),com_size(1),MPI_DOUBLE_PRECISION, &
+    call Mpi_Send(V_s1(1,1,1),com_size(1),MPI_REAL_WP, &
       & neighbors(dir,1), 1, D_comm(dir), ierr)
   end if
 
   if(stencil_d>0) then
     allocate(V_end(stencil_d,size(V_coarse,2),size(V_coarse,3)))
     ! Initiate non blocking receive
-    call Mpi_Irecv(V_end(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+    call Mpi_Irecv(V_end(1,1,1),com_size(2),MPI_REAL_WP, &
       & neighbors(dir,1), 2, D_comm(dir), rece_request(2), ierr)
     ! Send data
     allocate(V_s2(stencil_d,size(V_coarse,2),size(V_coarse,3)))
     V_s2 = V_coarse(1:stencil_d,:,:)
-    call Mpi_Send(V_s2(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+    call Mpi_Send(V_s2(1,1,1),com_size(2),MPI_REAL_WP, &
       & neighbors(dir,-1), 2, D_comm(dir), ierr)
   else
   end if
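
In the Inter_FirstDir_com hunks the stencil slab is copied into a contiguous temporary (V_s1, V_s2) before sending: a slice on the first dimension of V_coarse is strided in memory, so packing it explicitly avoids relying on compiler copy-in/copy-out for the typed send. A reduced sketch of that pack-then-send step for the "left" ghosts, with hypothetical names and extents (MPI_DOUBLE_PRECISION stands in for HySoP's MPI_REAL_WP):

    subroutine exchange_left_ghosts(V_coarse, V_beg, stencil_g, rank_prev, &
                                  & rank_next, comm)
      use mpi
      implicit none
      integer, parameter :: WP = kind(1.0d0)
      integer,  intent(in)  :: stencil_g, rank_prev, rank_next, comm
      real(WP), intent(in)  :: V_coarse(:,:,:)
      real(WP), intent(out) :: V_beg(stencil_g, size(V_coarse,2), size(V_coarse,3))
      real(WP), allocatable :: V_s1(:,:,:)
      integer :: req, ierr, cnt, n1

      n1  = size(V_coarse, 1)
      cnt = stencil_g*size(V_coarse, 2)*size(V_coarse, 3)
      ! Post the receive first so this pairwise exchange cannot deadlock.
      call mpi_irecv(V_beg, cnt, MPI_DOUBLE_PRECISION, rank_prev, 1, comm, &
                   & req, ierr)
      ! Pack the last stencil_g planes into a contiguous buffer.
      allocate(V_s1(stencil_g, size(V_coarse, 2), size(V_coarse, 3)))
      V_s1 = V_coarse(n1-stencil_g+1:n1, :, :)
      call mpi_send(V_s1, cnt, MPI_DOUBLE_PRECISION, rank_next, 1, comm, ierr)
      call mpi_wait(req, MPI_STATUS_IGNORE, ierr)
      deallocate(V_s1)
    end subroutine exchange_left_ghosts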
diff --git a/HySoP/src/scalesInterface/precision_tools.f90 b/HySoP/src/scalesInterface/precision_tools.f90
index 9e6213968..d0e54903f 100644
--- a/HySoP/src/scalesInterface/precision_tools.f90
+++ b/HySoP/src/scalesInterface/precision_tools.f90
@@ -21,19 +21,21 @@
 
 MODULE precision_tools
 
+  use mpi
   IMPLICIT NONE
+
+  !> Floating-point precision kinds
   INTEGER, PARAMETER  :: SP = kind(1.0)
   INTEGER, PARAMETER  :: DP = kind(1.0d0)
   INTEGER, PARAMETER  :: WP = DP
+  !> the MPI type for REAL exchanges in single or double precision
+  INTEGER, PUBLIC     :: MPI_REAL_WP = MPI_DOUBLE_PRECISION
   REAL(WP), PRIVATE   :: sample_real_at_WP
   REAL(WP), PARAMETER :: MAX_REAL_WP = HUGE(sample_real_at_WP)
   INTEGER, PRIVATE    :: sample_int
   INTEGER, PARAMETER  :: MAX_INTEGER = HUGE(sample_int)
   INTEGER, PARAMETER  :: DI = selected_int_kind(r=12)
-  !> the MPI type for REAL exchanges in simple or double precision
-  INTEGER, PUBLIC     :: MPI_REAL_WP
-  !> the MPI type for COMPLEX exchanges in simple or double precision
-  INTEGER, PUBLIC     :: MPI_COMPLEX_WP
+
   !> the string size
   INTEGER, PARAMETER  :: str_short  = 8
   INTEGER, PARAMETER  :: str_medium = 64
-- 
GitLab
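
The precision_tools hunk initialises MPI_REAL_WP to MPI_DOUBLE_PRECISION at its declaration, which is consistent with WP = DP, replacing the previously uninitialised variable and removing MPI_COMPLEX_WP. If WP were switched to SP, the MPI type would have to follow; a minimal sketch of one way to keep the two in sync (init_mpi_real_wp is a hypothetical helper, not part of this patch):

    ! Sketch: re-select the MPI datatype if the working precision changes.
    subroutine init_mpi_real_wp()
      use mpi
      use precision_tools   ! provides SP, WP and the MPI_REAL_WP variable
      implicit none
      if (WP == SP) then
        MPI_REAL_WP = MPI_REAL
      else
        MPI_REAL_WP = MPI_DOUBLE_PRECISION
      end if
    end subroutine init_mpi_real_wp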