diff --git a/HySoP/src/scalesInterface/layout/cart_topology.f90 b/HySoP/src/scalesInterface/layout/cart_topology.f90
index 63554dee5376939eb3dadad690ca59ee66eb0799..b388f631fe81ad211c4b501a2860ba90d27c8b9b 100644
--- a/HySoP/src/scalesInterface/layout/cart_topology.f90
+++ b/HySoP/src/scalesInterface/layout/cart_topology.f90
@@ -74,7 +74,9 @@ module cart_topology
     !> Table of the previous communicators (ie comm devoted to 1D subgrids)
     integer, dimension(3), protected    :: D_comm
     !> Rank of immediate neighbors
-    integer,dimension(3,-2:2),protected :: neighbors
+    integer,dimension(3,-4:4),protected :: neighbors
+    !> Rank of neighbors in the 3D cartesian topology
+    integer,dimension(1:3,-1:1),protected :: neighbors_cart_topo=0
 
     ! ----- Information about current MPI processus and MPI topology
     !> number of processes in each direction
@@ -93,8 +95,6 @@ module cart_topology
     ! ------ Information about mesh subdivision and on the global grid -----
     !> information about local mesh - for scalar
     type(cartesian_mesh), protected     :: mesh_sc
-    !> REcopy of mesh_sc%N_proc for python interface
-    integer, dimension(3)               :: N_proc
     !> Computation are done by group of line. Here we define their size
     integer, dimension(3,2), protected  :: group_size
     !> To check if group size is initialized
@@ -246,14 +246,19 @@ subroutine cart_create(dims, ierr, spec_comm, topology)
         ! --- Initialise information about the current processus ---
         call mpi_comm_rank(cart_comm, cart_rank, ierr)
         do direction = 1, 3
+            ! neighbors on the 1D topology
             call mpi_comm_rank(D_comm(direction), D_rank(direction), ierr)
             call mpi_cart_shift(D_comm(direction), 0, 1, neighbors(direction,-1), neighbors(direction,1), ierr)
             call mpi_cart_shift(D_comm(direction), 0, 2, neighbors(direction,-2), neighbors(direction,2), ierr)
+            call mpi_cart_shift(D_comm(direction), 0, 3, neighbors(direction,-3), neighbors(direction,3), ierr)
+            call mpi_cart_shift(D_comm(direction), 0, 4, neighbors(direction,-4), neighbors(direction,4), ierr)
             neighbors(direction,0) = D_rank(direction)
+            ! neighbors on the 3D topology
+            call mpi_cart_shift(cart_comm , direction-1, 1, neighbors_cart_topo(direction,-1), neighbors_cart_topo(direction,1), ierr)
+            neighbors_cart_topo(direction,0) = cart_rank
         end do
         call mpi_cart_coords(cart_comm, cart_rank, 3, coord, ierr)
         coordYZ = (/ coord(2), coord(3) /)
-
         ! --- Spectral context ---
         ! Initialized the communicator used on which the spectral part
         ! will be based.
@@ -346,7 +351,6 @@ subroutine discretisation_create(Nx, Ny, Nz, Lx, Ly, Lz, verbosity)
     mesh_sc%length(3)= Lz
 
     mesh_sc%N_proc = mesh_sc%N / nb_proc_dim
-    N_proc = mesh_sc%N_proc
     mesh_sc%relative_extend(:,1) = 1
     mesh_sc%relative_extend(:,2) = mesh_sc%N_proc
 
@@ -378,7 +382,6 @@ subroutine discretisation_default(verbosity)
     mesh_sc%N = default_size
     mesh_sc%length = 1.
     mesh_sc%N_proc = mesh_sc%N / nb_proc_dim
-    N_proc = mesh_sc%N_proc
     mesh_sc%relative_extend(:,1) = 1
     mesh_sc%relative_extend(:,2) = mesh_sc%N_proc
 
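Note on the neighbors change above: the table now stores ranks up to four shifts away
because the interpolation routines (third file below) may need ghost planes from
processes several subdomains away when the local coarse grid holds fewer planes than
the stencil. A minimal standalone sketch (hypothetical demo program, not part of this
patch) of how mpi_cart_shift with displacement k resolves those ranks on a periodic
1D topology:

    program shift_demo
      use mpi
      implicit none
      integer :: ierr, comm1d, rank, prev, next, k
      integer :: dims(1)
      logical :: periods(1)
      call mpi_init(ierr)
      call mpi_comm_size(MPI_COMM_WORLD, dims(1), ierr)
      periods = .true.   ! periodic, so every shift resolves to a valid rank
      call mpi_cart_create(MPI_COMM_WORLD, 1, dims, periods, .false., comm1d, ierr)
      call mpi_comm_rank(comm1d, rank, ierr)
      do k = 1, 4
        ! prev/next mirror neighbors(direction,-k) and neighbors(direction,k)
        call mpi_cart_shift(comm1d, 0, k, prev, next, ierr)
        if (rank == 0) write(*,'(3(a,i0))') 'k=', k, ' prev=', prev, ' next=', next
      end do
      call mpi_finalize(ierr)
    end program shift_demo
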
diff --git a/HySoP/src/scalesInterface/particles/advec.f90 b/HySoP/src/scalesInterface/particles/advec.f90
index 5078f22c064b72a249ac3d1d3858e5d99a3abc54..5d15d869b6e0c5cf45b8014699c7a92f612c1a81 100644
--- a/HySoP/src/scalesInterface/particles/advec.f90
+++ b/HySoP/src/scalesInterface/particles/advec.f90
@@ -116,14 +116,10 @@ subroutine advec_init(order, stab_coeff, verbosity, dim_split)
 
     select case(dim_splitting)
         case('classic')
-            write(*,'(6x,a)') '========== CLASSIC Splitting ========='
-            write(*,'(6x,a)') '======================================'
             advec_step => advec_step_Torder1
             ! Compute stability coefficient
             if (present(stab_coeff)) stab_coeff = 1.0/(2.0*dble(bl_size))
         case default    ! Strang
-            write(*,'(6x,a)') '========== STRANG Splitting ========='
-            write(*,'(6x,a)') '======================================'
             advec_step => advec_step_Torder2
             ! Compute stability coefficient - as each dimension is solved in
             ! dt/2, stab_coef is 2 times bigger
@@ -140,8 +136,8 @@ subroutine advec_init(order, stab_coeff, verbosity, dim_split)
             advec_remesh => AC_remesh_limit_lambda_group    ! limited and corrected lambda 2
         case('p_M4')
             advec_remesh => AC_remesh_Mprime_group ! Xremesh_Mprime4
-        ! Check if interface with parmes is done. Ok in Scales. But needs to
-        ! get diffusion information from Parmes.
+        ! Check if the interface is done. OK in Scales, but it needs to
+        ! get diffusion information. OK for the advec_plus variant, but what about here?
         !case('d_M4')
         !    advec_remesh_plus => AC_remesh_Mprime_group ! Xremesh_Mprime4 with diffusion
         case('p_M6')
@@ -211,6 +207,96 @@ subroutine advec_setup_alongZ()
     line_dir = 3
 end subroutine advec_setup_alongZ
 
+!> Solve advection equation - order 2 - with basic velocity interpolation
+!!    @param[in]        dt          = time step
+!!    @param[in]        Vx          = velocity along x (could be discretised on a bigger mesh than the scalar)
+!!    @param[in]        Vy          = velocity along y
+!!    @param[in]        Vz          = velocity along z
+!!    @param[in,out]    scal        = scalar field to advect
+subroutine advec_step_Inter_basic(dt, Vx, Vy, Vz, scal)
+
+    ! Input/Output
+    real(WP), intent(in)                        :: dt
+    real(WP), dimension(:,:,:), intent(in)      :: Vx, Vy, Vz
+    real(WP), dimension(:,:,:), intent(inout)   :: scal
+    ! Local
+    real(WP), dimension(:,:,:), allocatable   :: Vx_f, Vy_f, Vz_f
+    integer                                   :: ierr                ! Error code.
+
+    allocate(Vx_f(mesh_sc%N_proc(1),mesh_sc%N_proc(2),mesh_sc%N_proc(3)),stat=ierr)
+    if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vx_f'
+    allocate(Vy_f(mesh_sc%N_proc(1),mesh_sc%N_proc(2),mesh_sc%N_proc(3)),stat=ierr)
+    if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vy_f'
+    allocate(Vz_f(mesh_sc%N_proc(1),mesh_sc%N_proc(2),mesh_sc%N_proc(3)),stat=ierr)
+    if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vz_f'
+
+    call Interpol_3D(Vx, mesh_V%dx, Vx_f, mesh_sc%dx)
+    call Interpol_3D(Vy, mesh_V%dx, Vy_f, mesh_sc%dx)
+    call Interpol_3D(Vz, mesh_V%dx, Vz_f, mesh_sc%dx)
+    if (cart_rank==0) write(6,'(a)') '        [INFO PARTICLES] Interpolation done'
+
+    call advec_step_Torder2(dt, Vx_f, Vy_f, Vz_f, scal)
+
+    deallocate(Vx_f)
+    deallocate(Vy_f)
+    deallocate(Vz_f)
+
+end subroutine advec_step_Inter_basic
+
+
+!> Solve advection equation - order 2 - with more complex velocity interpolation
+!!    @param[in]        dt          = time step
+!!    @param[in]        Vx          = velocity along x (could be discretised on a bigger mesh than the scalar)
+!!    @param[in]        Vy          = velocity along y
+!!    @param[in]        Vz          = velocity along z
+!!    @param[in,out]    scal        = scalar field to advect
+subroutine advec_step_Inter_Two(dt, Vx, Vy, Vz, scal)
+
+    ! Input/Output
+    real(WP), intent(in)                        :: dt
+    real(WP), dimension(:,:,:), intent(in)      :: Vx, Vy, Vz
+    real(WP), dimension(:,:,:), intent(inout)   :: scal
+    ! Local
+    real(WP), dimension(:,:,:), allocatable   :: Vx_c, Vy_c, Vz_c
+    real(WP), dimension(:,:,:), allocatable   :: Vx_f, Vy_f, Vz_f
+    integer                                   :: ierr                ! Error code.
+
+    allocate(Vx_c(mesh_V%N_proc(1),mesh_sc%N_proc(2),mesh_sc%N_proc(3)),stat=ierr)
+    if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vx_c'
+    allocate(Vx_f(mesh_sc%N_proc(1),mesh_sc%N_proc(2),mesh_sc%N_proc(3)),stat=ierr)
+    if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vx_f'
+    allocate(Vy_c(mesh_V%N_proc(2),mesh_sc%N_proc(1),mesh_sc%N_proc(3)),stat=ierr)
+    if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vy_c'
+    allocate(Vy_f(mesh_sc%N_proc(2),mesh_sc%N_proc(1),mesh_sc%N_proc(3)),stat=ierr)
+    if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vy_f'
+    allocate(Vz_c(mesh_V%N_proc(3),mesh_sc%N_proc(1),mesh_sc%N_proc(2)),stat=ierr)
+    if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vz_c'
+    allocate(Vz_f(mesh_sc%N_proc(3),mesh_sc%N_proc(1),mesh_sc%N_proc(2)),stat=ierr)
+    if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vz_f'
+
+    call Interpol_2D_3D_vect(mesh_sc%dx, mesh_V%dx, Vx, Vy, Vz, Vx_c, Vx_f, Vy_c, Vy_f, Vz_c, Vz_f)
+
+    call advec_setup_alongX()
+    call advec_X_basic_no_com(dt/2.0, gsX, Vx_f, scal)
+    call advec_setup_alongY()
+    call advec_1D_Vcoarse(dt/2.0, gsY, Vy_c, Vy_f, scal)
+    call advec_setup_alongZ()
+    call advec_1D_Vcoarse(dt/2.0, gsZ, Vz_c, Vz_f, scal)
+    call advec_1D_Vcoarse(dt/2.0, gsZ, Vz_c, Vz_f, scal)
+    call advec_setup_alongY()
+    call advec_1D_Vcoarse(dt/2.0, gsY, Vy_c, Vy_f, scal)
+    call advec_setup_alongX()
+    call advec_X_basic_no_com(dt/2.0, gsX, Vx_f, scal)
+
+    deallocate(Vx_f)
+    deallocate(Vy_f)
+    deallocate(Vz_f)
+
+    deallocate(Vx_c)
+    deallocate(Vy_c)
+    deallocate(Vz_c)
+
+end subroutine advec_step_Inter_Two
 
 !> Solve advection equation - order 1 in time (order 2 dimensional splitting)
 !!    @param[in]        dt          = time step
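Note on advec_step_Inter_Two above: the sequence X, Y, Z, Z, Y, X with dt/2 each gives
every direction a total step of dt in a time-symmetric order (Strang splitting, second
order in time). A hypothetical call site for the simpler advec_step_Inter_basic variant
(assuming cart_create, discretisation_create, interpol_init and advec_init have been
called, and that WP, mesh_sc and mesh_V come from the usual scales modules):

    ! hypothetical driver fragment - sizes and dt are placeholders
    real(WP), dimension(:,:,:), allocatable :: Vx, Vy, Vz, scal
    real(WP) :: dt

    allocate(Vx(mesh_V%N_proc(1), mesh_V%N_proc(2), mesh_V%N_proc(3)))
    allocate(Vy, Vz, mold=Vx)   ! same shape as Vx (F2008 mold=)
    allocate(scal(mesh_sc%N_proc(1), mesh_sc%N_proc(2), mesh_sc%N_proc(3)))
    ! ... fill the velocity components and the scalar ...
    dt = 1.0e-3_WP
    ! interpolate V onto the scalar grid, then perform one Strang step
    call advec_step_Inter_basic(dt, Vx, Vy, Vz, scal)
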
diff --git a/HySoP/src/scalesInterface/particles/interpolation_velo.F90 b/HySoP/src/scalesInterface/particles/interpolation_velo.F90
index 229573d1ab9c32f328297f624b605a76c465f35f..94b76df362bd6f38b2b74c153b846767b1a0f84f 100644
--- a/HySoP/src/scalesInterface/particles/interpolation_velo.F90
+++ b/HySoP/src/scalesInterface/particles/interpolation_velo.F90
@@ -53,7 +53,7 @@ module Interpolation_velo
     !> Generic subroutine (pointer intialize to the choosen formula)
     procedure(weight_M4), pointer,  public :: get_weight => null()
     !> Specific interpolation formula
-    public :: weight_M4, weight_Mprime4, weight_Lambda4_4
+    public :: weight_M4, weight_Mprime4, weight_Lambda4_4, weight_linear
 
     ! ===== Private variables =====
     character(len=4), protected :: interpol = 'Mp4'
@@ -87,6 +87,13 @@ subroutine interpol_init(formula, verbose)
     if(present(verbose)) verbosity = verbose
 
     select case(trim(interpol))
+    case('lin')
+      stencil_size = 2
+      stencil_d = 1
+      stencil_g = 0
+      get_weight => weight_linear
+      if ((cart_rank==0).and.(verbosity)) &
+        & write(*,'(6x,a)') '============= Interpolation = linear  ==========='
     case('L4_4')
       stencil_size = 6
       stencil_d = 3
@@ -189,7 +196,7 @@ end subroutine Interpol_3D
 ! ====================        2D interpolation        ====================
 ! ========================================================================
 
-!> Interpolate a field (ordannate along X,Y,Z) along X and Y-axis
+!> Interpolate a field (ordered along X,Y,Z) along the X- and Y-axes
 !!    @param[in]        V_coarse    = velocity to interpolate along the last direction
 !!    @param[in]        dx_c        = space step on the coarse grid (for last direction)
 !!    @param[in]        V_fine      = interpolated velocity
@@ -404,49 +411,99 @@ subroutine Inter_LastDir_com(dir, V_coarse, dx_c, V_fine, dx_f)
   real(WP), dimension(:,:,:), allocatable   :: V_beg, V_end ! ghost values of velocity
   real(WP), dimension(stencil_size)         :: weight       ! interpolation weight
   integer               :: i,ind,i_bis, V_ind   ! some loop indices
-  integer               :: ind_max, ind_min, ind_limit
+  integer               :: ind_max, ind_min, ind_limit, ind_limit_2
   real(WP)              :: pos
   integer               :: N_coarse, N_fine     ! number of grid points
+  integer               :: com_pos              ! to deal with multiple communications - if (stencil size)/2 > local size of coarse grid
+                                                ! = position where ghost values are received in V_beg and V_end
+  integer, dimension(2) :: com_nb               ! number of communications (if (stencil size)/2 > local size of coarse grid)
   integer, dimension(2) :: com_size             ! size of mpi communication for ghost points
-  integer, dimension(2) :: rece_request         ! mpi communication request (handle) of nonblocking receive
-  integer, dimension(MPI_STATUS_SIZE)         :: status  ! mpi status (for mpi_wait)
   integer               :: ierr                 ! mpi error code
+  integer, dimension(:),allocatable   :: beg_request  ! mpi communication requests (handles) of nonblocking receives
+  integer, dimension(:),allocatable   :: end_request  ! mpi communication requests (handles) of nonblocking receives
+  real(WP), parameter :: eps = 1e-6_WP
 
   ! Initialisation
-  com_size = size(V_coarse,1)*size(V_coarse,2)
-  com_size(1) = com_size(1)*(stencil_g)
-  com_size(2) = com_size(2)*(stencil_d)
+  com_size(1) = size(V_coarse,1)*size(V_coarse,2)
   N_coarse = size(V_coarse,3)
   N_fine = size(V_fine,3)
-  ! ind_max = max(indice ind on fine grid as V_fine(i) can be computed without communnication)
-  !         = max{ind : V_ind=floor[(ind-1)*dx_f/dx_c]+1 <= (N_coarse-stencil_d)}
-  !         = max{ind : V_ind=floor[(ind-1)*dx_f/dx_c]+1 < (N_coarse-stencil_d+1)}
-  !         = max{ind : pos=(ind-1)*dx_f < [(N_coarse-stencil_d+1)-1]*dx_c}
-  !         = max{ind : pos=(ind-1) < [(N_coarse-stencil_d+1)-1]*dx_c/dx_f}
-  !         = max{ind : pos=ind < [(N_coarse-stencil_d+1)-1]*dx_c/dx_f + 1}
-  ind_max = ceiling((N_coarse-stencil_d)*dx_c/dx_f) - 1
-  ind_min = ceiling((stencil_g)*dx_c/dx_f)+1
+  ! ind_max = max(index ind on the fine grid such that V_fine(ind) can be computed without communication)
+  !         = max{ind : V_ind=floor[(ind-1)*dx_f/dx_c]+1  <=  (N_coarse-stencil_d)            }
+  !         = max{ind : V_ind=floor[(ind-1)*dx_f/dx_c]    <=  (N_coarse-stencil_d-1)          }
+  !         = max{ind : pos  =      (ind-1)*dx_f/dx_c     <   (N_coarse-stencil_d-1)+1        }
+  !         = max{ind :             (ind-1)               <   (N_coarse-stencil_d)*dx_c/dx_f  }
+  !         = max{ind : ind < [(N_coarse-stencil_d)*(dx_c/dx_f)]+1}
+  ! Define real_max = [(N_coarse-stencil_d)*(dx_c/dx_f)] as a real. One gets:
+  ! ind_max = max{integer ind : ind < real_max+1} and thus
+  ! ind_max = real_max            if real_max is an integer
+  !         = floor(real_max+1)   otherwise
+  ! ie ind_max = ceiling(real_max)
+  !ind_max = ceiling((N_coarse-stencil_d)*dx_c/dx_f)
+  ind_max = ceiling(((N_coarse-stencil_d)*dx_c/dx_f)-eps)  ! eps avoids round-off errors and thus a segmentation fault
+  ! ind_min = min(index ind on the fine grid such that V_fine(ind) can be computed without communication)
+  !         = min{ind : V_ind=floor[(ind-1)*dx_f/dx_c]+1 - stencil_g > 0}
+  !         = min{ind : V_ind=floor[(ind-1)*dx_f/dx_c] >  stencil_g -1  }
+  !         = min{ind : V_ind=floor[(ind-1)*dx_f/dx_c] >= stencil_g     }
+  !         = min{ind :             (ind-1)*dx_f/dx_c  >= stencil_g     }
+  !         = min{ind : pos=        (ind-1)*dx_f       >= stencil_g*dx_c}
+  !         = min{ind : ind >= (stencil_g*dx_c/dx_f) + 1}
+  !         = ceiling[(stencil_g*dx_c/dx_f) + 1]
+  ind_min = ceiling((stencil_g)*dx_c/dx_f)+1 ! here numerical truncation cannot lead to a seg. fault
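+  ! Worked example (hypothetical values, not from this code): with dx_c = 2*dx_f,
+  ! N_coarse = 10 and stencil_d = 3, the exact value is (10-3)*2 = 14, so ind_max = 14.
+  ! If round-off produced 14.0000001 instead, ceiling would return 15; for ind = 15,
+  ! V_ind = 8 and the last stencil point V_ind+stencil_d = 11 would overflow the
+  ! local V_coarse array of extent 10 - hence the eps shift.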
 
   ! ==== Communication ====
   if(stencil_g>0) then
     allocate(V_beg(size(V_coarse,1),size(V_coarse,2),stencil_g))
-    ! Initiate non blocking receive
-    call Mpi_Irecv(V_beg(1,1,1),com_size(1),MPI_DOUBLE_PRECISION, &
-      & neighbors(dir,-1), 1, D_comm(dir), rece_request(1), ierr)
-    ! Send data
-    call Mpi_Send(V_coarse(:,:,N_coarse-stencil_g+1),com_size(1),MPI_DOUBLE_PRECISION, &
-      & neighbors(dir,1), 1, D_comm(dir), ierr)
+    com_nb(1) = ceiling(real(stencil_g)/N_coarse) ! number of required communications to get ghosts
+    allocate(beg_request(com_nb(1)))
+    com_pos = stencil_g+1              ! com_pos = 1 + missing (or remaining) ghost lines
+    com_size(2) = com_size(1)*N_coarse
+    ! Except for the last communication, send all the local coarse data.
+    ! Note that this happens only if the local coarse grid contains fewer than
+    ! (stencil_size/2) points along the current direction (ie if the coarse grid is very coarse)
+    do ind = 1, com_nb(1)-1
+      com_pos = com_pos - N_coarse  ! = 1 + missing ghost lines after this step
+      ! Communication
+      call Mpi_Irecv(V_beg(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,-ind), 100+ind, D_comm(dir), beg_request(ind), ierr)
+      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,ind), 100+ind, D_comm(dir), ierr)
+    end do
+    ! Last communication to complete the ghost layer at the beginning of the line
+    ! We use the fact that the number of missing ghost lines = com_pos - 1
+    com_size(2) = com_size(1)*(com_pos-1)
+    call Mpi_Irecv(V_beg(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+      & neighbors(dir,-com_nb(1)), 1, D_comm(dir), beg_request(com_nb(1)), ierr)
+    call Mpi_Send(V_coarse(1,1,N_coarse-com_pos+2),com_size(2),MPI_DOUBLE_PRECISION, &
+      & neighbors(dir,com_nb(1)), 1, D_comm(dir), ierr)
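+    ! Worked example (hypothetical values): stencil_g = 3 and N_coarse = 2 give
+    ! com_nb(1) = ceiling(3/2) = 2. The loop step receives the 2 planes of the
+    ! previous process into V_beg(:,:,2:3); the last step receives the remaining
+    ! com_pos-1 = 1 plane into V_beg(:,:,1).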
   end if
 
   if(stencil_d>0) then
     allocate(V_end(size(V_coarse,1),size(V_coarse,2),stencil_d))
-    ! Initiate non blocking receive
-    call Mpi_Irecv(V_end(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
-      & neighbors(dir,1), 2, D_comm(dir), rece_request(2), ierr)
+    com_nb(2) = ceiling(real(stencil_d)/N_coarse) ! number of required communications to get ghosts
+    allocate(end_request(com_nb(2)))
+    com_pos = 1   ! Reception from the next process is done at position 1
+    com_size(2) = com_size(1)*N_coarse
+    ! Except for the last communication, send all the local coarse data.
+    ! Note that this happens only if the local coarse grid contains fewer than
+    ! (stencil_size/2) points along the current direction (ie if the coarse grid is very coarse)
+    do ind = 1, com_nb(2)-1
+      ! Communication
+      call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,ind), 200+ind, D_comm(dir), end_request(ind), ierr)
+      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,-ind), 200+ind, D_comm(dir), ierr)
+      ! next com_pos = (ind*N_coarse)+1 = com_pos + N_coarse
+      com_pos = com_pos + N_coarse
+    end do
+    ! Last step
+    ! Note that the number of missing ghost lines = stencil_d - (com_nb(2)-1)*N_coarse
+    com_size(2) = com_size(1)*(stencil_d-((com_nb(2)-1)*N_coarse))
+    ! Perform communication
+    call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+      & neighbors(dir,com_nb(2)), 2, D_comm(dir), end_request(com_nb(2)), ierr)
     ! Send data
-    call Mpi_Send(V_coarse(:,:,1),com_size(2),MPI_DOUBLE_PRECISION, &
-      & neighbors(dir,-1), 2, D_comm(dir), ierr)
-  else
+    call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+      & neighbors(dir,-com_nb(2)), 2, D_comm(dir), ierr)
   end if
 
   ! ==== Interpolation ====
@@ -461,12 +518,20 @@ subroutine Inter_LastDir_com(dir, V_coarse, dx_c, V_fine, dx_f)
       V_fine(:,:,ind) = V_fine(:,:,ind) + weight(i+1)*V_coarse(:,:,V_ind+i)
     end do
   end do
+  ! -- Wait for communication completion before dealing with the end --
+  if(stencil_g>0) then
+    call mpi_waitall(com_nb(1),beg_request, MPI_STATUSES_IGNORE, ierr)
+    deallocate(beg_request)
+  end if
+  if(stencil_d>0) then
+    call mpi_waitall(com_nb(2),end_request, MPI_STATUSES_IGNORE, ierr)
+    deallocate(end_request)
+  end if
   ! -- For begining --
-  if(stencil_g>0) call mpi_wait(rece_request(1), status, ierr)
-  ! Use that interpolation formula are exact
+  ! Use the fact that the interpolation formulas are exact - no computation for the first point of each line
   V_fine(:,:,1) = V_coarse(:,:,1)
   ! For other first points
-  do ind = 2, ind_min-1
+  do ind = 2, min(ind_min-1, N_fine)  ! Be careful: in some massively parallel contexts, ind_min can be bigger than N_fine+1
     pos = (ind-1)*dx_f/dx_c
     V_ind = floor(pos)+1
     call get_weight(pos-V_ind+1, weight)
@@ -478,14 +543,22 @@ subroutine Inter_LastDir_com(dir, V_coarse, dx_c, V_fine, dx_f)
       !V_fine(:,:,ind) = V_fine(:,:,ind) + weight(i)*V_beg(:,:,V_ind+stencil_g+i)
+      V_fine(:,:,ind) = V_fine(:,:,ind) + weight(i)*V_beg(:,:,V_ind-1+i) ! the first point in V_beg corresponds to position 1-stencil_g
     end do
-    ! We look for first local value of V_coarse at position (:,:,1) ! (array starts at 1)
-    do i_bis = ind_limit+1, stencil_size
+    ! If N_coarse < stencil_size, the last interpolation points do not belong to the local
+    ! domain but to the domain of the process with coordinates = (mine+1) in the mpi-topology.
+    ! Then we search V_end for the values at the last interpolation points.
+    ind_limit_2 = min(stencil_size, N_coarse+ind_limit) ! for a very coarse grid, the stencil size can be bigger than N_coarse
+    do i_bis = ind_limit+1, ind_limit_2
+      ! The first local value of V_coarse is at position (:,:,1) (array starts at 1)
       V_fine(:,:,ind) = V_fine(:,:,ind) + weight(i_bis)*V_coarse(:,:,i_bis-ind_limit)
     end do
+    ! Values in V_end
+    do i_bis = ind_limit_2+1, stencil_size
+      V_fine(:,:,ind) = V_fine(:,:,ind) + weight(i_bis)*V_end(:,:,i_bis-ind_limit_2)
+    end do
   end do
   ! -- For point of at the end of a line along the current direction --
-  if(stencil_d>0) call mpi_wait(rece_request(2), status, ierr)
-  do ind = ind_max+1, N_fine
+  ! If ind_max <= ind_min (ie if stencil_size > N_coarse), computations are already done for the first points
+  do ind = max(ind_max+1,ind_min), N_fine
     pos = (ind-1)*dx_f/dx_c
     V_ind = floor(pos)+1
     call get_weight(pos-V_ind+1, weight)
@@ -534,40 +607,75 @@ subroutine Inter_LastDir_Permut_com(dir, V_coarse, dx_c, V_fine, dx_f)
   integer               :: ind_max, ind_min, ind_limit, ind_limit_2
   real(WP)              :: pos
   integer               :: N_coarse, N_fine     ! number of grid points
+  integer               :: com_pos              ! to deal with multiple communications - if (stencil size)/2 > local size of coarse grid
+                                                ! = position where ghost values are received in V_beg and V_end
+  integer, dimension(2) :: com_nb               ! number of communications (if (stencil size)/2 > local size of coarse grid)
   integer, dimension(2) :: com_size             ! size of mpi communication for ghost points
-  integer, dimension(2) :: rece_request         ! mpi communication request (handle) of nonblocking receive
-  integer, dimension(MPI_STATUS_SIZE)         :: status  ! mpi status (for mpi_wait)
   integer               :: ierr                 ! mpi error code
+  integer, dimension(:),allocatable   :: beg_request  ! mpi communication requests (handles) of nonblocking receives
+  integer, dimension(:),allocatable   :: end_request  ! mpi communication requests (handles) of nonblocking receives
 
   ! Initialisation
-  com_size = size(V_coarse,1)*size(V_coarse,2)
-  com_size(1) = com_size(1)*(stencil_g)
-  com_size(2) = com_size(2)*(stencil_d)
+  com_size(1) = size(V_coarse,1)*size(V_coarse,2)
   N_coarse = size(V_coarse,3)
   N_fine = size(V_fine,2)
-  ind_max = ceiling((N_coarse-stencil_d)*dx_c/dx_f) - 1
+  ind_max = ceiling(((N_coarse-stencil_d)*dx_c/dx_f)-1e-6_WP)  ! to avoid round-off errors and thus a segmentation fault
   ind_min = ceiling((stencil_g)*dx_c/dx_f)+1
 
   ! ==== Communication ====
   if(stencil_g>0) then
     allocate(V_beg(size(V_coarse,1),size(V_coarse,2),stencil_g))
-    ! Initiate non blocking receive
-    call Mpi_Irecv(V_beg(1,1,1),com_size(1),MPI_DOUBLE_PRECISION, &
-      & neighbors(dir,-1), 1, D_comm(dir), rece_request(1), ierr)
-    ! Send data
-    call Mpi_Send(V_coarse(1,1,N_coarse-stencil_g+1),com_size(1),MPI_DOUBLE_PRECISION, &
-      & neighbors(dir,1), 1, D_comm(dir), ierr)
+    com_nb(1) = ceiling(real(stencil_g)/N_coarse) ! number of required communications to get ghosts
+    allocate(beg_request(com_nb(1)))
+    com_pos = stencil_g+1              ! com_pos = 1 + missing (or remaining) ghost lines
+    com_size(2) = com_size(1)*N_coarse
+    ! Except for the last communication, send all the local coarse data.
+    ! Note that this happens only if the local coarse grid contains fewer than
+    ! (stencil_size/2) points along the current direction (ie if the coarse grid is very coarse)
+    do ind = 1, com_nb(1)-1
+      com_pos = com_pos - N_coarse  ! = 1 + missing ghost lines after this step
+      ! Communication
+      call Mpi_Irecv(V_beg(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,-ind), 100+ind, D_comm(dir), beg_request(ind), ierr)
+      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,ind), 100+ind, D_comm(dir), ierr)
+    end do
+    ! Last communication to complete the ghost layer at the beginning of the line
+    ! We use the fact that the number of missing ghost lines = com_pos - 1
+    com_size(2) = com_size(1)*(com_pos-1)
+    call Mpi_Irecv(V_beg(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+      & neighbors(dir,-com_nb(1)), 1, D_comm(dir), beg_request(com_nb(1)), ierr)
+    call Mpi_Send(V_coarse(1,1,N_coarse-com_pos+2),com_size(2),MPI_DOUBLE_PRECISION, &
+      & neighbors(dir,com_nb(1)), 1, D_comm(dir), ierr)
   end if
 
   if(stencil_d>0) then
     allocate(V_end(size(V_coarse,1),size(V_coarse,2),stencil_d))
-    ! Initiate non blocking receive
-    call Mpi_Irecv(V_end(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
-      & neighbors(dir,1), 2, D_comm(dir), rece_request(2), ierr)
+    com_nb(2) = ceiling(real(stencil_d)/N_coarse) ! number of required communications to get ghosts
+    allocate(end_request(com_nb(2)))
+    com_pos = 1   ! Reception from the next process is done at position 1
+    com_size(2) = com_size(1)*N_coarse
+    ! Except for the last communication, send all the local coarse data.
+    ! Note that this happens only if the local coarse grid contains fewer than
+    ! (stencil_size/2) points along the current direction (ie if the coarse grid is very coarse)
+    do ind = 1, com_nb(2)-1
+      ! Communication
+      call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,ind), 200+ind, D_comm(dir), end_request(ind), ierr)
+      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,-ind), 200+ind, D_comm(dir), ierr)
+      ! next com_pos = (ind*N_coarse)+1 = com_pos + N_coarse
+      com_pos = com_pos + N_coarse
+    end do
+    ! Last step
+    ! Note that the number of missing ghost lines = stencil_d - (com_nb(2)-1)*N_coarse
+    com_size(2) = com_size(1)*(stencil_d-((com_nb(2)-1)*N_coarse))
+    ! Perform communication
+    call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+      & neighbors(dir,com_nb(2)), 2, D_comm(dir), end_request(com_nb(2)), ierr)
     ! Send data
     call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
-      & neighbors(dir,-1), 2, D_comm(dir), ierr)
-  else
+      & neighbors(dir,-com_nb(2)), 2, D_comm(dir), ierr)
   end if
 
   ! ==== Interpolation ====
@@ -582,12 +690,22 @@ subroutine Inter_LastDir_Permut_com(dir, V_coarse, dx_c, V_fine, dx_f)
       V_fine(:,ind,:) = V_fine(:,ind,:) + weight(i+1)*V_coarse(:,:,V_ind+i)
     end do
   end do
+
+  ! -- Wait for communication completion before dealing with the end --
+  if(stencil_g>0) then
+    call mpi_waitall(com_nb(1),beg_request, MPI_STATUSES_IGNORE, ierr)
+    deallocate(beg_request)
+  end if
+  if(stencil_d>0) then
+    call mpi_waitall(com_nb(2),end_request, MPI_STATUSES_IGNORE, ierr)
+    deallocate(end_request)
+  end if
+
   ! -- For begining --
-  if(stencil_g>0) call mpi_wait(rece_request(1), status, ierr)
   ! Use that interpolation formula are exact
   V_fine(:,1,:) = V_coarse(:,:,1)
   ! For other first points
-  do ind = 2, ind_min-1
+  do ind = 2, min(ind_min-1, N_fine)  ! Be careful: in some massively parallel contexts, ind_min can be bigger than N_fine+1
     pos = (ind-1)*(dx_f/dx_c)
     V_ind = floor(pos)+1
     call get_weight(pos-V_ind+1, weight)
@@ -601,11 +719,10 @@ subroutine Inter_LastDir_Permut_com(dir, V_coarse, dx_c, V_fine, dx_f)
       V_fine(:,ind,:) = V_fine(:,ind,:) + weight(i_bis)*V_coarse(:,:,i_bis-ind_limit)
     end do
     do i_bis = ind_limit_2+1, stencil_size
-      V_fine(:,ind,:) = V_fine(:,ind,:) + weight(i_bis)*V_end(:,:,i_bis-ind_limit-N_coarse)
+      V_fine(:,ind,:) = V_fine(:,ind,:) + weight(i_bis)*V_end(:,:,i_bis-ind_limit_2)
     end do
   end do
   ! -- For point of at the end of a line along the current direction --
-  if(stencil_d>0) call mpi_wait(rece_request(2), status, ierr)
   ! If ind_max<= ind_min (ie if stencil_size>N_coarse), computation are already done for first point
   do ind = max(ind_max+1,ind_min), N_fine
     pos = (ind-1)*(dx_f/dx_c)
@@ -651,6 +768,8 @@ subroutine Inter_FirstDir_no_com(V_coarse, dx_c, V_fine, dx_f)
   integer               :: i, ind, V_ind                    ! some loop indices
   real(WP)              :: pos
 
+  V_fine = 0.0_WP ! initialise the output field before accumulation
+
   ! ==== Initialisation ====
   N_coarse = size(V_coarse,1)
   N_fine = size(V_fine,1)
@@ -861,6 +980,7 @@ subroutine weight_Mprime4(pos,weight)
 end subroutine weight_Mprime4
 
 
+!> Interpolation with the M4 kernel. Order 2 everywhere?
 subroutine weight_M4(pos,weight)
 
   real(WP), intent(in)              :: pos
@@ -882,6 +1002,7 @@ subroutine weight_M4(pos,weight)
 end subroutine weight_M4
 
 
+!> Interpolation with the Lambda(4,4) kernel. Order 4 everywhere.
 subroutine weight_Lambda4_4(pos,weight)
 
   real(WP), intent(in)              :: pos
@@ -899,5 +1020,18 @@ subroutine weight_Lambda4_4(pos,weight)
 end subroutine weight_Lambda4_4
 
 
+!> Basic linear interpolation formula. Be careful: this kernel may create unphysical
+!! oscillations at low frequencies. It is implemented mainly to illustrate the need for
+!! "better" interpolation kernels.
+subroutine weight_linear(pos,weight)
+
+  real(WP), intent(in)              :: pos
+  real(WP), dimension(:), intent(out) :: weight
+
+    weight(1) = 1-pos
+    weight(2) = pos
+
+end subroutine weight_linear
+
 end module Interpolation_velo
 !> @}
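As a quick sanity check of the new weight_linear kernel, a hypothetical driver
(assuming WP is double precision in this build): the two weights must sum to one
(partition of unity) and reproduce f(x) = x exactly over the unit cell, which is
what makes the interpolation exact on grid points.

    program check_linear
      use Interpolation_velo, only : weight_linear
      implicit none
      integer, parameter :: WP = kind(1.0d0)  ! assumed to match the library's WP
      real(WP) :: w(2), pos
      pos = 0.3_WP
      call weight_linear(pos, w)
      write(*,*) 'sum of weights :', w(1) + w(2)        ! expect 1
      write(*,*) 'w(1)*0 + w(2)*1:', w(2), '=', pos     ! exact on f(x)=x
    end program check_linear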