diff --git a/HySoP/ParmesToSinglePrecision.patch b/HySoP/ParmesToSinglePrecision.patch
new file mode 100644
index 0000000000000000000000000000000000000000..61079e7b3f8060cf3df23b072f30d21538f04727
--- /dev/null
+++ b/HySoP/ParmesToSinglePrecision.patch
@@ -0,0 +1,950 @@
+diff --git parmepy/constants.py parmepy/constants.py
+index c92cc42..8d85877 100644
+--- parmepy/constants.py
++++ parmepy/constants.py
+@@ -18,13 +18,13 @@ else:
+ 
+ PI = math.pi
+ # Set default type for real and integer numbers
+-PARMES_REAL = np.float64
++PARMES_REAL = np.float32
+ # type for array indices
+ PARMES_INDEX = np.uint32
+ # type for integers
+-PARMES_INTEGER = np.int64
++PARMES_INTEGER = np.int32
+ # float type for MPI messages
+-PARMES_MPI_REAL = MPI.DOUBLE
++PARMES_MPI_REAL = MPI.REAL
+ ## default array layout (fortran or C convention)
+ ORDER = 'F'
+ # to check array ordering with :
+diff --git parmepy/f2py/fftw2py.f90 parmepy/f2py/fftw2py.f90
+index 8645a9f..718eb19 100755
+--- parmepy/f2py/fftw2py.f90
++++ parmepy/f2py/fftw2py.f90
+@@ -5,7 +5,7 @@
+ module fftw2py
+ 
+   use client_data
+-  use parmesparam
++  use parmesparam_sp
+   !> 2d case
+    use fft2d
+   !> 3d case
+@@ -81,7 +81,7 @@ contains
+   subroutine solve_poisson_2d(omega,velocity_x,velocity_y)
+     real(pk),dimension(:,:),intent(in):: omega
+     real(pk),dimension(size(omega,1),size(omega,2)),intent(out) :: velocity_x,velocity_y
+-    real(pk) :: start
++    real(8) :: start
+     !f2py intent(in,out) :: velocity_x,velocity_y
+     start = MPI_WTime()
+ 
+@@ -117,7 +117,7 @@ contains
+     real(pk),dimension(:,:,:),intent(in):: omega_x,omega_y,omega_z
+     real(pk),dimension(size(omega_x,1),size(omega_y,2),size(omega_z,3)),intent(out) :: velocity_x,velocity_y,velocity_z
+     integer, dimension(3), intent(in) :: ghosts_vort, ghosts_velo
+-    real(pk) :: start
++    real(8) :: start
+     !f2py intent(in,out) :: velocity_x,velocity_y,velocity_z
+     start = MPI_WTime()
+     call r2c_3d(omega_x,omega_y,omega_z, ghosts_vort)
+diff --git parmepy/f2py/scales2py.f90 parmepy/f2py/scales2py.f90
+index e574369..391e127 100755
+--- parmepy/f2py/scales2py.f90
++++ parmepy/f2py/scales2py.f90
+@@ -5,7 +5,7 @@ use cart_topology, only : cart_create,set_group_size,discretisation_create,N_pro
+ use advec, only : advec_init,advec_step,advec_step_Inter_basic,advec_step_Inter_Two
+ use interpolation_velo, only : interpol_init
+ use mpi
+-use parmesparam
++use parmesparam_sp
+ 
+ 
+ implicit none
+@@ -91,7 +91,7 @@ contains
+     real(pk), dimension(size(vx,1),size(vx,2),size(vx,3)), intent(inout) :: scal
+     !f2py real(pk) intent(in,out), depend(size(vx,1)) :: scal
+ 
+-    real(pk) :: t0
++    real(8) :: t0
+ 
+     t0 = MPI_Wtime()
+     call advec_step(dt,vx,vy,vz,scal)
+diff --git parmepy/operator/tests/test_velocity_correction.py parmepy/operator/tests/test_velocity_correction.py
+old mode 100755
+new mode 100644
+diff --git setup.py.in setup.py.in
+index 3c75c97..4a8c17a 100644
+--- setup.py.in
++++ setup.py.in
+@@ -71,8 +71,8 @@ if(enable_fortran is "ON"):
+         fortran_src.append(fortran_dir+'fftw2py.f90')
+         fftwdir = '@FFTWLIB@'
+         fftwdir = os.path.split(fftwdir)[0]
+-        parmeslib.append('fftw3')
+-        parmeslib.append('fftw3_mpi')
++        parmeslib.append('fftw3f')
++        parmeslib.append('fftw3f_mpi')
+         parmes_libdir.append(fftwdir)
+ 
+     withscales = '@WITH_SCALES@'
+diff --git src/client_data.f90 src/client_data.f90
+index 46b5268..77178d9 100755
+--- src/client_data.f90
++++ src/client_data.f90
+@@ -1,14 +1,14 @@
+ !> Some global parameters and variables
+ module client_data
+ 
+-  use MPI, only : MPI_DOUBLE_PRECISION
++  use MPI, only : MPI_REAL
+   use, intrinsic :: iso_c_binding ! required for fftw
+   implicit none
+ 
+   !> kind for real variables (simple or double precision)
+-  integer, parameter :: mk = kind(1.0d0) ! double precision
++  integer, parameter :: mk = kind(1.0) ! single precision
+   !> kind for real variables in mpi routines
+-  integer, parameter :: mpi_mk = MPI_DOUBLE_PRECISION
++  integer, parameter :: mpi_mk = MPI_REAL
+   !> Problem dimension (model, required for ppm to work properly)
+   integer, parameter :: dime = 2
+   !> Real dimension
+@@ -26,7 +26,7 @@ module client_data
+   !> to activate (or not) screen output
+   logical,parameter :: verbose = .True.
+   !> i (sqrt(-1) ...)
+-  complex(C_DOUBLE_COMPLEX), parameter :: Icmplx = cmplx(0._mk,1._mk, kind=mk)
++  complex(C_FLOAT_COMPLEX), parameter :: Icmplx = cmplx(0._mk,1._mk, kind=mk)
+   !> tolerance used to compute error
+   real(mk), parameter :: tolerance = 1e-12
+ 
+diff --git src/fftw/Poisson.f90 src/fftw/Poisson.f90
+index 78b355c..8804e3d 100755
+--- src/fftw/Poisson.f90
++++ src/fftw/Poisson.f90
+@@ -53,7 +53,7 @@ contains
+     real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z
+     real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z
+     integer, dimension(3), intent(in) :: ghosts_w, ghosts_v
+-    real(mk) :: start
++    real(8) :: start
+     !! Compute fftw forward transform
+     !! Omega is used to initialize the fftw buffer for input field.
+ 
+@@ -74,7 +74,7 @@ contains
+ 
+     real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z
+     real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z
+-    real(mk) :: start
++    real(8) :: start
+     !! Compute fftw forward transform
+     !! Omega is used to initialize the fftw buffer for input field.
+ 
+diff --git src/fftw/fft2d.f90 src/fftw/fft2d.f90
+index c56412f..7edbfd3 100755
+--- src/fftw/fft2d.f90
++++ src/fftw/fft2d.f90
+@@ -36,15 +36,15 @@ module fft2d
+   type(C_PTR) :: cbuffer2
+   !! Note Franck : check if local declarations of datain/out works and improve perfs.
+   !> Field (complex values) for fftw input
+-  complex(C_DOUBLE_COMPLEX), pointer :: datain1(:,:),datain2(:,:)
++  complex(C_FLOAT_COMPLEX), pointer :: datain1(:,:),datain2(:,:)
+   !> Field (real values) for fftw input
+-  real(C_DOUBLE), pointer :: rdatain1(:,:)
++  real(C_FLOAT), pointer :: rdatain1(:,:)
+   !> Field (complex values) for fftw (forward) output
+-  complex(C_DOUBLE_COMPLEX), pointer :: dataout1(:,:)
++  complex(C_FLOAT_COMPLEX), pointer :: dataout1(:,:)
+   !> Field (real values) for fftw output
+-  real(C_DOUBLE), pointer :: rdatain2(:,:)
++  real(C_FLOAT), pointer :: rdatain2(:,:)
+   !> Field (complex values) for fftw (forward) output
+-  complex(C_DOUBLE_COMPLEX), pointer :: dataout2(:,:)
++  complex(C_FLOAT_COMPLEX), pointer :: dataout2(:,:)
+   !> GLOBAL number of points in each direction
+   integer(C_INTPTR_T),pointer :: fft_resolution(:)
+   !> LOCAL resolution
+@@ -52,13 +52,13 @@ module fft2d
+   !> Offset in the direction of distribution
+   integer(c_INTPTR_T),dimension(2) :: local_offset
+   !> wave numbers for fft in x direction
+-  real(C_DOUBLE), pointer :: kx(:)
++  real(C_FLOAT), pointer :: kx(:)
+   !> wave numbers for fft in y direction
+-  real(C_DOUBLE), pointer :: ky(:)
++  real(C_FLOAT), pointer :: ky(:)
+   !> log file for fftw
+   character(len=20),parameter :: filename ="parmesfftw.log"
+   !> normalization factor
+-  real(C_DOUBLE) :: normFFT
++  real(C_FLOAT) :: normFFT
+   !> true if all the allocation stuff for global variables has been done.
+   logical :: is2DUpToDate = .false.
+ 
+@@ -81,7 +81,7 @@ contains
+     if(is2DUpToDate) return
+ 
+     ! init fftw mpi context
+-    call fftw_mpi_init()
++    call fftwf_mpi_init()
+ 
+     if(rank==0) open(unit=21,file=filename,form="formatted")
+ 
+@@ -90,27 +90,27 @@ contains
+     fft_resolution = resolution-1
+ 
+     ! compute "optimal" size (according to fftw) for local date (warning : dimension reversal)
+-    alloc_local = fftw_mpi_local_size_2d_transposed(fft_resolution(c_Y),fft_resolution(c_X),main_comm,&
++    alloc_local = fftwf_mpi_local_size_2d_transposed(fft_resolution(c_Y),fft_resolution(c_X),main_comm,&
+          local_resolution(c_Y),local_offset(c_Y),local_resolution(c_X),local_offset(c_X));
+ 
+     ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!)
+-    cbuffer1 = fftw_alloc_complex(alloc_local)
++    cbuffer1 = fftwf_alloc_complex(alloc_local)
+     ! link datain and dataout1 to cbuffer, setting the right dimensions for each
+     call c_f_pointer(cbuffer1, datain1, [fft_resolution(c_X),local_resolution(c_Y)])
+     call c_f_pointer(cbuffer1, dataout1, [fft_resolution(c_Y),local_resolution(c_X)])
+ 
+     ! second buffer used for backward transform. Used to copy dataout1 into dataout2 (input for backward transform and filter)
+     ! and to save (in-place) the transform of the second component of the velocity
+-    cbuffer2 = fftw_alloc_complex(alloc_local)
++    cbuffer2 = fftwf_alloc_complex(alloc_local)
+     call c_f_pointer(cbuffer2, datain2, [fft_resolution(c_X),local_resolution(c_Y)])
+     call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_Y),local_resolution(c_X)])
+ 
+     !   create MPI plan for in-place forward/backward DFT (note dimension reversal)
+-    plan_forward1 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,&
++    plan_forward1 = fftwf_mpi_plan_dft_2d(fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,&
+          main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+-    plan_backward1 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,&
++    plan_backward1 = fftwf_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,&
+          main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+-    plan_backward2 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,&
++    plan_backward2 = fftwf_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,&
+          main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+ 
+     call computeKxC(lengths(c_X))
+@@ -139,7 +139,7 @@ contains
+     end do
+ 
+     ! compute transform (as many times as desired)
+-    call fftw_mpi_execute_dft(plan_forward1, datain1, dataout1)
++    call fftwf_mpi_execute_dft(plan_forward1, datain1, dataout1)
+ 
+ !!$    do i = 1, fft_resolution(c_Y)
+ !!$       write(*,'(a,i5,a,16f10.4)') 'out[',rank,'] ', dataout1(i,1:local_resolution(c_X))
+@@ -147,8 +147,8 @@ contains
+ !!$
+     call filter_poisson_2d()
+ 
+-    call fftw_mpi_execute_dft(plan_backward1, dataout1, datain1)
+-    call fftw_mpi_execute_dft(plan_backward2,dataout2,datain2)
++    call fftwf_mpi_execute_dft(plan_backward1, dataout1, datain1)
++    call fftwf_mpi_execute_dft(plan_backward2,dataout2,datain2)
+     do j = 1, local_resolution(c_Y)
+        do i = 1, fft_resolution(c_X)
+           velocity_x(i,j) = datain1(i,j)*normFFT
+@@ -183,7 +183,7 @@ contains
+     if(is2DUpToDate) return
+ 
+     ! init fftw mpi context
+-    call fftw_mpi_init()
++    call fftwf_mpi_init()
+ 
+     if(rank==0) open(unit=21,file=filename,form="formatted")
+ 
+@@ -191,11 +191,11 @@ contains
+     fft_resolution(:) = resolution(:)-1
+     halfLength = fft_resolution(c_X)/2+1
+     ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!)
+-    alloc_local = fftw_mpi_local_size_2d_transposed(fft_resolution(c_Y),halfLength,main_comm,local_resolution(c_Y),&
++    alloc_local = fftwf_mpi_local_size_2d_transposed(fft_resolution(c_Y),halfLength,main_comm,local_resolution(c_Y),&
+          local_offset(c_Y),local_resolution(c_X),local_offset(c_X));
+ 
+     ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!)
+-    cbuffer1 = fftw_alloc_complex(alloc_local)
++    cbuffer1 = fftwf_alloc_complex(alloc_local)
+ 
+     ! link rdatain1 and dataout1 to cbuffer, setting the right dimensions for each
+     call c_f_pointer(cbuffer1, rdatain1, [2*halfLength,local_resolution(c_Y)])
+@@ -203,17 +203,17 @@ contains
+ 
+     ! second buffer used for backward transform. Used to copy dataout1 into dataout2 (input for backward transform and filter)
+     ! and to save (in-place) the transform of the second component of the velocity
+-    cbuffer2 = fftw_alloc_complex(alloc_local)
++    cbuffer2 = fftwf_alloc_complex(alloc_local)
+ 
+     call c_f_pointer(cbuffer2, rdatain2, [2*halfLength,local_resolution(c_Y)])
+     call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_Y),local_resolution(c_X)])
+ 
+     !   create MPI plans for in-place forward/backward DFT (note dimension reversal)
+-    plan_forward1 = fftw_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, &
++    plan_forward1 = fftwf_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, &
+          main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+-    plan_backward1 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, &
++    plan_backward1 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, &
+          main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+-    plan_backward2 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, &
++    plan_backward2 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, &
+          main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+ 
+     call computeKx(lengths(c_X))
+@@ -248,7 +248,7 @@ contains
+ !!$    end do
+ !!$
+     ! compute transform (as many times as desired)
+-    call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1)
++    call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1)
+ 
+ !!$    do i = 1, fft_resolution(c_Y)
+ !!$       write(*,'(a,i5,a,16f10.4)') 'aaaa[',rank,'] ', dataout1(i,1:local_resolution(c_X))
+@@ -261,8 +261,8 @@ contains
+     real(mk),dimension(:,:),intent(inout) :: velocity_x,velocity_y
+     integer(C_INTPTR_T) :: i, j
+ 
+-    call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1)
+-    call fftw_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2)
++    call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1)
++    call fftwf_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2)
+     do j = 1, local_resolution(c_Y)
+        do i = 1, fft_resolution(c_X)
+           velocity_x(i,j) = rdatain1(i,j)*normFFT
+@@ -288,7 +288,7 @@ contains
+     real(mk),dimension(:,:),intent(inout) :: omega
+     integer(C_INTPTR_T) :: i, j
+ 
+-    call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1)
++    call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1)
+     do j = 1, local_resolution(c_Y)
+        do i = 1, fft_resolution(c_X)
+           omega(i,j) = rdatain1(i,j)*normFFT
+@@ -390,7 +390,7 @@ contains
+   subroutine filter_poisson_2d()
+ 
+     integer(C_INTPTR_T) :: i, j
+-    complex(C_DOUBLE_COMPLEX) :: coeff
++    complex(C_FLOAT_COMPLEX) :: coeff
+     if(local_offset(c_X)==0) then
+        if(local_offset(c_Y) == 0) then
+           dataout1(1,1) = 0.0
+@@ -437,9 +437,9 @@ contains
+ 
+   subroutine filter_diffusion_2d(nudt)
+ 
+-    real(C_DOUBLE), intent(in) :: nudt
++    real(C_FLOAT), intent(in) :: nudt
+     integer(C_INTPTR_T) :: i, j
+-    complex(C_DOUBLE_COMPLEX) :: coeff
++    complex(C_FLOAT_COMPLEX) :: coeff
+ 
+     do i = 1,local_resolution(c_X)
+        do j = 1, fft_resolution(c_Y)
+@@ -452,20 +452,20 @@ contains
+ 
+   !> Clean fftw context (free memory, plans ...)
+   subroutine cleanFFTW_2d()
+-    call fftw_destroy_plan(plan_forward1)
+-    call fftw_destroy_plan(plan_backward1)
+-    !call fftw_destroy_plan(plan_forward2)
+-    !call fftw_destroy_plan(plan_backward2)
+-    call fftw_free(cbuffer1)
+-    call fftw_free(cbuffer2)
+-    call fftw_mpi_cleanup()
++    call fftwf_destroy_plan(plan_forward1)
++    call fftwf_destroy_plan(plan_backward1)
++    !call fftwf_destroy_plan(plan_forward2)
++    !call fftwf_destroy_plan(plan_backward2)
++    call fftwf_free(cbuffer1)
++    call fftwf_free(cbuffer2)
++    call fftwf_mpi_cleanup()
+     deallocate(fft_resolution)
+     if(rank==0) close(21)
+   end subroutine cleanFFTW_2d
+ 
+   subroutine fft2d_diagnostics(nbelem)
+     integer(C_INTPTR_T), intent(in) :: nbelem
+-    complex(C_DOUBLE_COMPLEX) :: memoryAllocated
++    complex(C_FLOAT_COMPLEX) :: memoryAllocated
+     memoryAllocated = real(nbelem*sizeof(memoryAllocated),mk)*1e-6
+     write(*,'(a,i5,a,i12,f10.2)') '[',rank,'] size of each buffer (elements / memory in MB):', &
+          nbelem, memoryAllocated
+@@ -499,10 +499,10 @@ contains
+     integer(C_INTPTR_T), dimension(2) :: n
+ 
+     !> Field (real values) for fftw input
+-    real(C_DOUBLE), pointer :: rdatain1Many(:,:,:)
++    real(C_FLOAT), pointer :: rdatain1Many(:,:,:)
+ 
+     ! init fftw mpi context
+-    call fftw_mpi_init()
++    call fftwf_mpi_init()
+     howmany = 1
+     if(rank==0) open(unit=21,file=filename,form="formatted")
+ 
+@@ -514,12 +514,12 @@ contains
+     n(1) = fft_resolution(2)
+     n(2) = halfLength
+     ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!)
+-    alloc_local = fftw_mpi_local_size_many_transposed(2,n,howmany,FFTW_MPI_DEFAULT_BLOCK,&
++    alloc_local = fftwf_mpi_local_size_many_transposed(2,n,howmany,FFTW_MPI_DEFAULT_BLOCK,&
+          FFTW_MPI_DEFAULT_BLOCK,main_comm,local_resolution(c_Y),&
+          local_offset(c_Y),local_resolution(c_X),local_offset(c_X));
+ 
+     ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!)
+-    cbuffer1 = fftw_alloc_complex(alloc_local)
++    cbuffer1 = fftwf_alloc_complex(alloc_local)
+ 
+     ! link rdatain1 and dataout1 to cbuffer, setting the right dimensions for each
+     call c_f_pointer(cbuffer1, rdatain1Many, [howmany,2*halfLength,local_resolution(c_Y)])
+@@ -527,17 +527,17 @@ contains
+ 
+     ! second buffer used for backward transform. Used to copy dataout1 into dataout2 (input for backward transform and filter)
+     ! and to save (in-place) the transform of the second component of the velocity
+-    cbuffer2 = fftw_alloc_complex(alloc_local)
++    cbuffer2 = fftwf_alloc_complex(alloc_local)
+ 
+     call c_f_pointer(cbuffer2, rdatain1Many, [howmany,2*halfLength,local_resolution(c_Y)])
+     call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_Y),local_resolution(c_X)])
+ 
+     !   create MPI plans for in-place forward/backward DFT (note dimension reversal)
+-    plan_forward1 = fftw_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1Many, dataout1, &
++    plan_forward1 = fftwf_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1Many, dataout1, &
+          main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+-    plan_backward1 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, &
++    plan_backward1 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, &
+          main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+-    plan_backward2 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, &
++    plan_backward2 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, &
+          main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+ 
+     call computeKx(lengths(c_X))
+diff --git src/fftw/fft3d.f90 src/fftw/fft3d.f90
+index 1b06457..a857855 100755
+--- src/fftw/fft3d.f90
++++ src/fftw/fft3d.f90
+@@ -40,15 +40,15 @@ module fft3d
+   type(C_PTR) :: cbuffer3
+   !! Note Franck : check if local declarations of datain/out works and improve perfs.
+   !> Field (complex values) for fftw input
+-  complex(C_DOUBLE_COMPLEX), pointer :: datain1(:,:,:)=>NULL(), datain2(:,:,:)=>NULL(), datain3(:,:,:)=>NULL()
++  complex(C_FLOAT_COMPLEX), pointer :: datain1(:,:,:)=>NULL(), datain2(:,:,:)=>NULL(), datain3(:,:,:)=>NULL()
+   !> Field (real values) for fftw input (these are only pointers to the cbuffers)
+-  real(C_DOUBLE), pointer :: rdatain1(:,:,:)=>NULL() ,rdatain2(:,:,:)=>NULL() ,rdatain3(:,:,:)=>NULL()
++  real(C_FLOAT), pointer :: rdatain1(:,:,:)=>NULL() ,rdatain2(:,:,:)=>NULL() ,rdatain3(:,:,:)=>NULL()
+   !> Field (real values) for fftw input in the fftw-many case
+-  real(C_DOUBLE), pointer :: rdatain_many(:,:,:,:)=>NULL()
++  real(C_FLOAT), pointer :: rdatain_many(:,:,:,:)=>NULL()
+   !> Field (complex values) for fftw (forward) output
+-  complex(C_DOUBLE_COMPLEX), pointer :: dataout1(:,:,:)=>NULL() ,dataout2(:,:,:)=>NULL() ,dataout3(:,:,:)=>NULL()
++  complex(C_FLOAT_COMPLEX), pointer :: dataout1(:,:,:)=>NULL() ,dataout2(:,:,:)=>NULL() ,dataout3(:,:,:)=>NULL()
+   !> Field (complex values) for fftw (forward) output in the fftw-many case
+-  complex(C_DOUBLE_COMPLEX), pointer :: dataout_many(:,:,:,:)=>NULL()
++  complex(C_FLOAT_COMPLEX), pointer :: dataout_many(:,:,:,:)=>NULL()
+   !> GLOBAL number of points in each direction on which fft is applied (--> corresponds to "real" resolution - 1)
+   integer(C_INTPTR_T),pointer :: fft_resolution(:)=>NULL()
+   !> LOCAL number of points for fft
+@@ -56,15 +56,15 @@ module fft3d
+   !> Offset in the direction of distribution
+   integer(c_INTPTR_T),dimension(3) :: local_offset
+   !> wave numbers for fft in x direction
+-  real(C_DOUBLE), pointer :: kx(:)
++  real(C_FLOAT), pointer :: kx(:)
+   !> wave numbers for fft in y direction
+-  real(C_DOUBLE), pointer :: ky(:)
++  real(C_FLOAT), pointer :: ky(:)
+   !> wave numbers for fft in z direction
+-  real(C_DOUBLE), pointer :: kz(:)
++  real(C_FLOAT), pointer :: kz(:)
+   !> log file for fftw
+   character(len=20),parameter :: filename ="parmesfftw.log"
+   !> normalization factor
+-  real(C_DOUBLE) :: normFFT
++  real(C_FLOAT) :: normFFT
+   !> true if we use fftw-many routines
+   logical :: manycase
+   !> true if all the allocation stuff for global variables has been done.
+@@ -89,7 +89,7 @@ contains
+     if(is3DUpToDate) return
+ 
+     ! init fftw mpi context
+-    call fftw_mpi_init()
++    call fftwf_mpi_init()
+ 
+     if(rank==0) open(unit=21,file=filename,form="formatted")
+ 
+@@ -98,7 +98,7 @@ contains
+     fft_resolution(:) = resolution(:)-1
+ 
+     ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal)
+-    alloc_local = fftw_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),main_comm,&
++    alloc_local = fftwf_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),main_comm,&
+          local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y));
+ 
+     ! Set a default value for c_X components.
+@@ -106,35 +106,35 @@ contains
+     local_resolution(c_X) = fft_resolution(c_X)
+ 
+     ! allocate local buffer (used to save datain/dataout ==> in-place transform!!)
+-    cbuffer1 = fftw_alloc_complex(alloc_local)
++    cbuffer1 = fftwf_alloc_complex(alloc_local)
+     ! link datain and dataout to cbuffer, setting the right dimensions for each
+     call c_f_pointer(cbuffer1, datain1, [fft_resolution(c_X),fft_resolution(c_Y),local_resolution(c_Z)])
+     call c_f_pointer(cbuffer1, dataout1, [fft_resolution(c_X),fft_resolution(c_Z),local_resolution(c_Y)])
+ 
+     ! second buffer used for backward transform. Used to copy dataout into dataout2 (input for backward transform and filter)
+     ! and to save (in-place) the transform of the second component of the velocity
+-    cbuffer2 = fftw_alloc_complex(alloc_local)
++    cbuffer2 = fftwf_alloc_complex(alloc_local)
+     call c_f_pointer(cbuffer2, datain2, [fft_resolution(c_X),fft_resolution(c_Y),local_resolution(c_Z)])
+     call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_X),fft_resolution(c_Z),local_resolution(c_Y)])
+ 
+     ! second buffer used for backward transform. Used to copy dataout into dataout2 (input for backward transform and filter)
+     ! and to save (in-place) the transform of the second component of the velocity
+-    cbuffer3 = fftw_alloc_complex(alloc_local)
++    cbuffer3 = fftwf_alloc_complex(alloc_local)
+     call c_f_pointer(cbuffer3, datain3, [fft_resolution(c_X),fft_resolution(c_Y),local_resolution(c_Z)])
+     call c_f_pointer(cbuffer3, dataout3, [fft_resolution(c_X),fft_resolution(c_Z),local_resolution(c_Y)])
+ 
+     !   create MPI plan for in-place forward/backward DFT (note dimension reversal)
+-    plan_forward1 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,&
++    plan_forward1 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,&
+          main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+-    plan_backward1 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,&
++    plan_backward1 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,&
+          main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+-    plan_forward2 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain2,dataout2,&
++    plan_forward2 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain2,dataout2,&
+          main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+-    plan_backward2 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,&
++    plan_backward2 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,&
+          main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+-    plan_forward3 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain3,dataout3,&
++    plan_forward3 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain3,dataout3,&
+          main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+-    plan_backward3 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout3,datain3,&
++    plan_backward3 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout3,datain3,&
+          main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+ 
+     call computeKx(lengths(c_X))
+@@ -173,17 +173,17 @@ contains
+        end do
+     end do
+     ! compute transform (as many times as desired)
+-    call fftw_mpi_execute_dft(plan_forward1, datain1, dataout1)
+-    call fftw_mpi_execute_dft(plan_forward2, datain2, dataout2)
+-    call fftw_mpi_execute_dft(plan_forward3, datain3, dataout3)
++    call fftwf_mpi_execute_dft(plan_forward1, datain1, dataout1)
++    call fftwf_mpi_execute_dft(plan_forward2, datain2, dataout2)
++    call fftwf_mpi_execute_dft(plan_forward3, datain3, dataout3)
+ 
+     ! apply poisson filter
+     call filter_poisson_3d()
+ 
+     ! inverse transform to retrieve velocity
+-    call fftw_mpi_execute_dft(plan_backward1, dataout1,datain1)
+-    call fftw_mpi_execute_dft(plan_backward2,dataout2,datain2)
+-    call fftw_mpi_execute_dft(plan_backward3,dataout3,datain3)
++    call fftwf_mpi_execute_dft(plan_backward1, dataout1,datain1)
++    call fftwf_mpi_execute_dft(plan_backward2,dataout2,datain2)
++    call fftwf_mpi_execute_dft(plan_backward3,dataout3,datain3)
+     do k =1, local_resolution(c_Z)
+        do j = 1, fft_resolution(c_Y)
+           do i = 1, fft_resolution(c_X)
+@@ -213,7 +213,7 @@ contains
+     if(is3DUpToDate) return
+ 
+     ! init fftw mpi context
+-    call fftw_mpi_init()
++    call fftwf_mpi_init()
+ 
+     if(rank==0) open(unit=21,file=filename,form="formatted")
+     allocate(fft_resolution(3))
+@@ -221,7 +221,7 @@ contains
+     halfLength = fft_resolution(c_X)/2+1
+ 
+     ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal)
+-    alloc_local = fftw_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),halfLength,&
++    alloc_local = fftwf_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),halfLength,&
+          main_comm,local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y));
+ 
+     ! init c_X part. This is required to compute kx with the same function in 2d and 3d cases.
+@@ -229,9 +229,9 @@ contains
+     local_resolution(c_X) = halfLength
+ 
+     ! allocate local buffer (used to save datain/dataout ==> in-place transform!!)
+-    cbuffer1 = fftw_alloc_complex(alloc_local)
+-    cbuffer2 = fftw_alloc_complex(alloc_local)
+-    cbuffer3 = fftw_alloc_complex(alloc_local)
++    cbuffer1 = fftwf_alloc_complex(alloc_local)
++    cbuffer2 = fftwf_alloc_complex(alloc_local)
++    cbuffer3 = fftwf_alloc_complex(alloc_local)
+ 
+     ! link rdatain and dataout to cbuffer, setting the right dimensions for each
+     call c_f_pointer(cbuffer1, rdatain1, [2*halfLength,fft_resolution(c_Y),local_resolution(c_Z)])
+@@ -246,17 +246,17 @@ contains
+     rdatain3 = 0.0
+ 
+     !   create MPI plans for in-place forward/backward DFT (note dimension reversal)
+-   plan_forward1 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, &
++   plan_forward1 = fftwf_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, &
+          main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+-    plan_backward1 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, &
++    plan_backward1 = fftwf_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, &
+          main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+-    plan_forward2 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain2, dataout2, &
++    plan_forward2 = fftwf_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain2, dataout2, &
+          main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+-    plan_backward2 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, &
++    plan_backward2 = fftwf_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, &
+          main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+-    plan_forward3 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain3, dataout3, &
++    plan_forward3 = fftwf_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain3, dataout3, &
+          main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+-    plan_backward3 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout3, rdatain3, &
++    plan_backward3 = fftwf_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout3, rdatain3, &
+          main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+ 
+     call computeKx(lengths(c_X))
+@@ -279,7 +279,7 @@ contains
+ 
+     real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z
+     integer, dimension(3), intent(in) :: ghosts
+-    real(mk) :: start
++    real(8) :: start
+     integer(C_INTPTR_T) :: i,j,k, ig, jg, kg
+ 
+     ! ig, jg, kg are used to take into account
+@@ -301,9 +301,9 @@ contains
+ 
+     ! compute transforms for each component
+     start = MPI_WTIME()
+-    call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1)
+-    call fftw_mpi_execute_dft_r2c(plan_forward2, rdatain2, dataout2)
+-    call fftw_mpi_execute_dft_r2c(plan_forward3, rdatain3, dataout3)
++    call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1)
++    call fftwf_mpi_execute_dft_r2c(plan_forward2, rdatain2, dataout2)
++    call fftwf_mpi_execute_dft_r2c(plan_forward3, rdatain3, dataout3)
+     !!print *, "r2c time = ", MPI_WTIME() - start
+ 
+   end subroutine r2c_3d
+@@ -316,12 +316,12 @@ contains
+   subroutine c2r_3d(velocity_x,velocity_y,velocity_z, ghosts)
+     real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z
+     integer, dimension(3), intent(in) :: ghosts
+-    real(mk) :: start
++    real(8) :: start
+     integer(C_INTPTR_T) :: i,j,k, ig, jg, kg
+     start = MPI_WTIME()
+-    call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1)
+-    call fftw_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2)
+-    call fftw_mpi_execute_dft_c2r(plan_backward3,dataout3,rdatain3)
++    call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1)
++    call fftwf_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2)
++    call fftwf_mpi_execute_dft_c2r(plan_backward3,dataout3,rdatain3)
+ !!    print *, "c2r time : ", MPI_WTIME() -start
+     ! copy back to velocity and normalisation
+     do k =1, local_resolution(c_Z)
+@@ -346,7 +346,7 @@ contains
+ 
+     real(mk),dimension(:,:,:),intent(in) :: omega
+     integer, dimension(3), intent(in) :: ghosts
+-    real(mk) :: start
++    real(8) :: start
+     integer(C_INTPTR_T) :: i,j,k, ig, jg, kg
+ 
+     ! ig, jg, kg are used to take into account
+@@ -366,7 +366,7 @@ contains
+ 
+     ! compute transforms for each component
+     start = MPI_WTIME()
+-    call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1)
++    call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1)
+     !!print *, "r2c time = ", MPI_WTIME() - start
+ 
+   end subroutine r2c_scalar_3d
+@@ -377,10 +377,10 @@ contains
+   subroutine c2r_scalar_3d(omega, ghosts)
+     real(mk),dimension(:,:,:),intent(inout) :: omega
+     integer, dimension(3), intent(in) :: ghosts
+-    real(mk) :: start
++    real(8) :: start
+     integer(C_INTPTR_T) :: i,j,k, ig, jg, kg
+     start = MPI_WTIME()
+-    call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1)
++    call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1)
+ !!    print *, "c2r time : ", MPI_WTIME() -start
+     ! copy back to velocity and normalisation
+     do k =1, local_resolution(c_Z)
+@@ -412,7 +412,7 @@ contains
+     integer(C_INTPTR_T),dimension(3) :: n
+ 
+     ! init fftw mpi context
+-    call fftw_mpi_init()
++    call fftwf_mpi_init()
+     blocksize = FFTW_MPI_DEFAULT_BLOCK
+     if(rank==0) open(unit=21,file=filename,form="formatted")
+     allocate(fft_resolution(3))
+@@ -423,7 +423,7 @@ contains
+     n(3) = halfLength
+     howmany = 3
+     ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal)
+-    alloc_local = fftw_mpi_local_size_many_transposed(3,n,howmany,blocksize,blocksize,&
++    alloc_local = fftwf_mpi_local_size_many_transposed(3,n,howmany,blocksize,blocksize,&
+          main_comm,local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y));
+ 
+     ! init c_X part. This is required to compute kx with the same function in 2d and 3d cases.
+@@ -431,7 +431,7 @@ contains
+     local_resolution(c_X) = halfLength
+ 
+     ! allocate local buffer (used to save datain/dataout ==> in-place transform!!)
+-    cbuffer1 = fftw_alloc_complex(alloc_local)
++    cbuffer1 = fftwf_alloc_complex(alloc_local)
+ 
+     ! link rdatain and dataout to cbuffer, setting the right dimensions for each
+     call c_f_pointer(cbuffer1, rdatain_many, [howmany,2*halfLength,fft_resolution(c_Y),local_resolution(c_Z)])
+@@ -440,9 +440,9 @@ contains
+     !   create MPI plans for in-place forward/backward DFT (note dimension reversal)
+     n(3) = fft_resolution(c_X)
+ 
+-    plan_forward1 = fftw_mpi_plan_many_dft_r2c(3,n,howmany,blocksize,blocksize, rdatain_many, dataout_many, &
++    plan_forward1 = fftwf_mpi_plan_many_dft_r2c(3,n,howmany,blocksize,blocksize, rdatain_many, dataout_many, &
+          main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT))
+-    plan_backward1 = fftw_mpi_plan_many_dft_c2r(3,n,howmany,blocksize,blocksize, dataout_many, rdatain_many, &
++    plan_backward1 = fftwf_mpi_plan_many_dft_c2r(3,n,howmany,blocksize,blocksize, dataout_many, rdatain_many, &
+          main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN))
+     call computeKx(lengths(c_X))
+     call computeKy(lengths(c_Y))
+@@ -464,7 +464,7 @@ contains
+   subroutine r2c_3d_many(omega_x,omega_y,omega_z)
+ 
+     real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z
+-    real(mk) :: start
++    real(8) :: start
+     integer(C_INTPTR_T) :: i,j,k
+ 
+     ! init
+@@ -480,7 +480,7 @@ contains
+ 
+     ! compute transform (as many times as desired)
+     start = MPI_WTIME()
+-    call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain_many, dataout_many)
++    call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain_many, dataout_many)
+ !!    print *, "r2c time = ", MPI_WTIME() - start
+ 
+   end subroutine r2c_3d_many
+@@ -491,11 +491,11 @@ contains
+   !!  @param[in,out] velocity_z 3d scalar field, z-component of the output vector field
+   subroutine c2r_3d_many(velocity_x,velocity_y,velocity_z)
+     real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z
+-    real(mk) :: start
++    real(8) :: start
+     integer(C_INTPTR_T) :: i,j,k
+ 
+     start = MPI_WTIME()
+-    call fftw_mpi_execute_dft_c2r(plan_backward1,dataout_many,rdatain_many)
++    call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout_many,rdatain_many)
+ !!    print *, "c2r time : ", MPI_WTIME() -start
+     do k =1, local_resolution(c_Z)
+        do j = 1, fft_resolution(c_Y)
+@@ -536,7 +536,7 @@ contains
+   !> Computation of frequencies coeff, over distributed direction(s)
+   !> @param lengths size of the domain
+   subroutine computeKy(length)
+-    real(C_DOUBLE), intent(in) :: length
++    real(C_FLOAT), intent(in) :: length
+ 
+     !! Local loops indices
+     integer(C_INTPTR_T) :: i
+@@ -589,8 +589,8 @@ contains
+   subroutine filter_poisson_3d()
+ 
+     integer(C_INTPTR_T) :: i,j,k
+-    complex(C_DOUBLE_COMPLEX) :: coeff
+-    complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2
++    complex(C_FLOAT_COMPLEX) :: coeff
++    complex(C_FLOAT_COMPLEX) :: buffer1,buffer2
+ 
+     ! Set first coeff (check for "all freq = 0" case)
+     if(local_offset(c_Y) == 0) then
+@@ -648,10 +648,10 @@ contains
+   !! @param[in] nudt \f$ \nu\times dt\f$, diffusion coefficient times current time step
+   subroutine filter_curl_diffusion_3d(nudt)
+ 
+-    real(C_DOUBLE), intent(in) :: nudt
++    real(C_FLOAT), intent(in) :: nudt
+     integer(C_INTPTR_T) :: i,j,k
+-    complex(C_DOUBLE_COMPLEX) :: coeff
+-    complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2
++    complex(C_FLOAT_COMPLEX) :: coeff
++    complex(C_FLOAT_COMPLEX) :: buffer1,buffer2
+ 
+     !! mind the transpose -> index inversion between y and z
+     do j = 1,local_resolution(c_Y)
+@@ -674,9 +674,9 @@ contains
+   !! @param[in] nudt \f$ \nu\times dt\f$, diffusion coefficient times current time step
+   subroutine filter_diffusion_3d(nudt)
+ 
+-    real(C_DOUBLE), intent(in) :: nudt
++    real(C_FLOAT), intent(in) :: nudt
+     integer(C_INTPTR_T) :: i,j,k
+-    complex(C_DOUBLE_COMPLEX) :: coeff
++    complex(C_FLOAT_COMPLEX) :: coeff
+ 
+     !! mind the transpose -> index inversion between y and z
+     do j = 1,local_resolution(c_Y)
+@@ -697,8 +697,8 @@ contains
+   subroutine filter_curl_3d()
+ 
+     integer(C_INTPTR_T) :: i,j,k
+-    complex(C_DOUBLE_COMPLEX) :: coeff
+-    complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2
++    complex(C_FLOAT_COMPLEX) :: coeff
++    complex(C_FLOAT_COMPLEX) :: buffer1,buffer2
+ 
+     !! mind the transpose -> index inversion between y and z
+     do j = 1,local_resolution(c_Y)
+@@ -721,8 +721,8 @@ contains
+   subroutine filter_projection_om_3d()
+ 
+     integer(C_INTPTR_T) :: i,j,k
+-    complex(C_DOUBLE_COMPLEX) :: coeff
+-    complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2,buffer3
++    complex(C_FLOAT_COMPLEX) :: coeff
++    complex(C_FLOAT_COMPLEX) :: buffer1,buffer2,buffer3
+ 
+     ! Set first coeff (check for "all freq = 0" case)
+     if(local_offset(c_Y) == 0) then
+@@ -783,9 +783,9 @@ contains
+   !! @param[in] dxf, dyf, dzf: grid filter size = domainLength/(CoarseRes-1)
+   subroutine filter_multires_om_3d(dxf, dyf, dzf)
+ 
+-    real(C_DOUBLE), intent(in) :: dxf, dyf, dzf
++    real(C_FLOAT), intent(in) :: dxf, dyf, dzf
+     integer(C_INTPTR_T) :: i,j,k
+-    real(C_DOUBLE) :: kxc, kyc, kzc
++    real(C_FLOAT) :: kxc, kyc, kzc
+ 
+     kxc = pi / dxf
+     kyc = pi / dyf
+@@ -810,7 +810,7 @@ contains
+   !! pressure from velocity in the Fourier space
+   subroutine filter_pressure_3d()
+     integer(C_INTPTR_T) :: i,j,k
+-    complex(C_DOUBLE_COMPLEX) :: coeff
++    complex(C_FLOAT_COMPLEX) :: coeff
+ 
+     ! Set first coeff (check for "all freq = 0" case)
+     if(local_offset(c_Y) == 0) then
+@@ -851,8 +851,8 @@ contains
+   subroutine filter_poisson_3d_many()
+ 
+     integer(C_INTPTR_T) :: i,j,k
+-    complex(C_DOUBLE_COMPLEX) :: coeff
+-    complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2
++    complex(C_FLOAT_COMPLEX) :: coeff
++    complex(C_FLOAT_COMPLEX) :: buffer1,buffer2
+ 
+     ! Set first coeff (check for "all freq = 0" case)
+     if(local_offset(c_Y) == 0) then
+@@ -908,10 +908,10 @@ contains
+   !! @param[in] nudt \f$ \nu\times dt\f$, diffusion coefficient times current time step
+   subroutine filter_diffusion_3d_many(nudt)
+ 
+-    real(C_DOUBLE), intent(in) :: nudt
++    real(C_FLOAT), intent(in) :: nudt
+     integer(C_INTPTR_T) :: i,j,k
+-    complex(C_DOUBLE_COMPLEX) :: coeff
+-    complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2
++    complex(C_FLOAT_COMPLEX) :: coeff
++    complex(C_FLOAT_COMPLEX) :: buffer1,buffer2
+ 
+     !! mind the transpose -> index inversion between y and z
+     do j = 1,local_resolution(c_Y)
+@@ -931,18 +931,18 @@ contains
+ 
+   !> Clean fftw context (free memory, plans ...)
+   subroutine cleanFFTW_3d()
+-    call fftw_destroy_plan(plan_forward1)
+-    call fftw_destroy_plan(plan_backward1)
++    call fftwf_destroy_plan(plan_forward1)
++    call fftwf_destroy_plan(plan_backward1)
+     if(.not.manycase) then
+-       call fftw_destroy_plan(plan_forward2)
+-       call fftw_destroy_plan(plan_backward2)
+-       call fftw_destroy_plan(plan_forward3)
+-       call fftw_destroy_plan(plan_backward3)
+-       call fftw_free(cbuffer2)
+-       call fftw_free(cbuffer3)
++       call fftwf_destroy_plan(plan_forward2)
++       call fftwf_destroy_plan(plan_backward2)
++       call fftwf_destroy_plan(plan_forward3)
++       call fftwf_destroy_plan(plan_backward3)
++       call fftwf_free(cbuffer2)
++       call fftwf_free(cbuffer3)
+     endif
+-    call fftw_free(cbuffer1)
+-    call fftw_mpi_cleanup()
++    call fftwf_free(cbuffer1)
++    call fftwf_mpi_cleanup()
+     deallocate(fft_resolution)
+     deallocate(kx,ky,kz)
+     if(rank==0) close(21)
+@@ -953,7 +953,7 @@ contains
+     integer(C_INTPTR_T), intent(in) :: nbelem
+     ! number of buffers used for fftw
+     integer, optional,intent(in) :: howmany
+-    complex(C_DOUBLE_COMPLEX) :: memoryAllocated
++    complex(C_FLOAT_COMPLEX) :: memoryAllocated
+ 
+     integer :: nbFields
+     if(present(howmany)) then
+diff --git src/main/main.f90 src/main/main.f90
+index 38cc6a5..0fc8f5f 100755
+--- src/main/main.f90
++++ src/main/main.f90
+@@ -9,7 +9,7 @@ use vectorcalculus
+ implicit none
+ 
+ integer :: info
+-real(mk) :: start, end
++real(8) :: start, end
+ 
+ !complex(mk), dimension(resolution(1),resolution(2)) :: omega,velocity_x,velocity_y
+ call MPI_Init(info)
+@@ -116,7 +116,8 @@ contains
+     real(mk),dimension(3) :: lengths,step
+     integer, dimension(3) :: ghosts_v, ghosts_w
+     integer(C_INTPTR_T),dimension(3) :: nfft,offset
+-    real(mk) :: error,start
++    real(mk) :: error
++    real(8) :: start
+     logical :: ok
+ 
+     if (rank==0) print *, " ======= Test 3D Poisson (r2c) solver for resolution  ", resolution
+diff --git src/scalesInterface/precision_tools.f90 src/scalesInterface/precision_tools.f90
+index 5dacf3f..4ae1a6a 100644
+--- src/scalesInterface/precision_tools.f90
++++ src/scalesInterface/precision_tools.f90
+@@ -20,15 +20,15 @@
+ !------------------------------------------------------------------------------
+ 
+ MODULE precision_tools
+-  use mpi, only: MPI_DOUBLE_PRECISION
++  use mpi, only: MPI_REAL
+   implicit None
+ 
+   !> Floats precision
+   INTEGER, PARAMETER  :: SP = kind(1.0)
+   INTEGER, PARAMETER  :: DP = kind(1.0d0)
+-  INTEGER, PARAMETER  :: WP = DP
++  INTEGER, PARAMETER  :: WP = SP
+   !> the MPI type for REAL exchanges in simple or double precision
+-  INTEGER, parameter     :: MPI_REAL_WP = MPI_DOUBLE_PRECISION
++  INTEGER, parameter     :: MPI_REAL_WP = MPI_REAL
+   REAL(WP), PRIVATE   :: sample_real_at_WP
+   REAL(WP), PARAMETER :: MAX_REAL_WP = HUGE(sample_real_at_WP)
+   INTEGER, PRIVATE    :: sample_int