diff --git a/HySoP/ParmesToSinglePrecision.patch b/HySoP/ParmesToSinglePrecision.patch new file mode 100644 index 0000000000000000000000000000000000000000..61079e7b3f8060cf3df23b072f30d21538f04727 --- /dev/null +++ b/HySoP/ParmesToSinglePrecision.patch @@ -0,0 +1,950 @@ +diff --git parmepy/constants.py parmepy/constants.py +index c92cc42..8d85877 100644 +--- parmepy/constants.py ++++ parmepy/constants.py +@@ -18,13 +18,13 @@ else: + + PI = math.pi + # Set default type for real and integer numbers +-PARMES_REAL = np.float64 ++PARMES_REAL = np.float32 + # type for array indices + PARMES_INDEX = np.uint32 + # type for integers +-PARMES_INTEGER = np.int64 ++PARMES_INTEGER = np.int32 + # float type for MPI messages +-PARMES_MPI_REAL = MPI.DOUBLE ++PARMES_MPI_REAL = MPI.REAL + ## default array layout (fortran or C convention) + ORDER = 'F' + # to check array ordering with : +diff --git parmepy/f2py/fftw2py.f90 parmepy/f2py/fftw2py.f90 +index 8645a9f..718eb19 100755 +--- parmepy/f2py/fftw2py.f90 ++++ parmepy/f2py/fftw2py.f90 +@@ -5,7 +5,7 @@ + module fftw2py + + use client_data +- use parmesparam ++ use parmesparam_sp + !> 2d case + use fft2d + !> 3d case +@@ -81,7 +81,7 @@ contains + subroutine solve_poisson_2d(omega,velocity_x,velocity_y) + real(pk),dimension(:,:),intent(in):: omega + real(pk),dimension(size(omega,1),size(omega,2)),intent(out) :: velocity_x,velocity_y +- real(pk) :: start ++ real(8) :: start + !f2py intent(in,out) :: velocity_x,velocity_y + start = MPI_WTime() + +@@ -117,7 +117,7 @@ contains + real(pk),dimension(:,:,:),intent(in):: omega_x,omega_y,omega_z + real(pk),dimension(size(omega_x,1),size(omega_y,2),size(omega_z,3)),intent(out) :: velocity_x,velocity_y,velocity_z + integer, dimension(3), intent(in) :: ghosts_vort, ghosts_velo +- real(pk) :: start ++ real(8) :: start + !f2py intent(in,out) :: velocity_x,velocity_y,velocity_z + start = MPI_WTime() + call r2c_3d(omega_x,omega_y,omega_z, ghosts_vort) +diff --git parmepy/f2py/scales2py.f90 parmepy/f2py/scales2py.f90 +index e574369..391e127 100755 +--- parmepy/f2py/scales2py.f90 ++++ parmepy/f2py/scales2py.f90 +@@ -5,7 +5,7 @@ use cart_topology, only : cart_create,set_group_size,discretisation_create,N_pro + use advec, only : advec_init,advec_step,advec_step_Inter_basic,advec_step_Inter_Two + use interpolation_velo, only : interpol_init + use mpi +-use parmesparam ++use parmesparam_sp + + + implicit none +@@ -91,7 +91,7 @@ contains + real(pk), dimension(size(vx,1),size(vx,2),size(vx,3)), intent(inout) :: scal + !f2py real(pk) intent(in,out), depend(size(vx,1)) :: scal + +- real(pk) :: t0 ++ real(8) :: t0 + + t0 = MPI_Wtime() + call advec_step(dt,vx,vy,vz,scal) +diff --git parmepy/operator/tests/test_velocity_correction.py parmepy/operator/tests/test_velocity_correction.py +old mode 100755 +new mode 100644 +diff --git setup.py.in setup.py.in +index 3c75c97..4a8c17a 100644 +--- setup.py.in ++++ setup.py.in +@@ -71,8 +71,8 @@ if(enable_fortran is "ON"): + fortran_src.append(fortran_dir+'fftw2py.f90') + fftwdir = '@FFTWLIB@' + fftwdir = os.path.split(fftwdir)[0] +- parmeslib.append('fftw3') +- parmeslib.append('fftw3_mpi') ++ parmeslib.append('fftw3f') ++ parmeslib.append('fftw3f_mpi') + parmes_libdir.append(fftwdir) + + withscales = '@WITH_SCALES@' +diff --git src/client_data.f90 src/client_data.f90 +index 46b5268..77178d9 100755 +--- src/client_data.f90 ++++ src/client_data.f90 +@@ -1,14 +1,14 @@ + !> Some global parameters and variables + module client_data + +- use MPI, only : MPI_DOUBLE_PRECISION ++ use MPI, only : MPI_REAL + use, intrinsic :: iso_c_binding ! required for fftw + implicit none + + !> kind for real variables (simple or double precision) +- integer, parameter :: mk = kind(1.0d0) ! double precision ++ integer, parameter :: mk = kind(1.0) ! single precision + !> kind for real variables in mpi routines +- integer, parameter :: mpi_mk = MPI_DOUBLE_PRECISION ++ integer, parameter :: mpi_mk = MPI_REAL + !> Problem dimension (model, required for ppm to work properly) + integer, parameter :: dime = 2 + !> Real dimension +@@ -26,7 +26,7 @@ module client_data + !> to activate (or not) screen output + logical,parameter :: verbose = .True. + !> i (sqrt(-1) ...) +- complex(C_DOUBLE_COMPLEX), parameter :: Icmplx = cmplx(0._mk,1._mk, kind=mk) ++ complex(C_FLOAT_COMPLEX), parameter :: Icmplx = cmplx(0._mk,1._mk, kind=mk) + !> tolerance used to compute error + real(mk), parameter :: tolerance = 1e-12 + +diff --git src/fftw/Poisson.f90 src/fftw/Poisson.f90 +index 78b355c..8804e3d 100755 +--- src/fftw/Poisson.f90 ++++ src/fftw/Poisson.f90 +@@ -53,7 +53,7 @@ contains + real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z + real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z + integer, dimension(3), intent(in) :: ghosts_w, ghosts_v +- real(mk) :: start ++ real(8) :: start + !! Compute fftw forward transform + !! Omega is used to initialize the fftw buffer for input field. + +@@ -74,7 +74,7 @@ contains + + real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z + real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z +- real(mk) :: start ++ real(8) :: start + !! Compute fftw forward transform + !! Omega is used to initialize the fftw buffer for input field. + +diff --git src/fftw/fft2d.f90 src/fftw/fft2d.f90 +index c56412f..7edbfd3 100755 +--- src/fftw/fft2d.f90 ++++ src/fftw/fft2d.f90 +@@ -36,15 +36,15 @@ module fft2d + type(C_PTR) :: cbuffer2 + !! Note Franck : check if local declarations of datain/out works and improve perfs. + !> Field (complex values) for fftw input +- complex(C_DOUBLE_COMPLEX), pointer :: datain1(:,:),datain2(:,:) ++ complex(C_FLOAT_COMPLEX), pointer :: datain1(:,:),datain2(:,:) + !> Field (real values) for fftw input +- real(C_DOUBLE), pointer :: rdatain1(:,:) ++ real(C_FLOAT), pointer :: rdatain1(:,:) + !> Field (complex values) for fftw (forward) output +- complex(C_DOUBLE_COMPLEX), pointer :: dataout1(:,:) ++ complex(C_FLOAT_COMPLEX), pointer :: dataout1(:,:) + !> Field (real values) for fftw output +- real(C_DOUBLE), pointer :: rdatain2(:,:) ++ real(C_FLOAT), pointer :: rdatain2(:,:) + !> Field (complex values) for fftw (forward) output +- complex(C_DOUBLE_COMPLEX), pointer :: dataout2(:,:) ++ complex(C_FLOAT_COMPLEX), pointer :: dataout2(:,:) + !> GLOBAL number of points in each direction + integer(C_INTPTR_T),pointer :: fft_resolution(:) + !> LOCAL resolution +@@ -52,13 +52,13 @@ module fft2d + !> Offset in the direction of distribution + integer(c_INTPTR_T),dimension(2) :: local_offset + !> wave numbers for fft in x direction +- real(C_DOUBLE), pointer :: kx(:) ++ real(C_FLOAT), pointer :: kx(:) + !> wave numbers for fft in y direction +- real(C_DOUBLE), pointer :: ky(:) ++ real(C_FLOAT), pointer :: ky(:) + !> log file for fftw + character(len=20),parameter :: filename ="parmesfftw.log" + !> normalization factor +- real(C_DOUBLE) :: normFFT ++ real(C_FLOAT) :: normFFT + !> true if all the allocation stuff for global variables has been done. + logical :: is2DUpToDate = .false. + +@@ -81,7 +81,7 @@ contains + if(is2DUpToDate) return + + ! init fftw mpi context +- call fftw_mpi_init() ++ call fftwf_mpi_init() + + if(rank==0) open(unit=21,file=filename,form="formatted") + +@@ -90,27 +90,27 @@ contains + fft_resolution = resolution-1 + + ! compute "optimal" size (according to fftw) for local date (warning : dimension reversal) +- alloc_local = fftw_mpi_local_size_2d_transposed(fft_resolution(c_Y),fft_resolution(c_X),main_comm,& ++ alloc_local = fftwf_mpi_local_size_2d_transposed(fft_resolution(c_Y),fft_resolution(c_X),main_comm,& + local_resolution(c_Y),local_offset(c_Y),local_resolution(c_X),local_offset(c_X)); + + ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!) +- cbuffer1 = fftw_alloc_complex(alloc_local) ++ cbuffer1 = fftwf_alloc_complex(alloc_local) + ! link datain and dataout1 to cbuffer, setting the right dimensions for each + call c_f_pointer(cbuffer1, datain1, [fft_resolution(c_X),local_resolution(c_Y)]) + call c_f_pointer(cbuffer1, dataout1, [fft_resolution(c_Y),local_resolution(c_X)]) + + ! second buffer used for backward transform. Used to copy dataout1 into dataout2 (input for backward transform and filter) + ! and to save (in-place) the transform of the second component of the velocity +- cbuffer2 = fftw_alloc_complex(alloc_local) ++ cbuffer2 = fftwf_alloc_complex(alloc_local) + call c_f_pointer(cbuffer2, datain2, [fft_resolution(c_X),local_resolution(c_Y)]) + call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_Y),local_resolution(c_X)]) + + ! create MPI plan for in-place forward/backward DFT (note dimension reversal) +- plan_forward1 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,& ++ plan_forward1 = fftwf_mpi_plan_dft_2d(fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,& + main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward1 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,& ++ plan_backward1 = fftwf_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,& + main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) +- plan_backward2 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,& ++ plan_backward2 = fftwf_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,& + main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) + + call computeKxC(lengths(c_X)) +@@ -139,7 +139,7 @@ contains + end do + + ! compute transform (as many times as desired) +- call fftw_mpi_execute_dft(plan_forward1, datain1, dataout1) ++ call fftwf_mpi_execute_dft(plan_forward1, datain1, dataout1) + + !!$ do i = 1, fft_resolution(c_Y) + !!$ write(*,'(a,i5,a,16f10.4)') 'out[',rank,'] ', dataout1(i,1:local_resolution(c_X)) +@@ -147,8 +147,8 @@ contains + !!$ + call filter_poisson_2d() + +- call fftw_mpi_execute_dft(plan_backward1, dataout1, datain1) +- call fftw_mpi_execute_dft(plan_backward2,dataout2,datain2) ++ call fftwf_mpi_execute_dft(plan_backward1, dataout1, datain1) ++ call fftwf_mpi_execute_dft(plan_backward2,dataout2,datain2) + do j = 1, local_resolution(c_Y) + do i = 1, fft_resolution(c_X) + velocity_x(i,j) = datain1(i,j)*normFFT +@@ -183,7 +183,7 @@ contains + if(is2DUpToDate) return + + ! init fftw mpi context +- call fftw_mpi_init() ++ call fftwf_mpi_init() + + if(rank==0) open(unit=21,file=filename,form="formatted") + +@@ -191,11 +191,11 @@ contains + fft_resolution(:) = resolution(:)-1 + halfLength = fft_resolution(c_X)/2+1 + ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!) +- alloc_local = fftw_mpi_local_size_2d_transposed(fft_resolution(c_Y),halfLength,main_comm,local_resolution(c_Y),& ++ alloc_local = fftwf_mpi_local_size_2d_transposed(fft_resolution(c_Y),halfLength,main_comm,local_resolution(c_Y),& + local_offset(c_Y),local_resolution(c_X),local_offset(c_X)); + + ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!) +- cbuffer1 = fftw_alloc_complex(alloc_local) ++ cbuffer1 = fftwf_alloc_complex(alloc_local) + + ! link rdatain1 and dataout1 to cbuffer, setting the right dimensions for each + call c_f_pointer(cbuffer1, rdatain1, [2*halfLength,local_resolution(c_Y)]) +@@ -203,17 +203,17 @@ contains + + ! second buffer used for backward transform. Used to copy dataout1 into dataout2 (input for backward transform and filter) + ! and to save (in-place) the transform of the second component of the velocity +- cbuffer2 = fftw_alloc_complex(alloc_local) ++ cbuffer2 = fftwf_alloc_complex(alloc_local) + + call c_f_pointer(cbuffer2, rdatain2, [2*halfLength,local_resolution(c_Y)]) + call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_Y),local_resolution(c_X)]) + + ! create MPI plans for in-place forward/backward DFT (note dimension reversal) +- plan_forward1 = fftw_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, & ++ plan_forward1 = fftwf_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward1 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & ++ plan_backward1 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) +- plan_backward2 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & ++ plan_backward2 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) + + call computeKx(lengths(c_X)) +@@ -248,7 +248,7 @@ contains + !!$ end do + !!$ + ! compute transform (as many times as desired) +- call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) ++ call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) + + !!$ do i = 1, fft_resolution(c_Y) + !!$ write(*,'(a,i5,a,16f10.4)') 'aaaa[',rank,'] ', dataout1(i,1:local_resolution(c_X)) +@@ -261,8 +261,8 @@ contains + real(mk),dimension(:,:),intent(inout) :: velocity_x,velocity_y + integer(C_INTPTR_T) :: i, j + +- call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) +- call fftw_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2) ++ call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) ++ call fftwf_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2) + do j = 1, local_resolution(c_Y) + do i = 1, fft_resolution(c_X) + velocity_x(i,j) = rdatain1(i,j)*normFFT +@@ -288,7 +288,7 @@ contains + real(mk),dimension(:,:),intent(inout) :: omega + integer(C_INTPTR_T) :: i, j + +- call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) ++ call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) + do j = 1, local_resolution(c_Y) + do i = 1, fft_resolution(c_X) + omega(i,j) = rdatain1(i,j)*normFFT +@@ -390,7 +390,7 @@ contains + subroutine filter_poisson_2d() + + integer(C_INTPTR_T) :: i, j +- complex(C_DOUBLE_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: coeff + if(local_offset(c_X)==0) then + if(local_offset(c_Y) == 0) then + dataout1(1,1) = 0.0 +@@ -437,9 +437,9 @@ contains + + subroutine filter_diffusion_2d(nudt) + +- real(C_DOUBLE), intent(in) :: nudt ++ real(C_FLOAT), intent(in) :: nudt + integer(C_INTPTR_T) :: i, j +- complex(C_DOUBLE_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: coeff + + do i = 1,local_resolution(c_X) + do j = 1, fft_resolution(c_Y) +@@ -452,20 +452,20 @@ contains + + !> Clean fftw context (free memory, plans ...) + subroutine cleanFFTW_2d() +- call fftw_destroy_plan(plan_forward1) +- call fftw_destroy_plan(plan_backward1) +- !call fftw_destroy_plan(plan_forward2) +- !call fftw_destroy_plan(plan_backward2) +- call fftw_free(cbuffer1) +- call fftw_free(cbuffer2) +- call fftw_mpi_cleanup() ++ call fftwf_destroy_plan(plan_forward1) ++ call fftwf_destroy_plan(plan_backward1) ++ !call fftwf_destroy_plan(plan_forward2) ++ !call fftwf_destroy_plan(plan_backward2) ++ call fftwf_free(cbuffer1) ++ call fftwf_free(cbuffer2) ++ call fftwf_mpi_cleanup() + deallocate(fft_resolution) + if(rank==0) close(21) + end subroutine cleanFFTW_2d + + subroutine fft2d_diagnostics(nbelem) + integer(C_INTPTR_T), intent(in) :: nbelem +- complex(C_DOUBLE_COMPLEX) :: memoryAllocated ++ complex(C_FLOAT_COMPLEX) :: memoryAllocated + memoryAllocated = real(nbelem*sizeof(memoryAllocated),mk)*1e-6 + write(*,'(a,i5,a,i12,f10.2)') '[',rank,'] size of each buffer (elements / memory in MB):', & + nbelem, memoryAllocated +@@ -499,10 +499,10 @@ contains + integer(C_INTPTR_T), dimension(2) :: n + + !> Field (real values) for fftw input +- real(C_DOUBLE), pointer :: rdatain1Many(:,:,:) ++ real(C_FLOAT), pointer :: rdatain1Many(:,:,:) + + ! init fftw mpi context +- call fftw_mpi_init() ++ call fftwf_mpi_init() + howmany = 1 + if(rank==0) open(unit=21,file=filename,form="formatted") + +@@ -514,12 +514,12 @@ contains + n(1) = fft_resolution(2) + n(2) = halfLength + ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!) +- alloc_local = fftw_mpi_local_size_many_transposed(2,n,howmany,FFTW_MPI_DEFAULT_BLOCK,& ++ alloc_local = fftwf_mpi_local_size_many_transposed(2,n,howmany,FFTW_MPI_DEFAULT_BLOCK,& + FFTW_MPI_DEFAULT_BLOCK,main_comm,local_resolution(c_Y),& + local_offset(c_Y),local_resolution(c_X),local_offset(c_X)); + + ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!) +- cbuffer1 = fftw_alloc_complex(alloc_local) ++ cbuffer1 = fftwf_alloc_complex(alloc_local) + + ! link rdatain1 and dataout1 to cbuffer, setting the right dimensions for each + call c_f_pointer(cbuffer1, rdatain1Many, [howmany,2*halfLength,local_resolution(c_Y)]) +@@ -527,17 +527,17 @@ contains + + ! second buffer used for backward transform. Used to copy dataout1 into dataout2 (input for backward transform and filter) + ! and to save (in-place) the transform of the second component of the velocity +- cbuffer2 = fftw_alloc_complex(alloc_local) ++ cbuffer2 = fftwf_alloc_complex(alloc_local) + + call c_f_pointer(cbuffer2, rdatain1Many, [howmany,2*halfLength,local_resolution(c_Y)]) + call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_Y),local_resolution(c_X)]) + + ! create MPI plans for in-place forward/backward DFT (note dimension reversal) +- plan_forward1 = fftw_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1Many, dataout1, & ++ plan_forward1 = fftwf_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1Many, dataout1, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward1 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & ++ plan_backward1 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) +- plan_backward2 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & ++ plan_backward2 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) + + call computeKx(lengths(c_X)) +diff --git src/fftw/fft3d.f90 src/fftw/fft3d.f90 +index 1b06457..a857855 100755 +--- src/fftw/fft3d.f90 ++++ src/fftw/fft3d.f90 +@@ -40,15 +40,15 @@ module fft3d + type(C_PTR) :: cbuffer3 + !! Note Franck : check if local declarations of datain/out works and improve perfs. + !> Field (complex values) for fftw input +- complex(C_DOUBLE_COMPLEX), pointer :: datain1(:,:,:)=>NULL(), datain2(:,:,:)=>NULL(), datain3(:,:,:)=>NULL() ++ complex(C_FLOAT_COMPLEX), pointer :: datain1(:,:,:)=>NULL(), datain2(:,:,:)=>NULL(), datain3(:,:,:)=>NULL() + !> Field (real values) for fftw input (these are only pointers to the cbuffers) +- real(C_DOUBLE), pointer :: rdatain1(:,:,:)=>NULL() ,rdatain2(:,:,:)=>NULL() ,rdatain3(:,:,:)=>NULL() ++ real(C_FLOAT), pointer :: rdatain1(:,:,:)=>NULL() ,rdatain2(:,:,:)=>NULL() ,rdatain3(:,:,:)=>NULL() + !> Field (real values) for fftw input in the fftw-many case +- real(C_DOUBLE), pointer :: rdatain_many(:,:,:,:)=>NULL() ++ real(C_FLOAT), pointer :: rdatain_many(:,:,:,:)=>NULL() + !> Field (complex values) for fftw (forward) output +- complex(C_DOUBLE_COMPLEX), pointer :: dataout1(:,:,:)=>NULL() ,dataout2(:,:,:)=>NULL() ,dataout3(:,:,:)=>NULL() ++ complex(C_FLOAT_COMPLEX), pointer :: dataout1(:,:,:)=>NULL() ,dataout2(:,:,:)=>NULL() ,dataout3(:,:,:)=>NULL() + !> Field (complex values) for fftw (forward) output in the fftw-many case +- complex(C_DOUBLE_COMPLEX), pointer :: dataout_many(:,:,:,:)=>NULL() ++ complex(C_FLOAT_COMPLEX), pointer :: dataout_many(:,:,:,:)=>NULL() + !> GLOBAL number of points in each direction on which fft is applied (--> corresponds to "real" resolution - 1) + integer(C_INTPTR_T),pointer :: fft_resolution(:)=>NULL() + !> LOCAL number of points for fft +@@ -56,15 +56,15 @@ module fft3d + !> Offset in the direction of distribution + integer(c_INTPTR_T),dimension(3) :: local_offset + !> wave numbers for fft in x direction +- real(C_DOUBLE), pointer :: kx(:) ++ real(C_FLOAT), pointer :: kx(:) + !> wave numbers for fft in y direction +- real(C_DOUBLE), pointer :: ky(:) ++ real(C_FLOAT), pointer :: ky(:) + !> wave numbers for fft in z direction +- real(C_DOUBLE), pointer :: kz(:) ++ real(C_FLOAT), pointer :: kz(:) + !> log file for fftw + character(len=20),parameter :: filename ="parmesfftw.log" + !> normalization factor +- real(C_DOUBLE) :: normFFT ++ real(C_FLOAT) :: normFFT + !> true if we use fftw-many routines + logical :: manycase + !> true if all the allocation stuff for global variables has been done. +@@ -89,7 +89,7 @@ contains + if(is3DUpToDate) return + + ! init fftw mpi context +- call fftw_mpi_init() ++ call fftwf_mpi_init() + + if(rank==0) open(unit=21,file=filename,form="formatted") + +@@ -98,7 +98,7 @@ contains + fft_resolution(:) = resolution(:)-1 + + ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal) +- alloc_local = fftw_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),main_comm,& ++ alloc_local = fftwf_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),main_comm,& + local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y)); + + ! Set a default value for c_X components. +@@ -106,35 +106,35 @@ contains + local_resolution(c_X) = fft_resolution(c_X) + + ! allocate local buffer (used to save datain/dataout ==> in-place transform!!) +- cbuffer1 = fftw_alloc_complex(alloc_local) ++ cbuffer1 = fftwf_alloc_complex(alloc_local) + ! link datain and dataout to cbuffer, setting the right dimensions for each + call c_f_pointer(cbuffer1, datain1, [fft_resolution(c_X),fft_resolution(c_Y),local_resolution(c_Z)]) + call c_f_pointer(cbuffer1, dataout1, [fft_resolution(c_X),fft_resolution(c_Z),local_resolution(c_Y)]) + + ! second buffer used for backward transform. Used to copy dataout into dataout2 (input for backward transform and filter) + ! and to save (in-place) the transform of the second component of the velocity +- cbuffer2 = fftw_alloc_complex(alloc_local) ++ cbuffer2 = fftwf_alloc_complex(alloc_local) + call c_f_pointer(cbuffer2, datain2, [fft_resolution(c_X),fft_resolution(c_Y),local_resolution(c_Z)]) + call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_X),fft_resolution(c_Z),local_resolution(c_Y)]) + + ! second buffer used for backward transform. Used to copy dataout into dataout2 (input for backward transform and filter) + ! and to save (in-place) the transform of the second component of the velocity +- cbuffer3 = fftw_alloc_complex(alloc_local) ++ cbuffer3 = fftwf_alloc_complex(alloc_local) + call c_f_pointer(cbuffer3, datain3, [fft_resolution(c_X),fft_resolution(c_Y),local_resolution(c_Z)]) + call c_f_pointer(cbuffer3, dataout3, [fft_resolution(c_X),fft_resolution(c_Z),local_resolution(c_Y)]) + + ! create MPI plan for in-place forward/backward DFT (note dimension reversal) +- plan_forward1 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,& ++ plan_forward1 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,& + main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward1 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,& ++ plan_backward1 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,& + main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) +- plan_forward2 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain2,dataout2,& ++ plan_forward2 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain2,dataout2,& + main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward2 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,& ++ plan_backward2 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,& + main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) +- plan_forward3 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain3,dataout3,& ++ plan_forward3 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain3,dataout3,& + main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward3 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout3,datain3,& ++ plan_backward3 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout3,datain3,& + main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) + + call computeKx(lengths(c_X)) +@@ -173,17 +173,17 @@ contains + end do + end do + ! compute transform (as many times as desired) +- call fftw_mpi_execute_dft(plan_forward1, datain1, dataout1) +- call fftw_mpi_execute_dft(plan_forward2, datain2, dataout2) +- call fftw_mpi_execute_dft(plan_forward3, datain3, dataout3) ++ call fftwf_mpi_execute_dft(plan_forward1, datain1, dataout1) ++ call fftwf_mpi_execute_dft(plan_forward2, datain2, dataout2) ++ call fftwf_mpi_execute_dft(plan_forward3, datain3, dataout3) + + ! apply poisson filter + call filter_poisson_3d() + + ! inverse transform to retrieve velocity +- call fftw_mpi_execute_dft(plan_backward1, dataout1,datain1) +- call fftw_mpi_execute_dft(plan_backward2,dataout2,datain2) +- call fftw_mpi_execute_dft(plan_backward3,dataout3,datain3) ++ call fftwf_mpi_execute_dft(plan_backward1, dataout1,datain1) ++ call fftwf_mpi_execute_dft(plan_backward2,dataout2,datain2) ++ call fftwf_mpi_execute_dft(plan_backward3,dataout3,datain3) + do k =1, local_resolution(c_Z) + do j = 1, fft_resolution(c_Y) + do i = 1, fft_resolution(c_X) +@@ -213,7 +213,7 @@ contains + if(is3DUpToDate) return + + ! init fftw mpi context +- call fftw_mpi_init() ++ call fftwf_mpi_init() + + if(rank==0) open(unit=21,file=filename,form="formatted") + allocate(fft_resolution(3)) +@@ -221,7 +221,7 @@ contains + halfLength = fft_resolution(c_X)/2+1 + + ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal) +- alloc_local = fftw_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),halfLength,& ++ alloc_local = fftwf_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),halfLength,& + main_comm,local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y)); + + ! init c_X part. This is required to compute kx with the same function in 2d and 3d cases. +@@ -229,9 +229,9 @@ contains + local_resolution(c_X) = halfLength + + ! allocate local buffer (used to save datain/dataout ==> in-place transform!!) +- cbuffer1 = fftw_alloc_complex(alloc_local) +- cbuffer2 = fftw_alloc_complex(alloc_local) +- cbuffer3 = fftw_alloc_complex(alloc_local) ++ cbuffer1 = fftwf_alloc_complex(alloc_local) ++ cbuffer2 = fftwf_alloc_complex(alloc_local) ++ cbuffer3 = fftwf_alloc_complex(alloc_local) + + ! link rdatain and dataout to cbuffer, setting the right dimensions for each + call c_f_pointer(cbuffer1, rdatain1, [2*halfLength,fft_resolution(c_Y),local_resolution(c_Z)]) +@@ -246,17 +246,17 @@ contains + rdatain3 = 0.0 + + ! create MPI plans for in-place forward/backward DFT (note dimension reversal) +- plan_forward1 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, & ++ plan_forward1 = fftwf_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward1 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & ++ plan_backward1 = fftwf_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) +- plan_forward2 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain2, dataout2, & ++ plan_forward2 = fftwf_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain2, dataout2, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward2 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & ++ plan_backward2 = fftwf_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) +- plan_forward3 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain3, dataout3, & ++ plan_forward3 = fftwf_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain3, dataout3, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward3 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout3, rdatain3, & ++ plan_backward3 = fftwf_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout3, rdatain3, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) + + call computeKx(lengths(c_X)) +@@ -279,7 +279,7 @@ contains + + real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z + integer, dimension(3), intent(in) :: ghosts +- real(mk) :: start ++ real(8) :: start + integer(C_INTPTR_T) :: i,j,k, ig, jg, kg + + ! ig, jg, kg are used to take into account +@@ -301,9 +301,9 @@ contains + + ! compute transforms for each component + start = MPI_WTIME() +- call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) +- call fftw_mpi_execute_dft_r2c(plan_forward2, rdatain2, dataout2) +- call fftw_mpi_execute_dft_r2c(plan_forward3, rdatain3, dataout3) ++ call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) ++ call fftwf_mpi_execute_dft_r2c(plan_forward2, rdatain2, dataout2) ++ call fftwf_mpi_execute_dft_r2c(plan_forward3, rdatain3, dataout3) + !!print *, "r2c time = ", MPI_WTIME() - start + + end subroutine r2c_3d +@@ -316,12 +316,12 @@ contains + subroutine c2r_3d(velocity_x,velocity_y,velocity_z, ghosts) + real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z + integer, dimension(3), intent(in) :: ghosts +- real(mk) :: start ++ real(8) :: start + integer(C_INTPTR_T) :: i,j,k, ig, jg, kg + start = MPI_WTIME() +- call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) +- call fftw_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2) +- call fftw_mpi_execute_dft_c2r(plan_backward3,dataout3,rdatain3) ++ call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) ++ call fftwf_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2) ++ call fftwf_mpi_execute_dft_c2r(plan_backward3,dataout3,rdatain3) + !! print *, "c2r time : ", MPI_WTIME() -start + ! copy back to velocity and normalisation + do k =1, local_resolution(c_Z) +@@ -346,7 +346,7 @@ contains + + real(mk),dimension(:,:,:),intent(in) :: omega + integer, dimension(3), intent(in) :: ghosts +- real(mk) :: start ++ real(8) :: start + integer(C_INTPTR_T) :: i,j,k, ig, jg, kg + + ! ig, jg, kg are used to take into account +@@ -366,7 +366,7 @@ contains + + ! compute transforms for each component + start = MPI_WTIME() +- call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) ++ call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) + !!print *, "r2c time = ", MPI_WTIME() - start + + end subroutine r2c_scalar_3d +@@ -377,10 +377,10 @@ contains + subroutine c2r_scalar_3d(omega, ghosts) + real(mk),dimension(:,:,:),intent(inout) :: omega + integer, dimension(3), intent(in) :: ghosts +- real(mk) :: start ++ real(8) :: start + integer(C_INTPTR_T) :: i,j,k, ig, jg, kg + start = MPI_WTIME() +- call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) ++ call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) + !! print *, "c2r time : ", MPI_WTIME() -start + ! copy back to velocity and normalisation + do k =1, local_resolution(c_Z) +@@ -412,7 +412,7 @@ contains + integer(C_INTPTR_T),dimension(3) :: n + + ! init fftw mpi context +- call fftw_mpi_init() ++ call fftwf_mpi_init() + blocksize = FFTW_MPI_DEFAULT_BLOCK + if(rank==0) open(unit=21,file=filename,form="formatted") + allocate(fft_resolution(3)) +@@ -423,7 +423,7 @@ contains + n(3) = halfLength + howmany = 3 + ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal) +- alloc_local = fftw_mpi_local_size_many_transposed(3,n,howmany,blocksize,blocksize,& ++ alloc_local = fftwf_mpi_local_size_many_transposed(3,n,howmany,blocksize,blocksize,& + main_comm,local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y)); + + ! init c_X part. This is required to compute kx with the same function in 2d and 3d cases. +@@ -431,7 +431,7 @@ contains + local_resolution(c_X) = halfLength + + ! allocate local buffer (used to save datain/dataout ==> in-place transform!!) +- cbuffer1 = fftw_alloc_complex(alloc_local) ++ cbuffer1 = fftwf_alloc_complex(alloc_local) + + ! link rdatain and dataout to cbuffer, setting the right dimensions for each + call c_f_pointer(cbuffer1, rdatain_many, [howmany,2*halfLength,fft_resolution(c_Y),local_resolution(c_Z)]) +@@ -440,9 +440,9 @@ contains + ! create MPI plans for in-place forward/backward DFT (note dimension reversal) + n(3) = fft_resolution(c_X) + +- plan_forward1 = fftw_mpi_plan_many_dft_r2c(3,n,howmany,blocksize,blocksize, rdatain_many, dataout_many, & ++ plan_forward1 = fftwf_mpi_plan_many_dft_r2c(3,n,howmany,blocksize,blocksize, rdatain_many, dataout_many, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward1 = fftw_mpi_plan_many_dft_c2r(3,n,howmany,blocksize,blocksize, dataout_many, rdatain_many, & ++ plan_backward1 = fftwf_mpi_plan_many_dft_c2r(3,n,howmany,blocksize,blocksize, dataout_many, rdatain_many, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) + call computeKx(lengths(c_X)) + call computeKy(lengths(c_Y)) +@@ -464,7 +464,7 @@ contains + subroutine r2c_3d_many(omega_x,omega_y,omega_z) + + real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z +- real(mk) :: start ++ real(8) :: start + integer(C_INTPTR_T) :: i,j,k + + ! init +@@ -480,7 +480,7 @@ contains + + ! compute transform (as many times as desired) + start = MPI_WTIME() +- call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain_many, dataout_many) ++ call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain_many, dataout_many) + !! print *, "r2c time = ", MPI_WTIME() - start + + end subroutine r2c_3d_many +@@ -491,11 +491,11 @@ contains + !! @param[in,out] velocity_z 3d scalar field, z-component of the output vector field + subroutine c2r_3d_many(velocity_x,velocity_y,velocity_z) + real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z +- real(mk) :: start ++ real(8) :: start + integer(C_INTPTR_T) :: i,j,k + + start = MPI_WTIME() +- call fftw_mpi_execute_dft_c2r(plan_backward1,dataout_many,rdatain_many) ++ call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout_many,rdatain_many) + !! print *, "c2r time : ", MPI_WTIME() -start + do k =1, local_resolution(c_Z) + do j = 1, fft_resolution(c_Y) +@@ -536,7 +536,7 @@ contains + !> Computation of frequencies coeff, over distributed direction(s) + !> @param lengths size of the domain + subroutine computeKy(length) +- real(C_DOUBLE), intent(in) :: length ++ real(C_FLOAT), intent(in) :: length + + !! Local loops indices + integer(C_INTPTR_T) :: i +@@ -589,8 +589,8 @@ contains + subroutine filter_poisson_3d() + + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff +- complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 ++ complex(C_FLOAT_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + + ! Set first coeff (check for "all freq = 0" case) + if(local_offset(c_Y) == 0) then +@@ -648,10 +648,10 @@ contains + !! @param[in] nudt \f$ \nu\times dt\f$, diffusion coefficient times current time step + subroutine filter_curl_diffusion_3d(nudt) + +- real(C_DOUBLE), intent(in) :: nudt ++ real(C_FLOAT), intent(in) :: nudt + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff +- complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 ++ complex(C_FLOAT_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + + !! mind the transpose -> index inversion between y and z + do j = 1,local_resolution(c_Y) +@@ -674,9 +674,9 @@ contains + !! @param[in] nudt \f$ \nu\times dt\f$, diffusion coefficient times current time step + subroutine filter_diffusion_3d(nudt) + +- real(C_DOUBLE), intent(in) :: nudt ++ real(C_FLOAT), intent(in) :: nudt + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: coeff + + !! mind the transpose -> index inversion between y and z + do j = 1,local_resolution(c_Y) +@@ -697,8 +697,8 @@ contains + subroutine filter_curl_3d() + + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff +- complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 ++ complex(C_FLOAT_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + + !! mind the transpose -> index inversion between y and z + do j = 1,local_resolution(c_Y) +@@ -721,8 +721,8 @@ contains + subroutine filter_projection_om_3d() + + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff +- complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2,buffer3 ++ complex(C_FLOAT_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: buffer1,buffer2,buffer3 + + ! Set first coeff (check for "all freq = 0" case) + if(local_offset(c_Y) == 0) then +@@ -783,9 +783,9 @@ contains + !! @param[in] dxf, dyf, dzf: grid filter size = domainLength/(CoarseRes-1) + subroutine filter_multires_om_3d(dxf, dyf, dzf) + +- real(C_DOUBLE), intent(in) :: dxf, dyf, dzf ++ real(C_FLOAT), intent(in) :: dxf, dyf, dzf + integer(C_INTPTR_T) :: i,j,k +- real(C_DOUBLE) :: kxc, kyc, kzc ++ real(C_FLOAT) :: kxc, kyc, kzc + + kxc = pi / dxf + kyc = pi / dyf +@@ -810,7 +810,7 @@ contains + !! pressure from velocity in the Fourier space + subroutine filter_pressure_3d() + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: coeff + + ! Set first coeff (check for "all freq = 0" case) + if(local_offset(c_Y) == 0) then +@@ -851,8 +851,8 @@ contains + subroutine filter_poisson_3d_many() + + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff +- complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 ++ complex(C_FLOAT_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + + ! Set first coeff (check for "all freq = 0" case) + if(local_offset(c_Y) == 0) then +@@ -908,10 +908,10 @@ contains + !! @param[in] nudt \f$ \nu\times dt\f$, diffusion coefficient times current time step + subroutine filter_diffusion_3d_many(nudt) + +- real(C_DOUBLE), intent(in) :: nudt ++ real(C_FLOAT), intent(in) :: nudt + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff +- complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 ++ complex(C_FLOAT_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + + !! mind the transpose -> index inversion between y and z + do j = 1,local_resolution(c_Y) +@@ -931,18 +931,18 @@ contains + + !> Clean fftw context (free memory, plans ...) + subroutine cleanFFTW_3d() +- call fftw_destroy_plan(plan_forward1) +- call fftw_destroy_plan(plan_backward1) ++ call fftwf_destroy_plan(plan_forward1) ++ call fftwf_destroy_plan(plan_backward1) + if(.not.manycase) then +- call fftw_destroy_plan(plan_forward2) +- call fftw_destroy_plan(plan_backward2) +- call fftw_destroy_plan(plan_forward3) +- call fftw_destroy_plan(plan_backward3) +- call fftw_free(cbuffer2) +- call fftw_free(cbuffer3) ++ call fftwf_destroy_plan(plan_forward2) ++ call fftwf_destroy_plan(plan_backward2) ++ call fftwf_destroy_plan(plan_forward3) ++ call fftwf_destroy_plan(plan_backward3) ++ call fftwf_free(cbuffer2) ++ call fftwf_free(cbuffer3) + endif +- call fftw_free(cbuffer1) +- call fftw_mpi_cleanup() ++ call fftwf_free(cbuffer1) ++ call fftwf_mpi_cleanup() + deallocate(fft_resolution) + deallocate(kx,ky,kz) + if(rank==0) close(21) +@@ -953,7 +953,7 @@ contains + integer(C_INTPTR_T), intent(in) :: nbelem + ! number of buffers used for fftw + integer, optional,intent(in) :: howmany +- complex(C_DOUBLE_COMPLEX) :: memoryAllocated ++ complex(C_FLOAT_COMPLEX) :: memoryAllocated + + integer :: nbFields + if(present(howmany)) then +diff --git src/main/main.f90 src/main/main.f90 +index 38cc6a5..0fc8f5f 100755 +--- src/main/main.f90 ++++ src/main/main.f90 +@@ -9,7 +9,7 @@ use vectorcalculus + implicit none + + integer :: info +-real(mk) :: start, end ++real(8) :: start, end + + !complex(mk), dimension(resolution(1),resolution(2)) :: omega,velocity_x,velocity_y + call MPI_Init(info) +@@ -116,7 +116,8 @@ contains + real(mk),dimension(3) :: lengths,step + integer, dimension(3) :: ghosts_v, ghosts_w + integer(C_INTPTR_T),dimension(3) :: nfft,offset +- real(mk) :: error,start ++ real(mk) :: error ++ real(8) :: start + logical :: ok + + if (rank==0) print *, " ======= Test 3D Poisson (r2c) solver for resolution ", resolution +diff --git src/scalesInterface/precision_tools.f90 src/scalesInterface/precision_tools.f90 +index 5dacf3f..4ae1a6a 100644 +--- src/scalesInterface/precision_tools.f90 ++++ src/scalesInterface/precision_tools.f90 +@@ -20,15 +20,15 @@ + !------------------------------------------------------------------------------ + + MODULE precision_tools +- use mpi, only: MPI_DOUBLE_PRECISION ++ use mpi, only: MPI_REAL + implicit None + + !> Floats precision + INTEGER, PARAMETER :: SP = kind(1.0) + INTEGER, PARAMETER :: DP = kind(1.0d0) +- INTEGER, PARAMETER :: WP = DP ++ INTEGER, PARAMETER :: WP = SP + !> the MPI type for REAL exchanges in simple or double precision +- INTEGER, parameter :: MPI_REAL_WP = MPI_DOUBLE_PRECISION ++ INTEGER, parameter :: MPI_REAL_WP = MPI_REAL + REAL(WP), PRIVATE :: sample_real_at_WP + REAL(WP), PARAMETER :: MAX_REAL_WP = HUGE(sample_real_at_WP) + INTEGER, PRIVATE :: sample_int