From aeae15a3abf4c930125c0088f21cfedc117c91a1 Mon Sep 17 00:00:00 2001 From: Jean-Matthieu Etancelin <jean-matthieu.etancelin@univ-reims.fr> Date: Tue, 28 Oct 2014 10:46:24 +0100 Subject: [PATCH] Ajout patch pour passer en simple precision. Corrections de tests pour la simple presision. --- HySoP/hysop/domain/tests/test_control_box.py | 2 + HySoP/hysop/domain/tests/test_mesh.py | 6 +- .../hysop/domain/tests/test_regular_subset.py | 3 + HySoP/hysop/domain/tests/test_subset.py | 12 +- ToSinglePrecision.patch | 952 ++++++++++++++++++ 5 files changed, 967 insertions(+), 8 deletions(-) create mode 100644 ToSinglePrecision.patch diff --git a/HySoP/hysop/domain/tests/test_control_box.py b/HySoP/hysop/domain/tests/test_control_box.py index 20d85b710..dc7b2cda2 100644 --- a/HySoP/hysop/domain/tests/test_control_box.py +++ b/HySoP/hysop/domain/tests/test_control_box.py @@ -42,6 +42,8 @@ xdef = orig + 0.2 def check_control_box(discr, xr, lr): + xr = npw.asrealarray(xr) + lr = npw.asrealarray(lr) dim = len(discr.resolution) dom = Box(dimension=dim, length=ldom[:dim], origin=orig[:dim]) diff --git a/HySoP/hysop/domain/tests/test_mesh.py b/HySoP/hysop/domain/tests/test_mesh.py index 0999bd537..5cdb6a168 100644 --- a/HySoP/hysop/domain/tests/test_mesh.py +++ b/HySoP/hysop/domain/tests/test_mesh.py @@ -5,7 +5,7 @@ Testing regular grids. from parmepy.domain.box import Box from parmepy.tools.parameters import Discretization from parmepy.mpi import main_size, main_rank -import numpy as np +import parmepy.tools.numpywrappers as npw Nx = Ny = Nz = 32 @@ -26,7 +26,7 @@ def create_mesh(discr): assert mesh.discretization == discr assert (mesh.space_step == dom.length / (discr.resolution - 1)).all() # Position compared with global grid - shift = np.asarray([0, ] * dim) + shift = npw.asrealarray([0, ] * dim) shift[-1] = Nz / main_size * main_rank resolution = (discr.resolution - 1) / topo.shape + 2 * gh end = shift + resolution - 2 * gh @@ -52,7 +52,7 @@ def create_mesh(discr): assert (mesh.global_indices(point) == req_point).all() if main_rank == main_size - 1: assert mesh.is_inside(point) - pos = np.asarray(req_point) + pos = npw.asrealarray(req_point) pos[-1] = Nz / main_size - 1 pos += gh assert (mesh.local_indices(point) == pos).all() diff --git a/HySoP/hysop/domain/tests/test_regular_subset.py b/HySoP/hysop/domain/tests/test_regular_subset.py index d2d36b2a0..1c1fd4dc4 100644 --- a/HySoP/hysop/domain/tests/test_regular_subset.py +++ b/HySoP/hysop/domain/tests/test_regular_subset.py @@ -1,6 +1,7 @@ from parmepy.domain.subsets.boxes import SubBox from parmepy.tools.parameters import Discretization from parmepy import Field, Box +import parmepy.tools.numpywrappers as npw import numpy as np import math @@ -40,6 +41,8 @@ xdef = xdom + 0.2 def check_subset(discr, xr, lr): + xr = npw.asrealarray(xr) + lr = npw.asrealarray(lr) dim = len(discr.resolution) dom = Box(dimension=dim, length=ldom[:dim], origin=xdom[:dim]) diff --git a/HySoP/hysop/domain/tests/test_subset.py b/HySoP/hysop/domain/tests/test_subset.py index 555916e67..9c5bb9498 100644 --- a/HySoP/hysop/domain/tests/test_subset.py +++ b/HySoP/hysop/domain/tests/test_subset.py @@ -6,9 +6,11 @@ from parmepy.operator.hdf_io import HDF_Reader from parmepy.tools.parameters import Discretization, IO_params from parmepy import Field, Box from parmepy.mpi.topology import Cartesian +import parmepy.tools.numpywrappers as npw import numpy as np import math from parmepy.mpi import main_size +import mpi4py Nx = 128 @@ -17,13 +19,13 @@ Nz = 102 g = 2 -ldef = [0.3, 0.4, 1.0] +ldef = npw.asrealarray([0.3, 0.4, 1.0]) discr3D = Discretization([Nx + 1, Ny + 1, Nz + 1], [g - 1, g - 2, g]) discr2D = Discretization([Nx + 1, Ny + 1], [g - 1, g]) -xdom = np.asarray([0.1, -0.3, 0.5]) -ldom = np.asarray([math.pi * 2., ] * 3) -xdef = xdom + 0.2 -xpos = ldom * 0.5 +xdom = npw.asrealarray([0.1, -0.3, 0.5]) +ldom = npw.asrealarray([math.pi * 2., ] * 3) +xdef = npw.asrealarray(xdom + 0.2) +xpos = npw.asrealarray(ldom * 0.5) xpos[-1] += 0.1 import os working_dir = os.getcwd() + '/p' + str(main_size) + '/' diff --git a/ToSinglePrecision.patch b/ToSinglePrecision.patch new file mode 100644 index 000000000..4632059ed --- /dev/null +++ b/ToSinglePrecision.patch @@ -0,0 +1,952 @@ +diff --git parmepy/constants.py parmepy/constants.py +index d77a3e9..ee24b47 100644 +--- parmepy/constants.py ++++ parmepy/constants.py +@@ -10,7 +10,7 @@ from parmepy.mpi import MPI + + PI = math.pi + # Set default type for real and integer numbers +-PARMES_REAL = np.float64 ++PARMES_REAL = np.float32 + SIZEOF_PARMES_REAL = int(PARMES_REAL(1.).nbytes) + # type for array indices + PARMES_INDEX = np.uint32 +@@ -19,7 +19,7 @@ PARMES_INTEGER = np.int32 + # integer used for arrays dimensions + PARMES_DIM = np.int16 + # float type for MPI messages +-PARMES_MPI_REAL = MPI.DOUBLE ++PARMES_MPI_REAL = MPI.REAL + # int type for MPI messages + PARMES_MPI_INTEGER = MPI.INT + ## default array layout (fortran or C convention) +diff --git parmepy/f2py/fftw2py.f90 parmepy/f2py/fftw2py.f90 +index c032cd0..113db90 100755 +--- parmepy/f2py/fftw2py.f90 ++++ parmepy/f2py/fftw2py.f90 +@@ -5,7 +5,7 @@ + module fftw2py + + use client_data +- use parmesparam ++ use parmesparam_sp + !> 2d case + use fft2d + !> 3d case +@@ -81,7 +81,7 @@ contains + subroutine solve_poisson_2d(omega,velocity_x,velocity_y) + real(pk),dimension(:,:),intent(in):: omega + real(pk),dimension(size(omega,1),size(omega,2)),intent(out) :: velocity_x,velocity_y +- real(pk) :: start ++ real(8) :: start + !f2py intent(in,out) :: velocity_x,velocity_y + start = MPI_WTime() + +@@ -117,7 +117,7 @@ contains + real(pk),dimension(:,:,:),intent(in):: omega_x,omega_y,omega_z + real(pk),dimension(size(omega_x,1),size(omega_y,2),size(omega_z,3)),intent(out) :: velocity_x,velocity_y,velocity_z + integer, dimension(3), intent(in) :: ghosts_vort, ghosts_velo +- real(pk) :: start ++ real(8) :: start + !f2py intent(in,out) :: velocity_x,velocity_y,velocity_z + start = MPI_WTime() + call r2c_3d(omega_x,omega_y,omega_z, ghosts_vort) +diff --git parmepy/f2py/scales2py.f90 parmepy/f2py/scales2py.f90 +index 7884535..f901685 100755 +--- parmepy/f2py/scales2py.f90 ++++ parmepy/f2py/scales2py.f90 +@@ -6,7 +6,7 @@ use advec, only : advec_init,advec_step,advec_step_Inter_basic,advec_step_Inter_ + use advec_vect, only : advec_step_Vect,advec_step_Inter_basic_Vect + use interpolation_velo, only : interpol_init + use mpi +-use parmesparam ++use parmesparam_sp + + + implicit none +@@ -93,7 +93,7 @@ contains + real(pk), dimension(size(vx,1),size(vx,2),size(vx,3)), intent(inout) :: scal + !f2py real(pk) intent(in,out), depend(size(vx,1)) :: scal + +- real(pk) :: t0 ++ real(8) :: t0 + + t0 = MPI_Wtime() + call advec_step(dt,vx,vy,vz,scal) +diff --git setup.py.in setup.py.in +index dec8ebd..559ad8c 100644 +--- setup.py.in ++++ setup.py.in +@@ -70,8 +70,8 @@ if enable_fortran is "ON": + fortran_src.append(fortran_dir+'parameters.f90') + fortran_src.append(fortran_dir+'fftw2py.f90') + fftwdir = '@FFTWLIB@' +- parmeslib.append('fftw3') +- parmeslib.append('fftw3_mpi') ++ parmeslib.append('fftw3f') ++ parmeslib.append('fftw3f_mpi') + parmes_libdir.append(fftwdir) + else: + packages.append('parmepy.fakef2py') +diff --git src/client_data.f90 src/client_data.f90 +index 46b5268..77178d9 100755 +--- src/client_data.f90 ++++ src/client_data.f90 +@@ -1,14 +1,14 @@ + !> Some global parameters and variables + module client_data + +- use MPI, only : MPI_DOUBLE_PRECISION ++ use MPI, only : MPI_REAL + use, intrinsic :: iso_c_binding ! required for fftw + implicit none + + !> kind for real variables (simple or double precision) +- integer, parameter :: mk = kind(1.0d0) ! double precision ++ integer, parameter :: mk = kind(1.0) ! single precision + !> kind for real variables in mpi routines +- integer, parameter :: mpi_mk = MPI_DOUBLE_PRECISION ++ integer, parameter :: mpi_mk = MPI_REAL + !> Problem dimension (model, required for ppm to work properly) + integer, parameter :: dime = 2 + !> Real dimension +@@ -26,7 +26,7 @@ module client_data + !> to activate (or not) screen output + logical,parameter :: verbose = .True. + !> i (sqrt(-1) ...) +- complex(C_DOUBLE_COMPLEX), parameter :: Icmplx = cmplx(0._mk,1._mk, kind=mk) ++ complex(C_FLOAT_COMPLEX), parameter :: Icmplx = cmplx(0._mk,1._mk, kind=mk) + !> tolerance used to compute error + real(mk), parameter :: tolerance = 1e-12 + +diff --git src/fftw/Poisson.f90 src/fftw/Poisson.f90 +index 78b355c..8804e3d 100755 +--- src/fftw/Poisson.f90 ++++ src/fftw/Poisson.f90 +@@ -53,7 +53,7 @@ contains + real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z + real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z + integer, dimension(3), intent(in) :: ghosts_w, ghosts_v +- real(mk) :: start ++ real(8) :: start + !! Compute fftw forward transform + !! Omega is used to initialize the fftw buffer for input field. + +@@ -74,7 +74,7 @@ contains + + real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z + real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z +- real(mk) :: start ++ real(8) :: start + !! Compute fftw forward transform + !! Omega is used to initialize the fftw buffer for input field. + +diff --git src/fftw/fft2d.f90 src/fftw/fft2d.f90 +index cf2a69a..fd0b337 100755 +--- src/fftw/fft2d.f90 ++++ src/fftw/fft2d.f90 +@@ -42,15 +42,15 @@ module fft2d + type(C_PTR) :: cbuffer2 + !! Note Franck : check if local declarations of datain/out works and improve perfs. + !> Field (complex values) for fftw input +- complex(C_DOUBLE_COMPLEX), pointer :: datain1(:,:),datain2(:,:) ++ complex(C_FLOAT_COMPLEX), pointer :: datain1(:,:),datain2(:,:) + !> Field (real values) for fftw input +- real(C_DOUBLE), pointer :: rdatain1(:,:) ++ real(C_FLOAT), pointer :: rdatain1(:,:) + !> Field (complex values) for fftw (forward) output +- complex(C_DOUBLE_COMPLEX), pointer :: dataout1(:,:) ++ complex(C_FLOAT_COMPLEX), pointer :: dataout1(:,:) + !> Field (real values) for fftw output +- real(C_DOUBLE), pointer :: rdatain2(:,:) ++ real(C_FLOAT), pointer :: rdatain2(:,:) + !> Field (complex values) for fftw (forward) output +- complex(C_DOUBLE_COMPLEX), pointer :: dataout2(:,:) ++ complex(C_FLOAT_COMPLEX), pointer :: dataout2(:,:) + !> GLOBAL number of points in each direction + integer(C_INTPTR_T),pointer :: fft_resolution(:) + !> LOCAL resolution +@@ -58,13 +58,13 @@ module fft2d + !> Offset in the direction of distribution + integer(c_INTPTR_T),dimension(2) :: local_offset + !> wave numbers for fft in x direction +- real(C_DOUBLE), pointer :: kx(:) ++ real(C_FLOAT), pointer :: kx(:) + !> wave numbers for fft in y direction +- real(C_DOUBLE), pointer :: ky(:) ++ real(C_FLOAT), pointer :: ky(:) + !> log file for fftw + character(len=20),parameter :: filename ="parmesfftw.log" + !> normalization factor +- real(C_DOUBLE) :: normFFT ++ real(C_FLOAT) :: normFFT + !> true if all the allocation stuff for global variables has been done. + logical :: is2DUpToDate = .false. + +@@ -88,7 +88,7 @@ contains + if(is2DUpToDate) return + + ! init fftw mpi context +- call fftw_mpi_init() ++ call fftwf_mpi_init() + + if(rank==0) open(unit=21,file=filename,form="formatted") + +@@ -98,13 +98,13 @@ contains + + ! compute "optimal" size (according to fftw) for local date + ! (warning : dimension reversal) +- alloc_local = fftw_mpi_local_size_2d_transposed(fft_resolution(c_Y), & ++ alloc_local = fftwf_mpi_local_size_2d_transposed(fft_resolution(c_Y), & + fft_resolution(c_X),main_comm, local_resolution(c_Y), & + local_offset(c_Y), local_resolution(c_X),local_offset(c_X)); + + ! allocate local buffer (used to save datain/dataout1 + ! ==> in-place transform!!) +- cbuffer1 = fftw_alloc_complex(alloc_local) ++ cbuffer1 = fftwf_alloc_complex(alloc_local) + ! link datain and dataout1 to cbuffer, setting the right dimensions + call c_f_pointer(cbuffer1, datain1, & + [fft_resolution(c_X),local_resolution(c_Y)]) +@@ -115,17 +115,17 @@ contains + ! into dataout2 (input for backward transform and filter) + ! and to save (in-place) the transform of the second component + ! of the velocity +- cbuffer2 = fftw_alloc_complex(alloc_local) ++ cbuffer2 = fftwf_alloc_complex(alloc_local) + call c_f_pointer(cbuffer2, datain2,& + [fft_resolution(c_X),local_resolution(c_Y)]) + call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_Y),local_resolution(c_X)]) + + ! create MPI plan for in-place forward/backward DFT (note dimension reversal) +- plan_forward1 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,& ++ plan_forward1 = fftwf_mpi_plan_dft_2d(fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,& + main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward1 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,& ++ plan_backward1 = fftwf_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,& + main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) +- plan_backward2 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,& ++ plan_backward2 = fftwf_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,& + main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) + + call computeKxC(lengths(c_X)) +@@ -154,7 +154,7 @@ contains + end do + + ! compute transform (as many times as desired) +- call fftw_mpi_execute_dft(plan_forward1, datain1, dataout1) ++ call fftwf_mpi_execute_dft(plan_forward1, datain1, dataout1) + + !!$ do i = 1, fft_resolution(c_Y) + !!$ write(*,'(a,i5,a,16f10.4)') 'out[',rank,'] ', dataout1(i,1:local_resolution(c_X)) +@@ -162,8 +162,8 @@ contains + !!$ + call filter_poisson_2d() + +- call fftw_mpi_execute_dft(plan_backward1, dataout1, datain1) +- call fftw_mpi_execute_dft(plan_backward2,dataout2,datain2) ++ call fftwf_mpi_execute_dft(plan_backward1, dataout1, datain1) ++ call fftwf_mpi_execute_dft(plan_backward2,dataout2,datain2) + do j = 1, local_resolution(c_Y) + do i = 1, fft_resolution(c_X) + velocity_x(i,j) = datain1(i,j)*normFFT +@@ -198,7 +198,7 @@ contains + if(is2DUpToDate) return + + ! init fftw mpi context +- call fftw_mpi_init() ++ call fftwf_mpi_init() + + if(rank==0) open(unit=21,file=filename,form="formatted") + +@@ -206,11 +206,11 @@ contains + fft_resolution(:) = resolution(:)-1 + halfLength = fft_resolution(c_X)/2+1 + ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!) +- alloc_local = fftw_mpi_local_size_2d_transposed(fft_resolution(c_Y),halfLength,main_comm,local_resolution(c_Y),& ++ alloc_local = fftwf_mpi_local_size_2d_transposed(fft_resolution(c_Y),halfLength,main_comm,local_resolution(c_Y),& + local_offset(c_Y),local_resolution(c_X),local_offset(c_X)); + + ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!) +- cbuffer1 = fftw_alloc_complex(alloc_local) ++ cbuffer1 = fftwf_alloc_complex(alloc_local) + + ! link rdatain1 and dataout1 to cbuffer, setting the right dimensions for each + call c_f_pointer(cbuffer1, rdatain1, [2*halfLength,local_resolution(c_Y)]) +@@ -218,17 +218,17 @@ contains + + ! second buffer used for backward transform. Used to copy dataout1 into dataout2 (input for backward transform and filter) + ! and to save (in-place) the transform of the second component of the velocity +- cbuffer2 = fftw_alloc_complex(alloc_local) ++ cbuffer2 = fftwf_alloc_complex(alloc_local) + + call c_f_pointer(cbuffer2, rdatain2, [2*halfLength,local_resolution(c_Y)]) + call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_Y),local_resolution(c_X)]) + + ! create MPI plans for in-place forward/backward DFT (note dimension reversal) +- plan_forward1 = fftw_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, & ++ plan_forward1 = fftwf_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward1 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & ++ plan_backward1 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) +- plan_backward2 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & ++ plan_backward2 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) + + call computeKx(lengths(c_X)) +@@ -263,7 +263,7 @@ contains + !!$ end do + !!$ + ! compute transform (as many times as desired) +- call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) ++ call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) + + !!$ do i = 1, fft_resolution(c_Y) + !!$ write(*,'(a,i5,a,16f10.4)') 'aaaa[',rank,'] ', dataout1(i,1:local_resolution(c_X)) +@@ -276,8 +276,8 @@ contains + real(mk),dimension(:,:),intent(inout) :: velocity_x,velocity_y + integer(C_INTPTR_T) :: i, j + +- call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) +- call fftw_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2) ++ call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) ++ call fftwf_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2) + do j = 1, local_resolution(c_Y) + do i = 1, fft_resolution(c_X) + velocity_x(i,j) = rdatain1(i,j)*normFFT +@@ -303,7 +303,7 @@ contains + real(mk),dimension(:,:),intent(inout) :: omega + integer(C_INTPTR_T) :: i, j + +- call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) ++ call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) + do j = 1, local_resolution(c_Y) + do i = 1, fft_resolution(c_X) + omega(i,j) = rdatain1(i,j)*normFFT +@@ -405,7 +405,7 @@ contains + subroutine filter_poisson_2d() + + integer(C_INTPTR_T) :: i, j +- complex(C_DOUBLE_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: coeff + if(local_offset(c_X)==0) then + if(local_offset(c_Y) == 0) then + dataout1(1,1) = 0.0 +@@ -452,9 +452,9 @@ contains + + subroutine filter_diffusion_2d(nudt) + +- real(C_DOUBLE), intent(in) :: nudt ++ real(C_FLOAT), intent(in) :: nudt + integer(C_INTPTR_T) :: i, j +- complex(C_DOUBLE_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: coeff + + do i = 1,local_resolution(c_X) + do j = 1, fft_resolution(c_Y) +@@ -467,20 +467,20 @@ contains + + !> Clean fftw context (free memory, plans ...) + subroutine cleanFFTW_2d() +- call fftw_destroy_plan(plan_forward1) +- call fftw_destroy_plan(plan_backward1) +- !call fftw_destroy_plan(plan_forward2) +- !call fftw_destroy_plan(plan_backward2) +- call fftw_free(cbuffer1) +- call fftw_free(cbuffer2) +- call fftw_mpi_cleanup() ++ call fftwf_destroy_plan(plan_forward1) ++ call fftwf_destroy_plan(plan_backward1) ++ !call fftwf_destroy_plan(plan_forward2) ++ !call fftwf_destroy_plan(plan_backward2) ++ call fftwf_free(cbuffer1) ++ call fftwf_free(cbuffer2) ++ call fftwf_mpi_cleanup() + deallocate(fft_resolution) + if(rank==0) close(21) + end subroutine cleanFFTW_2d + + subroutine fft2d_diagnostics(nbelem) + integer(C_INTPTR_T), intent(in) :: nbelem +- complex(C_DOUBLE_COMPLEX) :: memoryAllocated ++ complex(C_FLOAT_COMPLEX) :: memoryAllocated + memoryAllocated = real(nbelem*sizeof(memoryAllocated),mk)*1e-6 + write(*,'(a,i5,a,i12,f10.2)') '[',rank,'] size of each buffer (elements / memory in MB):', & + nbelem, memoryAllocated +@@ -514,10 +514,10 @@ contains + integer(C_INTPTR_T), dimension(2) :: n + + !> Field (real values) for fftw input +- real(C_DOUBLE), pointer :: rdatain1Many(:,:,:) ++ real(C_FLOAT), pointer :: rdatain1Many(:,:,:) + + ! init fftw mpi context +- call fftw_mpi_init() ++ call fftwf_mpi_init() + howmany = 1 + if(rank==0) open(unit=21,file=filename,form="formatted") + +@@ -527,12 +527,12 @@ contains + n(1) = fft_resolution(2) + n(2) = halfLength + ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!) +- alloc_local = fftw_mpi_local_size_many_transposed(2,n,howmany,FFTW_MPI_DEFAULT_BLOCK,& ++ alloc_local = fftwf_mpi_local_size_many_transposed(2,n,howmany,FFTW_MPI_DEFAULT_BLOCK,& + FFTW_MPI_DEFAULT_BLOCK,main_comm,local_resolution(c_Y),& + local_offset(c_Y),local_resolution(c_X),local_offset(c_X)); + + ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!) +- cbuffer1 = fftw_alloc_complex(alloc_local) ++ cbuffer1 = fftwf_alloc_complex(alloc_local) + + ! link rdatain1 and dataout1 to cbuffer, setting the right dimensions for each + call c_f_pointer(cbuffer1, rdatain1Many, [howmany,2*halfLength,local_resolution(c_Y)]) +@@ -540,17 +540,17 @@ contains + + ! second buffer used for backward transform. Used to copy dataout1 into dataout2 (input for backward transform and filter) + ! and to save (in-place) the transform of the second component of the velocity +- cbuffer2 = fftw_alloc_complex(alloc_local) ++ cbuffer2 = fftwf_alloc_complex(alloc_local) + + call c_f_pointer(cbuffer2, rdatain1Many, [howmany,2*halfLength,local_resolution(c_Y)]) + call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_Y),local_resolution(c_X)]) + + ! create MPI plans for in-place forward/backward DFT (note dimension reversal) +- plan_forward1 = fftw_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1Many, dataout1, & ++ plan_forward1 = fftwf_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1Many, dataout1, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward1 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & ++ plan_backward1 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) +- plan_backward2 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & ++ plan_backward2 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) + + call computeKx(lengths(c_X)) +diff --git src/fftw/fft3d.f90 src/fftw/fft3d.f90 +index 0ad428e..dcefd1f 100755 +--- src/fftw/fft3d.f90 ++++ src/fftw/fft3d.f90 +@@ -40,15 +40,15 @@ module fft3d + type(C_PTR) :: cbuffer3 + !! Note Franck : check if local declarations of datain/out works and improve perfs. + !> Field (complex values) for fftw input +- complex(C_DOUBLE_COMPLEX), pointer :: datain1(:,:,:)=>NULL(), datain2(:,:,:)=>NULL(), datain3(:,:,:)=>NULL() ++ complex(C_FLOAT_COMPLEX), pointer :: datain1(:,:,:)=>NULL(), datain2(:,:,:)=>NULL(), datain3(:,:,:)=>NULL() + !> Field (real values) for fftw input (these are only pointers to the cbuffers) +- real(C_DOUBLE), pointer :: rdatain1(:,:,:)=>NULL() ,rdatain2(:,:,:)=>NULL() ,rdatain3(:,:,:)=>NULL() ++ real(C_FLOAT), pointer :: rdatain1(:,:,:)=>NULL() ,rdatain2(:,:,:)=>NULL() ,rdatain3(:,:,:)=>NULL() + !> Field (real values) for fftw input in the fftw-many case +- real(C_DOUBLE), pointer :: rdatain_many(:,:,:,:)=>NULL() ++ real(C_FLOAT), pointer :: rdatain_many(:,:,:,:)=>NULL() + !> Field (complex values) for fftw (forward) output +- complex(C_DOUBLE_COMPLEX), pointer :: dataout1(:,:,:)=>NULL() ,dataout2(:,:,:)=>NULL() ,dataout3(:,:,:)=>NULL() ++ complex(C_FLOAT_COMPLEX), pointer :: dataout1(:,:,:)=>NULL() ,dataout2(:,:,:)=>NULL() ,dataout3(:,:,:)=>NULL() + !> Field (complex values) for fftw (forward) output in the fftw-many case +- complex(C_DOUBLE_COMPLEX), pointer :: dataout_many(:,:,:,:)=>NULL() ++ complex(C_FLOAT_COMPLEX), pointer :: dataout_many(:,:,:,:)=>NULL() + !> GLOBAL number of points in each direction on which fft is applied (--> corresponds to "real" resolution - 1) + integer(C_INTPTR_T),pointer :: fft_resolution(:)=>NULL() + !> LOCAL number of points for fft +@@ -56,15 +56,15 @@ module fft3d + !> Offset in the direction of distribution + integer(c_INTPTR_T),dimension(3) :: local_offset + !> wave numbers for fft in x direction +- real(C_DOUBLE), pointer :: kx(:) ++ real(C_FLOAT), pointer :: kx(:) + !> wave numbers for fft in y direction +- real(C_DOUBLE), pointer :: ky(:) ++ real(C_FLOAT), pointer :: ky(:) + !> wave numbers for fft in z direction +- real(C_DOUBLE), pointer :: kz(:) ++ real(C_FLOAT), pointer :: kz(:) + !> log file for fftw + character(len=20),parameter :: filename ="parmesfftw.log" + !> normalization factor +- real(C_DOUBLE) :: normFFT ++ real(C_FLOAT) :: normFFT + !> true if we use fftw-many routines + logical :: manycase + !> true if all the allocation stuff for global variables has been done. +@@ -89,7 +89,7 @@ contains + if(is3DUpToDate) return + + ! init fftw mpi context +- call fftw_mpi_init() ++ call fftwf_mpi_init() + + if(rank==0) open(unit=21,file=filename,form="formatted") + +@@ -98,7 +98,7 @@ contains + fft_resolution(:) = resolution(:)-1 + + ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal) +- alloc_local = fftw_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),main_comm,& ++ alloc_local = fftwf_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),main_comm,& + local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y)); + + ! Set a default value for c_X components. +@@ -106,35 +106,35 @@ contains + local_resolution(c_X) = fft_resolution(c_X) + + ! allocate local buffer (used to save datain/dataout ==> in-place transform!!) +- cbuffer1 = fftw_alloc_complex(alloc_local) ++ cbuffer1 = fftwf_alloc_complex(alloc_local) + ! link datain and dataout to cbuffer, setting the right dimensions for each + call c_f_pointer(cbuffer1, datain1, [fft_resolution(c_X),fft_resolution(c_Y),local_resolution(c_Z)]) + call c_f_pointer(cbuffer1, dataout1, [fft_resolution(c_X),fft_resolution(c_Z),local_resolution(c_Y)]) + + ! second buffer used for backward transform. Used to copy dataout into dataout2 (input for backward transform and filter) + ! and to save (in-place) the transform of the second component of the velocity +- cbuffer2 = fftw_alloc_complex(alloc_local) ++ cbuffer2 = fftwf_alloc_complex(alloc_local) + call c_f_pointer(cbuffer2, datain2, [fft_resolution(c_X),fft_resolution(c_Y),local_resolution(c_Z)]) + call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_X),fft_resolution(c_Z),local_resolution(c_Y)]) + + ! second buffer used for backward transform. Used to copy dataout into dataout2 (input for backward transform and filter) + ! and to save (in-place) the transform of the second component of the velocity +- cbuffer3 = fftw_alloc_complex(alloc_local) ++ cbuffer3 = fftwf_alloc_complex(alloc_local) + call c_f_pointer(cbuffer3, datain3, [fft_resolution(c_X),fft_resolution(c_Y),local_resolution(c_Z)]) + call c_f_pointer(cbuffer3, dataout3, [fft_resolution(c_X),fft_resolution(c_Z),local_resolution(c_Y)]) + + ! create MPI plan for in-place forward/backward DFT (note dimension reversal) +- plan_forward1 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,& ++ plan_forward1 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,& + main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward1 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,& ++ plan_backward1 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,& + main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) +- plan_forward2 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain2,dataout2,& ++ plan_forward2 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain2,dataout2,& + main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward2 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,& ++ plan_backward2 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,& + main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) +- plan_forward3 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain3,dataout3,& ++ plan_forward3 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain3,dataout3,& + main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward3 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout3,datain3,& ++ plan_backward3 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout3,datain3,& + main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) + + call computeKx(lengths(c_X)) +@@ -173,17 +173,17 @@ contains + end do + end do + ! compute transform (as many times as desired) +- call fftw_mpi_execute_dft(plan_forward1, datain1, dataout1) +- call fftw_mpi_execute_dft(plan_forward2, datain2, dataout2) +- call fftw_mpi_execute_dft(plan_forward3, datain3, dataout3) ++ call fftwf_mpi_execute_dft(plan_forward1, datain1, dataout1) ++ call fftwf_mpi_execute_dft(plan_forward2, datain2, dataout2) ++ call fftwf_mpi_execute_dft(plan_forward3, datain3, dataout3) + + ! apply poisson filter + call filter_poisson_3d() + + ! inverse transform to retrieve velocity +- call fftw_mpi_execute_dft(plan_backward1, dataout1,datain1) +- call fftw_mpi_execute_dft(plan_backward2,dataout2,datain2) +- call fftw_mpi_execute_dft(plan_backward3,dataout3,datain3) ++ call fftwf_mpi_execute_dft(plan_backward1, dataout1,datain1) ++ call fftwf_mpi_execute_dft(plan_backward2,dataout2,datain2) ++ call fftwf_mpi_execute_dft(plan_backward3,dataout3,datain3) + do k =1, local_resolution(c_Z) + do j = 1, fft_resolution(c_Y) + do i = 1, fft_resolution(c_X) +@@ -213,7 +213,7 @@ contains + if(is3DUpToDate) return + + ! init fftw mpi context +- call fftw_mpi_init() ++ call fftwf_mpi_init() + + if(rank==0) open(unit=21,file=filename,form="formatted") + allocate(fft_resolution(3)) +@@ -221,7 +221,7 @@ contains + halfLength = fft_resolution(c_X)/2+1 + + ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal) +- alloc_local = fftw_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),halfLength,& ++ alloc_local = fftwf_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),halfLength,& + main_comm,local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y)); + + ! init c_X part. This is required to compute kx with the same function in 2d and 3d cases. +@@ -229,9 +229,9 @@ contains + local_resolution(c_X) = halfLength + + ! allocate local buffer (used to save datain/dataout ==> in-place transform!!) +- cbuffer1 = fftw_alloc_complex(alloc_local) +- cbuffer2 = fftw_alloc_complex(alloc_local) +- cbuffer3 = fftw_alloc_complex(alloc_local) ++ cbuffer1 = fftwf_alloc_complex(alloc_local) ++ cbuffer2 = fftwf_alloc_complex(alloc_local) ++ cbuffer3 = fftwf_alloc_complex(alloc_local) + + ! link rdatain and dataout to cbuffer, setting the right dimensions for each + call c_f_pointer(cbuffer1, rdatain1, [2*halfLength,fft_resolution(c_Y),local_resolution(c_Z)]) +@@ -246,17 +246,17 @@ contains + rdatain3 = 0.0 + + ! create MPI plans for in-place forward/backward DFT (note dimension reversal) +- plan_forward1 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, & ++ plan_forward1 = fftwf_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward1 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & ++ plan_backward1 = fftwf_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) +- plan_forward2 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain2, dataout2, & ++ plan_forward2 = fftwf_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain2, dataout2, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward2 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & ++ plan_backward2 = fftwf_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) +- plan_forward3 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain3, dataout3, & ++ plan_forward3 = fftwf_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain3, dataout3, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward3 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout3, rdatain3, & ++ plan_backward3 = fftwf_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout3, rdatain3, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) + + call computeKx(lengths(c_X)) +@@ -279,7 +279,7 @@ contains + + real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z + integer, dimension(3), intent(in) :: ghosts +- real(mk) :: start ++ real(8) :: start + integer(C_INTPTR_T) :: i,j,k, ig, jg, kg + + ! ig, jg, kg are used to take into account +@@ -301,9 +301,9 @@ contains + + ! compute transforms for each component + start = MPI_WTIME() +- call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) +- call fftw_mpi_execute_dft_r2c(plan_forward2, rdatain2, dataout2) +- call fftw_mpi_execute_dft_r2c(plan_forward3, rdatain3, dataout3) ++ call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) ++ call fftwf_mpi_execute_dft_r2c(plan_forward2, rdatain2, dataout2) ++ call fftwf_mpi_execute_dft_r2c(plan_forward3, rdatain3, dataout3) + !!print *, "r2c time = ", MPI_WTIME() - start + + end subroutine r2c_3d +@@ -316,12 +316,12 @@ contains + subroutine c2r_3d(velocity_x,velocity_y,velocity_z, ghosts) + real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z + integer, dimension(3), intent(in) :: ghosts +- real(mk) :: start ++ real(8) :: start + integer(C_INTPTR_T) :: i,j,k, ig, jg, kg + start = MPI_WTIME() +- call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) +- call fftw_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2) +- call fftw_mpi_execute_dft_c2r(plan_backward3,dataout3,rdatain3) ++ call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) ++ call fftwf_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2) ++ call fftwf_mpi_execute_dft_c2r(plan_backward3,dataout3,rdatain3) + !! print *, "c2r time : ", MPI_WTIME() -start + ! copy back to velocity and normalisation + do k =1, local_resolution(c_Z) +@@ -346,7 +346,7 @@ contains + + real(mk),dimension(:,:,:),intent(in) :: omega + integer, dimension(3), intent(in) :: ghosts +- real(mk) :: start ++ real(8) :: start + integer(C_INTPTR_T) :: i,j,k, ig, jg, kg + + ! ig, jg, kg are used to take into account +@@ -366,7 +366,7 @@ contains + + ! compute transforms for each component + start = MPI_WTIME() +- call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) ++ call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) + !!print *, "r2c time = ", MPI_WTIME() - start + + end subroutine r2c_scalar_3d +@@ -377,10 +377,10 @@ contains + subroutine c2r_scalar_3d(omega, ghosts) + real(mk),dimension(:,:,:),intent(inout) :: omega + integer, dimension(3), intent(in) :: ghosts +- real(mk) :: start ++ real(8) :: start + integer(C_INTPTR_T) :: i,j,k, ig, jg, kg + start = MPI_WTIME() +- call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) ++ call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) + !! print *, "c2r time : ", MPI_WTIME() -start + ! copy back to velocity and normalisation + do k =1, local_resolution(c_Z) +@@ -412,7 +412,7 @@ contains + integer(C_INTPTR_T),dimension(3) :: n + + ! init fftw mpi context +- call fftw_mpi_init() ++ call fftwf_mpi_init() + blocksize = FFTW_MPI_DEFAULT_BLOCK + if(rank==0) open(unit=21,file=filename,form="formatted") + allocate(fft_resolution(3)) +@@ -423,7 +423,7 @@ contains + n(3) = halfLength + howmany = 3 + ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal) +- alloc_local = fftw_mpi_local_size_many_transposed(3,n,howmany,blocksize,blocksize,& ++ alloc_local = fftwf_mpi_local_size_many_transposed(3,n,howmany,blocksize,blocksize,& + main_comm,local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y)); + + ! init c_X part. This is required to compute kx with the same function in 2d and 3d cases. +@@ -431,7 +431,7 @@ contains + local_resolution(c_X) = halfLength + + ! allocate local buffer (used to save datain/dataout ==> in-place transform!!) +- cbuffer1 = fftw_alloc_complex(alloc_local) ++ cbuffer1 = fftwf_alloc_complex(alloc_local) + + ! link rdatain and dataout to cbuffer, setting the right dimensions for each + call c_f_pointer(cbuffer1, rdatain_many, [howmany,2*halfLength,fft_resolution(c_Y),local_resolution(c_Z)]) +@@ -440,9 +440,9 @@ contains + ! create MPI plans for in-place forward/backward DFT (note dimension reversal) + n(3) = fft_resolution(c_X) + +- plan_forward1 = fftw_mpi_plan_many_dft_r2c(3,n,howmany,blocksize,blocksize, rdatain_many, dataout_many, & ++ plan_forward1 = fftwf_mpi_plan_many_dft_r2c(3,n,howmany,blocksize,blocksize, rdatain_many, dataout_many, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) +- plan_backward1 = fftw_mpi_plan_many_dft_c2r(3,n,howmany,blocksize,blocksize, dataout_many, rdatain_many, & ++ plan_backward1 = fftwf_mpi_plan_many_dft_c2r(3,n,howmany,blocksize,blocksize, dataout_many, rdatain_many, & + main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) + call computeKx(lengths(c_X)) + call computeKy(lengths(c_Y)) +@@ -464,7 +464,7 @@ contains + subroutine r2c_3d_many(omega_x,omega_y,omega_z) + + real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z +- real(mk) :: start ++ real(8) :: start + integer(C_INTPTR_T) :: i,j,k + + ! init +@@ -480,7 +480,7 @@ contains + + ! compute transform (as many times as desired) + start = MPI_WTIME() +- call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain_many, dataout_many) ++ call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain_many, dataout_many) + !! print *, "r2c time = ", MPI_WTIME() - start + + end subroutine r2c_3d_many +@@ -491,11 +491,11 @@ contains + !! @param[in,out] velocity_z 3d scalar field, z-component of the output vector field + subroutine c2r_3d_many(velocity_x,velocity_y,velocity_z) + real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z +- real(mk) :: start ++ real(8) :: start + integer(C_INTPTR_T) :: i,j,k + + start = MPI_WTIME() +- call fftw_mpi_execute_dft_c2r(plan_backward1,dataout_many,rdatain_many) ++ call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout_many,rdatain_many) + !! print *, "c2r time : ", MPI_WTIME() -start + do k =1, local_resolution(c_Z) + do j = 1, fft_resolution(c_Y) +@@ -536,7 +536,7 @@ contains + !> Computation of frequencies coeff, over distributed direction(s) + !> @param lengths size of the domain + subroutine computeKy(length) +- real(C_DOUBLE), intent(in) :: length ++ real(C_FLOAT), intent(in) :: length + + !! Local loops indices + integer(C_INTPTR_T) :: i +@@ -589,8 +589,8 @@ contains + subroutine filter_poisson_3d() + + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff +- complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 ++ complex(C_FLOAT_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + + ! Set first coeff (check for "all freq = 0" case) + if(local_offset(c_Y) == 0) then +@@ -648,10 +648,10 @@ contains + !! @param[in] nudt \f$ \nu\times dt\f$, diffusion coefficient times current time step + subroutine filter_curl_diffusion_3d(nudt) + +- real(C_DOUBLE), intent(in) :: nudt ++ real(C_FLOAT), intent(in) :: nudt + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff +- complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 ++ complex(C_FLOAT_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + + !! mind the transpose -> index inversion between y and z + do j = 1,local_resolution(c_Y) +@@ -674,9 +674,9 @@ contains + !! @param[in] nudt \f$ \nu\times dt\f$, diffusion coefficient times current time step + subroutine filter_diffusion_3d(nudt) + +- real(C_DOUBLE), intent(in) :: nudt ++ real(C_FLOAT), intent(in) :: nudt + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: coeff + + !! mind the transpose -> index inversion between y and z + do j = 1,local_resolution(c_Y) +@@ -697,8 +697,8 @@ contains + subroutine filter_curl_3d() + + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff +- complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 ++ complex(C_FLOAT_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + + !! mind the transpose -> index inversion between y and z + do j = 1,local_resolution(c_Y) +@@ -721,8 +721,8 @@ contains + subroutine filter_projection_om_3d() + + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff +- complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2,buffer3 ++ complex(C_FLOAT_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: buffer1,buffer2,buffer3 + + ! Set first coeff (check for "all freq = 0" case) + if(local_offset(c_Y) == 0) then +@@ -783,9 +783,9 @@ contains + !! @param[in] dxf, dyf, dzf: grid filter size = domainLength/(CoarseRes-1) + subroutine filter_multires_om_3d(dxf, dyf, dzf) + +- real(C_DOUBLE), intent(in) :: dxf, dyf, dzf ++ real(C_FLOAT), intent(in) :: dxf, dyf, dzf + integer(C_INTPTR_T) :: i,j,k +- real(C_DOUBLE) :: kxc, kyc, kzc ++ real(C_FLOAT) :: kxc, kyc, kzc + + kxc = pi / dxf + kyc = pi / dyf +@@ -810,7 +810,7 @@ contains + !! pressure from velocity in the Fourier space + subroutine filter_pressure_3d() + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: coeff + + ! Set first coeff (check for "all freq = 0" case) + if(local_offset(c_Y) == 0) then +@@ -851,8 +851,8 @@ contains + subroutine filter_poisson_3d_many() + + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff +- complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 ++ complex(C_FLOAT_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + + ! Set first coeff (check for "all freq = 0" case) + if(local_offset(c_Y) == 0) then +@@ -908,10 +908,10 @@ contains + !! @param[in] nudt \f$ \nu\times dt\f$, diffusion coefficient times current time step + subroutine filter_diffusion_3d_many(nudt) + +- real(C_DOUBLE), intent(in) :: nudt ++ real(C_FLOAT), intent(in) :: nudt + integer(C_INTPTR_T) :: i,j,k +- complex(C_DOUBLE_COMPLEX) :: coeff +- complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 ++ complex(C_FLOAT_COMPLEX) :: coeff ++ complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + + !! mind the transpose -> index inversion between y and z + do j = 1,local_resolution(c_Y) +@@ -931,18 +931,18 @@ contains + + !> Clean fftw context (free memory, plans ...) + subroutine cleanFFTW_3d() +- call fftw_destroy_plan(plan_forward1) +- call fftw_destroy_plan(plan_backward1) ++ call fftwf_destroy_plan(plan_forward1) ++ call fftwf_destroy_plan(plan_backward1) + if(.not.manycase) then +- call fftw_destroy_plan(plan_forward2) +- call fftw_destroy_plan(plan_backward2) +- call fftw_destroy_plan(plan_forward3) +- call fftw_destroy_plan(plan_backward3) +- call fftw_free(cbuffer2) +- call fftw_free(cbuffer3) ++ call fftwf_destroy_plan(plan_forward2) ++ call fftwf_destroy_plan(plan_backward2) ++ call fftwf_destroy_plan(plan_forward3) ++ call fftwf_destroy_plan(plan_backward3) ++ call fftwf_free(cbuffer2) ++ call fftwf_free(cbuffer3) + endif +- call fftw_free(cbuffer1) +- call fftw_mpi_cleanup() ++ call fftwf_free(cbuffer1) ++ call fftwf_mpi_cleanup() + deallocate(fft_resolution) + deallocate(kx,ky,kz) + if(rank==0) close(21) +@@ -953,7 +953,7 @@ contains + integer(C_INTPTR_T), intent(in) :: nbelem + ! number of buffers used for fftw + integer, optional,intent(in) :: howmany +- complex(C_DOUBLE_COMPLEX) :: memoryAllocated ++ complex(C_FLOAT_COMPLEX) :: memoryAllocated + + integer :: nbFields + if(present(howmany)) then +diff --git src/main/main.f90 src/main/main.f90 +index 38cc6a5..0fc8f5f 100755 +--- src/main/main.f90 ++++ src/main/main.f90 +@@ -9,7 +9,7 @@ use vectorcalculus + implicit none + + integer :: info +-real(mk) :: start, end ++real(8) :: start, end + + !complex(mk), dimension(resolution(1),resolution(2)) :: omega,velocity_x,velocity_y + call MPI_Init(info) +@@ -116,7 +116,8 @@ contains + real(mk),dimension(3) :: lengths,step + integer, dimension(3) :: ghosts_v, ghosts_w + integer(C_INTPTR_T),dimension(3) :: nfft,offset +- real(mk) :: error,start ++ real(mk) :: error ++ real(8) :: start + logical :: ok + + if (rank==0) print *, " ======= Test 3D Poisson (r2c) solver for resolution ", resolution +diff --git src/scalesInterface/precision_tools.f90 src/scalesInterface/precision_tools.f90 +index 5dacf3f..4ae1a6a 100644 +--- src/scalesInterface/precision_tools.f90 ++++ src/scalesInterface/precision_tools.f90 +@@ -20,15 +20,15 @@ + !------------------------------------------------------------------------------ + + MODULE precision_tools +- use mpi, only: MPI_DOUBLE_PRECISION ++ use mpi, only: MPI_REAL + implicit None + + !> Floats precision + INTEGER, PARAMETER :: SP = kind(1.0) + INTEGER, PARAMETER :: DP = kind(1.0d0) +- INTEGER, PARAMETER :: WP = DP ++ INTEGER, PARAMETER :: WP = SP + !> the MPI type for REAL exchanges in simple or double precision +- INTEGER, parameter :: MPI_REAL_WP = MPI_DOUBLE_PRECISION ++ INTEGER, parameter :: MPI_REAL_WP = MPI_REAL + REAL(WP), PRIVATE :: sample_real_at_WP + REAL(WP), PARAMETER :: MAX_REAL_WP = HUGE(sample_real_at_WP) + INTEGER, PRIVATE :: sample_int -- GitLab