diff --git a/HySoP/change_groups_xz_slice b/HySoP/change_groups_xz_slice new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/HySoP/hysop/constants.py b/HySoP/hysop/constants.py index a43e95b8e64cc44a64ed3a22a32f11a4ff049ad0..a849cb68696b831749d3055e6d85e0780ee6c4f5 100644 --- a/HySoP/hysop/constants.py +++ b/HySoP/hysop/constants.py @@ -8,7 +8,7 @@ from hysop.mpi import MPI PI = math.pi # Set default type for real and integer numbers -HYSOP_REAL = np.float32 +HYSOP_REAL = np.float64 SIZEOF_HYSOP_REAL = int(HYSOP_REAL(1.).nbytes) # type for array indices HYSOP_INDEX = np.uint32 @@ -17,7 +17,7 @@ HYSOP_INTEGER = np.int32 # integer used for arrays dimensions HYSOP_DIM = np.int16 # float type for MPI messages -HYSOP_MPI_REAL = MPI.REAL +HYSOP_MPI_REAL = MPI.DOUBLE # int type for MPI messages HYSOP_MPI_INTEGER = MPI.INT ## default array layout (fortran or C convention) diff --git a/HySoP/hysop/f2py/fftw2py.f90 b/HySoP/hysop/f2py/fftw2py.f90 index 133ebe62c08148284ab8fc0beb64ef2d6e6f43e2..655982667aa82bbbc3965ab99c05447acc3a3566 100755 --- a/HySoP/hysop/f2py/fftw2py.f90 +++ b/HySoP/hysop/f2py/fftw2py.f90 @@ -5,7 +5,7 @@ module fftw2py use client_data - use hysopparam_sp + use hysopparam !> 2d case use fft2d !> 3d case @@ -88,12 +88,12 @@ contains ! Duplicate comm into client_data::main_comm (used later in fft2d and fft3d) call mpi_comm_dup(comm, main_comm, ierr) - + !print*, "Init fftw/poisson solver for a 3d problem" call init_r2c_scalar_3d(resolution,lengths) - + call getParamatersTopologyFFTW3d(datashape,offset) - + end subroutine init_fftw_solver_scalar !> Free memory allocated for fftw-related objects (plans and buffers) @@ -115,7 +115,7 @@ contains real(pk),dimension(size(omega,1),size(omega,2)),intent(out) :: velocity_x,velocity_y integer, dimension(2), intent(in) :: ghosts_vort, ghosts_velo !f2py intent(in,out) :: velocity_x,velocity_y - + call r2c_scalar_2d(omega, ghosts_vort) call filter_poisson_2d() @@ -149,7 +149,7 @@ contains real(pk),dimension(:,:,:),intent(in):: omega_x,omega_y,omega_z real(pk),dimension(size(omega_x,1),size(omega_y,2),size(omega_z,3)),intent(out) :: velocity_x,velocity_y,velocity_z integer, dimension(3), intent(in) :: ghosts_vort, ghosts_velo - real(8) :: start + real(pk) :: start !f2py intent(in,out) :: velocity_x,velocity_y,velocity_z start = MPI_WTime() call r2c_3d(omega_x,omega_y,omega_z, ghosts_vort) diff --git a/HySoP/hysop/f2py/scales2py.f90 b/HySoP/hysop/f2py/scales2py.f90 index b2c60f4450df1670210beef86f8a02a72b2625b7..c021618b45a1f723b4ea2176bf7e22036df5956b 100755 --- a/HySoP/hysop/f2py/scales2py.f90 +++ b/HySoP/hysop/f2py/scales2py.f90 @@ -6,7 +6,7 @@ use advec, only : advec_init,advec_step,advec_step_Inter_basic,advec_step_Inter_ use advec_vect, only : advec_step_Vect,advec_step_Inter_basic_Vect use interpolation_velo, only : interpol_init use mpi -use hysopparam_sp +use hysopparam implicit none @@ -93,7 +93,7 @@ contains real(pk), dimension(size(vx,1),size(vx,2),size(vx,3)), intent(inout) :: scal !f2py real(pk) intent(in,out), depend(size(vx,1)) :: scal - real(8) :: t0 + real(pk) :: t0 t0 = MPI_Wtime() call advec_step(dt,vx,vy,vz,scal) diff --git a/HySoP/setup.py.in b/HySoP/setup.py.in index cf709d4364ff76284b01cf1ab4bba3133d1e8a49..b2489f6c80d3d38cc255be212f218ec1d3b479e2 100644 --- a/HySoP/setup.py.in +++ b/HySoP/setup.py.in @@ -69,8 +69,8 @@ if enable_fortran is "ON": fortran_src.append(fortran_dir + 'parameters.f90') fortran_src.append(fortran_dir + 'fftw2py.f90') fftwdir = '@FFTWLIB@' - hysoplib.append('fftw3f') - hysoplib.append('fftw3f_mpi') + hysoplib.append('fftw3') + hysoplib.append('fftw3_mpi') hysop_libdir.append(fftwdir) else: packages.append('hysop.fakef2py') diff --git a/HySoP/src/client_data.f90 b/HySoP/src/client_data.f90 index d5cc241dcc970c8c19cac10c2ef5e5847cbf79f5..46b52686706b772f7349e316d910ad8c88bfedc2 100755 --- a/HySoP/src/client_data.f90 +++ b/HySoP/src/client_data.f90 @@ -1,14 +1,14 @@ !> Some global parameters and variables module client_data - use MPI, only : MPI_REAL + use MPI, only : MPI_DOUBLE_PRECISION use, intrinsic :: iso_c_binding ! required for fftw implicit none !> kind for real variables (simple or double precision) - integer, parameter :: mk = kind(1.0) ! double precision + integer, parameter :: mk = kind(1.0d0) ! double precision !> kind for real variables in mpi routines - integer, parameter :: mpi_mk = MPI_REAL + integer, parameter :: mpi_mk = MPI_DOUBLE_PRECISION !> Problem dimension (model, required for ppm to work properly) integer, parameter :: dime = 2 !> Real dimension @@ -26,7 +26,7 @@ module client_data !> to activate (or not) screen output logical,parameter :: verbose = .True. !> i (sqrt(-1) ...) - complex(C_FLOAT_COMPLEX), parameter :: Icmplx = cmplx(0._mk,1._mk, kind=mk) + complex(C_DOUBLE_COMPLEX), parameter :: Icmplx = cmplx(0._mk,1._mk, kind=mk) !> tolerance used to compute error real(mk), parameter :: tolerance = 1e-12 diff --git a/HySoP/src/fftw/Poisson.f90 b/HySoP/src/fftw/Poisson.f90 index e4bc8962c1ede7356a44ffacf807ee2bd4921d6a..8f67564cbf13f700476c72b4a830ab830e4b3c0f 100755 --- a/HySoP/src/fftw/Poisson.f90 +++ b/HySoP/src/fftw/Poisson.f90 @@ -54,7 +54,7 @@ contains real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z integer, dimension(3), intent(in) :: ghosts_w, ghosts_v - real(8) :: start + real(mk) :: start !! Compute fftw forward transform !! Omega is used to initialize the fftw buffer for input field. @@ -75,7 +75,7 @@ contains real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z - real(8) :: start + real(mk) :: start !! Compute fftw forward transform !! Omega is used to initialize the fftw buffer for input field. @@ -152,3 +152,5 @@ contains end module poisson + + diff --git a/HySoP/src/fftw/fft2d.f90 b/HySoP/src/fftw/fft2d.f90 index fab28261e1d25be38dc8f004d1b955cd0746ebe1..ab7277265594af7cdfb44d9806fbff97974a0064 100755 --- a/HySoP/src/fftw/fft2d.f90 +++ b/HySoP/src/fftw/fft2d.f90 @@ -42,15 +42,15 @@ module fft2d type(C_PTR) :: cbuffer2 !! Note Franck : check if local declarations of datain/out works and improve perfs. !> Field (complex values) for fftw input - complex(C_FLOAT_COMPLEX), pointer :: datain1(:,:),datain2(:,:) + complex(C_DOUBLE_COMPLEX), pointer :: datain1(:,:),datain2(:,:) !> Field (real values) for fftw input - real(C_FLOAT), pointer :: rdatain1(:,:) + real(C_DOUBLE), pointer :: rdatain1(:,:) !> Field (complex values) for fftw (forward) output - complex(C_FLOAT_COMPLEX), pointer :: dataout1(:,:) + complex(C_DOUBLE_COMPLEX), pointer :: dataout1(:,:) !> Field (real values) for fftw output - real(C_FLOAT), pointer :: rdatain2(:,:) + real(C_DOUBLE), pointer :: rdatain2(:,:) !> Field (complex values) for fftw (forward) output - complex(C_FLOAT_COMPLEX), pointer :: dataout2(:,:) + complex(C_DOUBLE_COMPLEX), pointer :: dataout2(:,:) !> GLOBAL number of points in each direction integer(C_INTPTR_T),pointer :: fft_resolution(:) !> LOCAL resolution @@ -58,13 +58,13 @@ module fft2d !> Offset in the direction of distribution integer(c_INTPTR_T),dimension(2) :: local_offset !> wave numbers for fft in x direction - real(C_FLOAT), pointer :: kx(:) + real(C_DOUBLE), pointer :: kx(:) !> wave numbers for fft in y direction - real(C_FLOAT), pointer :: ky(:) + real(C_DOUBLE), pointer :: ky(:) !> log file for fftw character(len=20),parameter :: filename ="hysopfftw.log" !> normalization factor - real(C_FLOAT) :: normFFT + real(C_DOUBLE) :: normFFT !> true if all the allocation stuff for global variables has been done. logical :: is2DUpToDate = .false. @@ -88,7 +88,7 @@ contains if(is2DUpToDate) return ! init fftw mpi context - call fftwf_mpi_init() + call fftw_mpi_init() if(rank==0) open(unit=21,file=filename,form="formatted") @@ -98,13 +98,13 @@ contains ! compute "optimal" size (according to fftw) for local date ! (warning : dimension reversal) - alloc_local = fftwf_mpi_local_size_2d_transposed(fft_resolution(c_Y), & + alloc_local = fftw_mpi_local_size_2d_transposed(fft_resolution(c_Y), & fft_resolution(c_X),main_comm, local_resolution(c_Y), & local_offset(c_Y), local_resolution(c_X),local_offset(c_X)); ! allocate local buffer (used to save datain/dataout1 ! ==> in-place transform!!) - cbuffer1 = fftwf_alloc_complex(alloc_local) + cbuffer1 = fftw_alloc_complex(alloc_local) ! link datain and dataout1 to cbuffer, setting the right dimensions call c_f_pointer(cbuffer1, datain1, & [fft_resolution(c_X),local_resolution(c_Y)]) @@ -115,17 +115,17 @@ contains ! into dataout2 (input for backward transform and filter) ! and to save (in-place) the transform of the second component ! of the velocity - cbuffer2 = fftwf_alloc_complex(alloc_local) + cbuffer2 = fftw_alloc_complex(alloc_local) call c_f_pointer(cbuffer2, datain2,& [fft_resolution(c_X),local_resolution(c_Y)]) call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_Y),local_resolution(c_X)]) ! create MPI plan for in-place forward/backward DFT (note dimension reversal) - plan_forward1 = fftwf_mpi_plan_dft_2d(fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,& + plan_forward1 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,& main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) - plan_backward1 = fftwf_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,& + plan_backward1 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,& main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) - plan_backward2 = fftwf_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,& + plan_backward2 = fftw_mpi_plan_dft_2d(fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,& main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) call computeKxC(lengths(c_X)) @@ -154,7 +154,7 @@ contains end do ! compute transform (as many times as desired) - call fftwf_mpi_execute_dft(plan_forward1, datain1, dataout1) + call fftw_mpi_execute_dft(plan_forward1, datain1, dataout1) !!$ do i = 1, fft_resolution(c_Y) !!$ write(*,'(a,i5,a,16f10.4)') 'out[',rank,'] ', dataout1(i,1:local_resolution(c_X)) @@ -162,8 +162,8 @@ contains !!$ call filter_poisson_2d() - call fftwf_mpi_execute_dft(plan_backward1, dataout1, datain1) - call fftwf_mpi_execute_dft(plan_backward2,dataout2,datain2) + call fftw_mpi_execute_dft(plan_backward1, dataout1, datain1) + call fftw_mpi_execute_dft(plan_backward2,dataout2,datain2) do j = 1, local_resolution(c_Y) do i = 1, fft_resolution(c_X) velocity_x(i,j) = datain1(i,j)*normFFT @@ -198,7 +198,7 @@ contains if(is2DUpToDate) return ! init fftw mpi context - call fftwf_mpi_init() + call fftw_mpi_init() if(rank==0) open(unit=21,file=filename,form="formatted") @@ -206,11 +206,11 @@ contains fft_resolution(:) = resolution(:)-1 halfLength = fft_resolution(c_X)/2+1 ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!) - alloc_local = fftwf_mpi_local_size_2d_transposed(fft_resolution(c_Y),halfLength,main_comm,local_resolution(c_Y),& + alloc_local = fftw_mpi_local_size_2d_transposed(fft_resolution(c_Y),halfLength,main_comm,local_resolution(c_Y),& local_offset(c_Y),local_resolution(c_X),local_offset(c_X)); ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!) - cbuffer1 = fftwf_alloc_complex(alloc_local) + cbuffer1 = fftw_alloc_complex(alloc_local) ! link rdatain1 and dataout1 to cbuffer, setting the right dimensions for each call c_f_pointer(cbuffer1, rdatain1, [2*halfLength,local_resolution(c_Y)]) @@ -218,17 +218,17 @@ contains ! second buffer used for backward transform. Used to copy dataout1 into dataout2 (input for backward transform and filter) ! and to save (in-place) the transform of the second component of the velocity - cbuffer2 = fftwf_alloc_complex(alloc_local) + cbuffer2 = fftw_alloc_complex(alloc_local) call c_f_pointer(cbuffer2, rdatain2, [2*halfLength,local_resolution(c_Y)]) call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_Y),local_resolution(c_X)]) ! create MPI plans for in-place forward/backward DFT (note dimension reversal) - plan_forward1 = fftwf_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, & + plan_forward1 = fftw_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) - plan_backward1 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & + plan_backward1 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) - plan_backward2 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & + plan_backward2 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) call computeKx(lengths(c_X)) @@ -266,7 +266,7 @@ contains !!$ end do !!$ ! compute transform (as many times as desired) - call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) + call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) !!$ do i = 1, fft_resolution(c_Y) !!$ write(*,'(a,i5,a,16f10.4)') 'aaaa[',rank,'] ', dataout1(i,1:local_resolution(c_X)) @@ -288,13 +288,13 @@ contains do i = 1, fft_resolution(c_X) ig = i + ghosts(c_X) rdatain1(i, j) = input_x(ig,jg) - rdatain2(i, j) = input_y(ig,jg) + rdatain2(i, j) = input_y(ig,jg) end do end do ! compute transform (as many times as desired) - call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) - call fftwf_mpi_execute_dft_r2c(plan_forward2, rdatain2, dataout2) + call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) + call fftw_mpi_execute_dft_r2c(plan_forward2, rdatain2, dataout2) end subroutine r2c_2d @@ -304,8 +304,8 @@ contains integer, dimension(2), intent(in) :: ghosts integer(C_INTPTR_T) :: i, j, ig, jg - call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) - call fftwf_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2) + call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) + call fftw_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2) do j = 1, local_resolution(c_Y) jg = j + ghosts(c_Y) do i = 1, fft_resolution(c_X) @@ -334,7 +334,7 @@ contains integer, dimension(2), intent(in) :: ghosts integer(C_INTPTR_T) :: i, j, ig, jg - call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) + call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) do j = 1, local_resolution(c_Y) jg = j + ghosts(c_Y) do i = 1, fft_resolution(c_X) @@ -438,7 +438,7 @@ contains subroutine filter_poisson_2d() integer(C_INTPTR_T) :: i, j - complex(C_FLOAT_COMPLEX) :: coeff + complex(C_DOUBLE_COMPLEX) :: coeff if(local_offset(c_X)==0) then if(local_offset(c_Y) == 0) then dataout1(1,1) = 0.0 @@ -485,9 +485,9 @@ contains subroutine filter_diffusion_2d(nudt) - real(C_FLOAT), intent(in) :: nudt + real(C_DOUBLE), intent(in) :: nudt integer(C_INTPTR_T) :: i, j - complex(C_FLOAT_COMPLEX) :: coeff + complex(C_DOUBLE_COMPLEX) :: coeff do i = 1,local_resolution(c_X) do j = 1, fft_resolution(c_Y) @@ -500,13 +500,13 @@ contains !> Clean fftw context (free memory, plans ...) subroutine cleanFFTW_2d() - call fftwf_destroy_plan(plan_forward1) - call fftwf_destroy_plan(plan_backward1) - !call fftwf_destroy_plan(plan_forward2) - !call fftwf_destroy_plan(plan_backward2) - call fftwf_free(cbuffer1) - call fftwf_free(cbuffer2) - call fftwf_mpi_cleanup() + call fftw_destroy_plan(plan_forward1) + call fftw_destroy_plan(plan_backward1) + !call fftw_destroy_plan(plan_forward2) + !call fftw_destroy_plan(plan_backward2) + call fftw_free(cbuffer1) + call fftw_free(cbuffer2) + call fftw_mpi_cleanup() deallocate(fft_resolution) if(rank==0) close(21) end subroutine cleanFFTW_2d @@ -516,7 +516,7 @@ contains subroutine filter_curl_2d() integer(C_INTPTR_T) :: i,j,k - complex(C_FLOAT_COMPLEX) :: coeff + complex(C_DOUBLE_COMPLEX) :: coeff !! mind the transpose -> index inversion between y and z do j = 1,local_resolution(c_Y) @@ -532,7 +532,7 @@ contains subroutine fft2d_diagnostics(nbelem) integer(C_INTPTR_T), intent(in) :: nbelem - complex(C_FLOAT_COMPLEX) :: memoryAllocated + complex(C_DOUBLE_COMPLEX) :: memoryAllocated memoryAllocated = real(nbelem*sizeof(memoryAllocated),mk)*1e-6 write(*,'(a,i5,a,i12,f10.2)') '[',rank,'] size of each buffer (elements / memory in MB):', & nbelem, memoryAllocated @@ -566,10 +566,10 @@ contains integer(C_INTPTR_T), dimension(2) :: n !> Field (real values) for fftw input - real(C_FLOAT), pointer :: rdatain1Many(:,:,:) + real(C_DOUBLE), pointer :: rdatain1Many(:,:,:) ! init fftw mpi context - call fftwf_mpi_init() + call fftw_mpi_init() howmany = 1 if(rank==0) open(unit=21,file=filename,form="formatted") @@ -579,12 +579,12 @@ contains n(1) = fft_resolution(2) n(2) = halfLength ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!) - alloc_local = fftwf_mpi_local_size_many_transposed(2,n,howmany,FFTW_MPI_DEFAULT_BLOCK,& + alloc_local = fftw_mpi_local_size_many_transposed(2,n,howmany,FFTW_MPI_DEFAULT_BLOCK,& FFTW_MPI_DEFAULT_BLOCK,main_comm,local_resolution(c_Y),& local_offset(c_Y),local_resolution(c_X),local_offset(c_X)); ! allocate local buffer (used to save datain/dataout1 ==> in-place transform!!) - cbuffer1 = fftwf_alloc_complex(alloc_local) + cbuffer1 = fftw_alloc_complex(alloc_local) ! link rdatain1 and dataout1 to cbuffer, setting the right dimensions for each call c_f_pointer(cbuffer1, rdatain1Many, [howmany,2*halfLength,local_resolution(c_Y)]) @@ -592,17 +592,17 @@ contains ! second buffer used for backward transform. Used to copy dataout1 into dataout2 (input for backward transform and filter) ! and to save (in-place) the transform of the second component of the velocity - cbuffer2 = fftwf_alloc_complex(alloc_local) + cbuffer2 = fftw_alloc_complex(alloc_local) call c_f_pointer(cbuffer2, rdatain1Many, [howmany,2*halfLength,local_resolution(c_Y)]) call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_Y),local_resolution(c_X)]) ! create MPI plans for in-place forward/backward DFT (note dimension reversal) - plan_forward1 = fftwf_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1Many, dataout1, & + plan_forward1 = fftw_mpi_plan_dft_r2c_2d(fft_resolution(c_Y), fft_resolution(c_X), rdatain1Many, dataout1, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) - plan_backward1 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & + plan_backward1 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) - plan_backward2 = fftwf_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & + plan_backward2 = fftw_mpi_plan_dft_c2r_2d(fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) call computeKx(lengths(c_X)) diff --git a/HySoP/src/fftw/fft3d.f90 b/HySoP/src/fftw/fft3d.f90 index aeafe5d97dd54008739627fde5fd83421cdad0b6..1527da3dd31dd2f8c682ec67e84ed6e0a2c28b5a 100755 --- a/HySoP/src/fftw/fft3d.f90 +++ b/HySoP/src/fftw/fft3d.f90 @@ -40,15 +40,15 @@ module fft3d type(C_PTR) :: cbuffer3 !! Note Franck : check if local declarations of datain/out works and improve perfs. !> Field (complex values) for fftw input - complex(C_FLOAT_COMPLEX), pointer :: datain1(:,:,:)=>NULL(), datain2(:,:,:)=>NULL(), datain3(:,:,:)=>NULL() + complex(C_DOUBLE_COMPLEX), pointer :: datain1(:,:,:)=>NULL(), datain2(:,:,:)=>NULL(), datain3(:,:,:)=>NULL() !> Field (real values) for fftw input (these are only pointers to the cbuffers) - real(C_FLOAT), pointer :: rdatain1(:,:,:)=>NULL() ,rdatain2(:,:,:)=>NULL() ,rdatain3(:,:,:)=>NULL() + real(C_DOUBLE), pointer :: rdatain1(:,:,:)=>NULL() ,rdatain2(:,:,:)=>NULL() ,rdatain3(:,:,:)=>NULL() !> Field (real values) for fftw input in the fftw-many case - real(C_FLOAT), pointer :: rdatain_many(:,:,:,:)=>NULL() + real(C_DOUBLE), pointer :: rdatain_many(:,:,:,:)=>NULL() !> Field (complex values) for fftw (forward) output - complex(C_FLOAT_COMPLEX), pointer :: dataout1(:,:,:)=>NULL() ,dataout2(:,:,:)=>NULL() ,dataout3(:,:,:)=>NULL() + complex(C_DOUBLE_COMPLEX), pointer :: dataout1(:,:,:)=>NULL() ,dataout2(:,:,:)=>NULL() ,dataout3(:,:,:)=>NULL() !> Field (complex values) for fftw (forward) output in the fftw-many case - complex(C_FLOAT_COMPLEX), pointer :: dataout_many(:,:,:,:)=>NULL() + complex(C_DOUBLE_COMPLEX), pointer :: dataout_many(:,:,:,:)=>NULL() !> GLOBAL number of points in each direction on which fft is applied (--> corresponds to "real" resolution - 1) integer(C_INTPTR_T),pointer :: fft_resolution(:)=>NULL() !> LOCAL number of points for fft @@ -56,15 +56,15 @@ module fft3d !> Offset in the direction of distribution integer(c_INTPTR_T),dimension(3) :: local_offset !> wave numbers for fft in x direction - real(C_FLOAT), pointer :: kx(:) + real(C_DOUBLE), pointer :: kx(:) !> wave numbers for fft in y direction - real(C_FLOAT), pointer :: ky(:) + real(C_DOUBLE), pointer :: ky(:) !> wave numbers for fft in z direction - real(C_FLOAT), pointer :: kz(:) + real(C_DOUBLE), pointer :: kz(:) !> log file for fftw character(len=20),parameter :: filename ="hysopfftw.log" !> normalization factor - real(C_FLOAT) :: normFFT + real(C_DOUBLE) :: normFFT !> true if we use fftw-many routines logical :: manycase !> true if all the allocation stuff for global variables has been done. @@ -89,7 +89,7 @@ contains if(is3DUpToDate) return ! init fftw mpi context - call fftwf_mpi_init() + call fftw_mpi_init() if(rank==0) open(unit=21,file=filename,form="formatted") @@ -98,7 +98,7 @@ contains fft_resolution(:) = resolution(:)-1 ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal) - alloc_local = fftwf_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),main_comm,& + alloc_local = fftw_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),main_comm,& local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y)); ! Set a default value for c_X components. @@ -106,35 +106,35 @@ contains local_resolution(c_X) = fft_resolution(c_X) ! allocate local buffer (used to save datain/dataout ==> in-place transform!!) - cbuffer1 = fftwf_alloc_complex(alloc_local) + cbuffer1 = fftw_alloc_complex(alloc_local) ! link datain and dataout to cbuffer, setting the right dimensions for each call c_f_pointer(cbuffer1, datain1, [fft_resolution(c_X),fft_resolution(c_Y),local_resolution(c_Z)]) call c_f_pointer(cbuffer1, dataout1, [fft_resolution(c_X),fft_resolution(c_Z),local_resolution(c_Y)]) ! second buffer used for backward transform. Used to copy dataout into dataout2 (input for backward transform and filter) ! and to save (in-place) the transform of the second component of the velocity - cbuffer2 = fftwf_alloc_complex(alloc_local) + cbuffer2 = fftw_alloc_complex(alloc_local) call c_f_pointer(cbuffer2, datain2, [fft_resolution(c_X),fft_resolution(c_Y),local_resolution(c_Z)]) call c_f_pointer(cbuffer2, dataout2, [fft_resolution(c_X),fft_resolution(c_Z),local_resolution(c_Y)]) ! second buffer used for backward transform. Used to copy dataout into dataout2 (input for backward transform and filter) ! and to save (in-place) the transform of the second component of the velocity - cbuffer3 = fftwf_alloc_complex(alloc_local) + cbuffer3 = fftw_alloc_complex(alloc_local) call c_f_pointer(cbuffer3, datain3, [fft_resolution(c_X),fft_resolution(c_Y),local_resolution(c_Z)]) call c_f_pointer(cbuffer3, dataout3, [fft_resolution(c_X),fft_resolution(c_Z),local_resolution(c_Y)]) ! create MPI plan for in-place forward/backward DFT (note dimension reversal) - plan_forward1 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,& + plan_forward1 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain1,dataout1,& main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) - plan_backward1 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,& + plan_backward1 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout1,datain1,& main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) - plan_forward2 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain2,dataout2,& + plan_forward2 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain2,dataout2,& main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) - plan_backward2 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,& + plan_backward2 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout2,datain2,& main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) - plan_forward3 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain3,dataout3,& + plan_forward3 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z), fft_resolution(c_Y), fft_resolution(c_X),datain3,dataout3,& main_comm,FFTW_FORWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) - plan_backward3 = fftwf_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout3,datain3,& + plan_backward3 = fftw_mpi_plan_dft_3d(fft_resolution(c_Z),fft_resolution(c_Y),fft_resolution(c_X),dataout3,datain3,& main_comm,FFTW_BACKWARD,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) call computeKx(lengths(c_X)) @@ -173,17 +173,17 @@ contains end do end do ! compute transform (as many times as desired) - call fftwf_mpi_execute_dft(plan_forward1, datain1, dataout1) - call fftwf_mpi_execute_dft(plan_forward2, datain2, dataout2) - call fftwf_mpi_execute_dft(plan_forward3, datain3, dataout3) + call fftw_mpi_execute_dft(plan_forward1, datain1, dataout1) + call fftw_mpi_execute_dft(plan_forward2, datain2, dataout2) + call fftw_mpi_execute_dft(plan_forward3, datain3, dataout3) ! apply poisson filter call filter_poisson_3d() ! inverse transform to retrieve velocity - call fftwf_mpi_execute_dft(plan_backward1, dataout1,datain1) - call fftwf_mpi_execute_dft(plan_backward2,dataout2,datain2) - call fftwf_mpi_execute_dft(plan_backward3,dataout3,datain3) + call fftw_mpi_execute_dft(plan_backward1, dataout1,datain1) + call fftw_mpi_execute_dft(plan_backward2,dataout2,datain2) + call fftw_mpi_execute_dft(plan_backward3,dataout3,datain3) do k =1, local_resolution(c_Z) do j = 1, fft_resolution(c_Y) do i = 1, fft_resolution(c_X) @@ -213,7 +213,7 @@ contains if(is3DUpToDate) return ! init fftw mpi context - call fftwf_mpi_init() + call fftw_mpi_init() if(rank==0) open(unit=21,file=filename,form="formatted") allocate(fft_resolution(3)) @@ -221,7 +221,7 @@ contains halfLength = fft_resolution(c_X)/2+1 ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal) - alloc_local = fftwf_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),halfLength,& + alloc_local = fftw_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),halfLength,& main_comm,local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y)); ! init c_X part. This is required to compute kx with the same function in 2d and 3d cases. @@ -229,9 +229,9 @@ contains local_resolution(c_X) = halfLength ! allocate local buffer (used to save datain/dataout ==> in-place transform!!) - cbuffer1 = fftwf_alloc_complex(alloc_local) - cbuffer2 = fftwf_alloc_complex(alloc_local) - cbuffer3 = fftwf_alloc_complex(alloc_local) + cbuffer1 = fftw_alloc_complex(alloc_local) + cbuffer2 = fftw_alloc_complex(alloc_local) + cbuffer3 = fftw_alloc_complex(alloc_local) ! link rdatain and dataout to cbuffer, setting the right dimensions for each call c_f_pointer(cbuffer1, rdatain1, [2*halfLength,fft_resolution(c_Y),local_resolution(c_Z)]) @@ -246,17 +246,17 @@ contains rdatain3 = 0.0 ! create MPI plans for in-place forward/backward DFT (note dimension reversal) - plan_forward1 = fftwf_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, & + plan_forward1 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) - plan_backward1 = fftwf_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & + plan_backward1 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) - plan_forward2 = fftwf_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain2, dataout2, & + plan_forward2 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain2, dataout2, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) - plan_backward2 = fftwf_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & + plan_backward2 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout2, rdatain2, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) - plan_forward3 = fftwf_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain3, dataout3, & + plan_forward3 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain3, dataout3, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) - plan_backward3 = fftwf_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout3, rdatain3, & + plan_backward3 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout3, rdatain3, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) call computeKx(lengths(c_X)) @@ -283,7 +283,7 @@ contains if(is3DUpToDate) return ! init fftw mpi context - call fftwf_mpi_init() + call fftw_mpi_init() if(rank==0) open(unit=21,file=filename,form="formatted") allocate(fft_resolution(3)) @@ -291,7 +291,7 @@ contains halfLength = fft_resolution(c_X)/2+1 ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal) - alloc_local = fftwf_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),halfLength,& + alloc_local = fftw_mpi_local_size_3d_transposed(fft_resolution(c_Z),fft_resolution(c_Y),halfLength,& main_comm,local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y)); ! init c_X part. This is required to compute kx with the same function in 2d and 3d cases. @@ -299,7 +299,7 @@ contains local_resolution(c_X) = halfLength ! allocate local buffer (used to save datain/dataout ==> in-place transform!!) - cbuffer1 = fftwf_alloc_complex(alloc_local) + cbuffer1 = fftw_alloc_complex(alloc_local) ! link rdatain and dataout to cbuffer, setting the right dimensions for each call c_f_pointer(cbuffer1, rdatain1, [2*halfLength,fft_resolution(c_Y),local_resolution(c_Z)]) @@ -308,11 +308,11 @@ contains rdatain1 = 0.0 ! create MPI plans for in-place forward/backward DFT (note dimension reversal) - plan_forward1 = fftwf_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, & + plan_forward1 = fftw_mpi_plan_dft_r2c_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), rdatain1, dataout1, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) - plan_backward1 = fftwf_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & + plan_backward1 = fftw_mpi_plan_dft_c2r_3d(fft_resolution(c_Z),fft_resolution(c_Y), fft_resolution(c_X), dataout1, rdatain1, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) - + call computeKx(lengths(c_X)) call computeKy(lengths(c_Y)) call computeKz(lengths(c_Z)) @@ -323,7 +323,7 @@ contains is3DUpToDate = .true. end subroutine init_r2c_scalar_3d - + !> forward transform - The result is saved in local buffers !! @param[in] omega_x 3d scalar field, x-component of the input vector field !! @param[in] omega_y 3d scalar field, y-component of the input vector field @@ -333,7 +333,7 @@ contains real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z integer, dimension(3), intent(in) :: ghosts - !real(8) :: start + !real(mk) :: start integer(C_INTPTR_T) :: i,j,k, ig, jg, kg ! ig, jg, kg are used to take into account @@ -355,9 +355,9 @@ contains ! compute transforms for each component !start = MPI_WTIME() - call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) - call fftwf_mpi_execute_dft_r2c(plan_forward2, rdatain2, dataout2) - call fftwf_mpi_execute_dft_r2c(plan_forward3, rdatain3, dataout3) + call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) + call fftw_mpi_execute_dft_r2c(plan_forward2, rdatain2, dataout2) + call fftw_mpi_execute_dft_r2c(plan_forward3, rdatain3, dataout3) !!print *, "r2c time = ", MPI_WTIME() - start end subroutine r2c_3d @@ -370,12 +370,12 @@ contains subroutine c2r_3d(velocity_x,velocity_y,velocity_z, ghosts) real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z integer, dimension(3), intent(in) :: ghosts - real(8) :: start + real(mk) :: start integer(C_INTPTR_T) :: i,j,k, ig, jg, kg start = MPI_WTIME() - call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) - call fftwf_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2) - call fftwf_mpi_execute_dft_c2r(plan_backward3,dataout3,rdatain3) + call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) + call fftw_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2) + call fftw_mpi_execute_dft_c2r(plan_backward3,dataout3,rdatain3) !! print *, "c2r time : ", MPI_WTIME() -start ! copy back to velocity and normalisation do k =1, local_resolution(c_Z) @@ -400,7 +400,7 @@ contains real(mk),dimension(:,:,:),intent(in) :: scalar integer, dimension(3), intent(in) :: ghosts - real(8) :: start + real(mk) :: start integer(C_INTPTR_T) :: i,j,k, ig, jg, kg ! ig, jg, kg are used to take into account @@ -420,7 +420,7 @@ contains ! compute transforms for each component start = MPI_WTIME() - call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) + call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain1, dataout1) !!print *, "r2c time = ", MPI_WTIME() - start end subroutine r2c_scalar_3d @@ -431,10 +431,10 @@ contains subroutine c2r_scalar_3d(scalar, ghosts) real(mk),dimension(:,:,:),intent(inout) :: scalar integer, dimension(3), intent(in) :: ghosts - real(8) :: start + real(mk) :: start integer(C_INTPTR_T) :: i,j,k, ig, jg, kg start = MPI_WTIME() - call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) + call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) !! print *, "c2r time : ", MPI_WTIME() -start ! copy back to velocity and normalisation do k =1, local_resolution(c_Z) @@ -466,7 +466,7 @@ contains integer(C_INTPTR_T),dimension(3) :: n ! init fftw mpi context - call fftwf_mpi_init() + call fftw_mpi_init() blocksize = FFTW_MPI_DEFAULT_BLOCK if(rank==0) open(unit=21,file=filename,form="formatted") allocate(fft_resolution(3)) @@ -477,7 +477,7 @@ contains n(3) = halfLength howmany = 3 ! compute "optimal" size (according to fftw) for local data (warning : dimension reversal) - alloc_local = fftwf_mpi_local_size_many_transposed(3,n,howmany,blocksize,blocksize,& + alloc_local = fftw_mpi_local_size_many_transposed(3,n,howmany,blocksize,blocksize,& main_comm,local_resolution(c_Z),local_offset(c_Z),local_resolution(c_Y),local_offset(c_Y)); ! init c_X part. This is required to compute kx with the same function in 2d and 3d cases. @@ -485,7 +485,7 @@ contains local_resolution(c_X) = halfLength ! allocate local buffer (used to save datain/dataout ==> in-place transform!!) - cbuffer1 = fftwf_alloc_complex(alloc_local) + cbuffer1 = fftw_alloc_complex(alloc_local) ! link rdatain and dataout to cbuffer, setting the right dimensions for each call c_f_pointer(cbuffer1, rdatain_many, [howmany,2*halfLength,fft_resolution(c_Y),local_resolution(c_Z)]) @@ -494,9 +494,9 @@ contains ! create MPI plans for in-place forward/backward DFT (note dimension reversal) n(3) = fft_resolution(c_X) - plan_forward1 = fftwf_mpi_plan_many_dft_r2c(3,n,howmany,blocksize,blocksize, rdatain_many, dataout_many, & + plan_forward1 = fftw_mpi_plan_many_dft_r2c(3,n,howmany,blocksize,blocksize, rdatain_many, dataout_many, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_OUT)) - plan_backward1 = fftwf_mpi_plan_many_dft_c2r(3,n,howmany,blocksize,blocksize, dataout_many, rdatain_many, & + plan_backward1 = fftw_mpi_plan_many_dft_c2r(3,n,howmany,blocksize,blocksize, dataout_many, rdatain_many, & main_comm,ior(FFTW_MEASURE,FFTW_MPI_TRANSPOSED_IN)) call computeKx(lengths(c_X)) call computeKy(lengths(c_Y)) @@ -518,7 +518,7 @@ contains subroutine r2c_3d_many(omega_x,omega_y,omega_z) real(mk),dimension(:,:,:),intent(in) :: omega_x,omega_y,omega_z - real(8) :: start + real(mk) :: start integer(C_INTPTR_T) :: i,j,k ! init @@ -534,7 +534,7 @@ contains ! compute transform (as many times as desired) start = MPI_WTIME() - call fftwf_mpi_execute_dft_r2c(plan_forward1, rdatain_many, dataout_many) + call fftw_mpi_execute_dft_r2c(plan_forward1, rdatain_many, dataout_many) !! print *, "r2c time = ", MPI_WTIME() - start end subroutine r2c_3d_many @@ -545,11 +545,11 @@ contains !! @param[in,out] velocity_z 3d scalar field, z-component of the output vector field subroutine c2r_3d_many(velocity_x,velocity_y,velocity_z) real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z - real(8) :: start + real(mk) :: start integer(C_INTPTR_T) :: i,j,k start = MPI_WTIME() - call fftwf_mpi_execute_dft_c2r(plan_backward1,dataout_many,rdatain_many) + call fftw_mpi_execute_dft_c2r(plan_backward1,dataout_many,rdatain_many) !! print *, "c2r time : ", MPI_WTIME() -start do k =1, local_resolution(c_Z) do j = 1, fft_resolution(c_Y) @@ -590,7 +590,7 @@ contains !> Computation of frequencies coeff, over distributed direction(s) !> @param lengths size of the domain subroutine computeKy(length) - real(C_FLOAT), intent(in) :: length + real(C_DOUBLE), intent(in) :: length !! Local loops indices integer(C_INTPTR_T) :: i @@ -643,8 +643,8 @@ contains subroutine filter_poisson_3d() integer(C_INTPTR_T) :: i,j,k - complex(C_FLOAT_COMPLEX) :: coeff - complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + complex(C_DOUBLE_COMPLEX) :: coeff + complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 ! Set first coeff (check for "all freq = 0" case) if(local_offset(c_Y) == 0) then @@ -702,10 +702,10 @@ contains !! @param[in] nudt \f$ \nu\times dt\f$, diffusion coefficient times current time step subroutine filter_curl_diffusion_3d(nudt) - real(C_FLOAT), intent(in) :: nudt + real(C_DOUBLE), intent(in) :: nudt integer(C_INTPTR_T) :: i,j,k - complex(C_FLOAT_COMPLEX) :: coeff - complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + complex(C_DOUBLE_COMPLEX) :: coeff + complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 !! mind the transpose -> index inversion between y and z do j = 1,local_resolution(c_Y) @@ -728,9 +728,9 @@ contains !! @param[in] nudt \f$ \nu\times dt\f$, diffusion coefficient times current time step subroutine filter_diffusion_3d(nudt) - real(C_FLOAT), intent(in) :: nudt + real(C_DOUBLE), intent(in) :: nudt integer(C_INTPTR_T) :: i,j,k - complex(C_FLOAT_COMPLEX) :: coeff + complex(C_DOUBLE_COMPLEX) :: coeff !! mind the transpose -> index inversion between y and z do j = 1,local_resolution(c_Y) @@ -751,8 +751,8 @@ contains subroutine filter_curl_3d() integer(C_INTPTR_T) :: i,j,k - complex(C_FLOAT_COMPLEX) :: coeff - complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + complex(C_DOUBLE_COMPLEX) :: coeff + complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 !! mind the transpose -> index inversion between y and z do j = 1,local_resolution(c_Y) @@ -775,8 +775,8 @@ contains subroutine filter_projection_om_3d() integer(C_INTPTR_T) :: i,j,k - complex(C_FLOAT_COMPLEX) :: coeff - complex(C_FLOAT_COMPLEX) :: buffer1,buffer2,buffer3 + complex(C_DOUBLE_COMPLEX) :: coeff + complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2,buffer3 ! Set first coeff (check for "all freq = 0" case) if(local_offset(c_Y) == 0) then @@ -837,9 +837,9 @@ contains !! @param[in] dxf, dyf, dzf: grid filter size = domainLength/(CoarseRes-1) subroutine filter_multires_om_3d(dxf, dyf, dzf) - real(C_FLOAT), intent(in) :: dxf, dyf, dzf + real(C_DOUBLE), intent(in) :: dxf, dyf, dzf integer(C_INTPTR_T) :: i,j,k - real(C_FLOAT) :: kxc, kyc, kzc + real(C_DOUBLE) :: kxc, kyc, kzc kxc = pi / dxf kyc = pi / dyf @@ -864,7 +864,7 @@ contains !! pressure from velocity in the Fourier space subroutine filter_pressure_3d() integer(C_INTPTR_T) :: i,j,k - complex(C_FLOAT_COMPLEX) :: coeff + complex(C_DOUBLE_COMPLEX) :: coeff ! Set first coeff (check for "all freq = 0" case) if(local_offset(c_Y) == 0) then @@ -904,8 +904,8 @@ contains subroutine filter_poisson_3d_many() integer(C_INTPTR_T) :: i,j,k - complex(C_FLOAT_COMPLEX) :: coeff - complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + complex(C_DOUBLE_COMPLEX) :: coeff + complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 ! Set first coeff (check for "all freq = 0" case) if(local_offset(c_Y) == 0) then @@ -961,10 +961,10 @@ contains !! @param[in] nudt \f$ \nu\times dt\f$, diffusion coefficient times current time step subroutine filter_diffusion_3d_many(nudt) - real(C_FLOAT), intent(in) :: nudt + real(C_DOUBLE), intent(in) :: nudt integer(C_INTPTR_T) :: i,j,k - complex(C_FLOAT_COMPLEX) :: coeff - complex(C_FLOAT_COMPLEX) :: buffer1,buffer2 + complex(C_DOUBLE_COMPLEX) :: coeff + complex(C_DOUBLE_COMPLEX) :: buffer1,buffer2 !! mind the transpose -> index inversion between y and z do j = 1,local_resolution(c_Y) @@ -984,18 +984,18 @@ contains !> Clean fftw context (free memory, plans ...) subroutine cleanFFTW_3d() - call fftwf_destroy_plan(plan_forward1) - call fftwf_destroy_plan(plan_backward1) + call fftw_destroy_plan(plan_forward1) + call fftw_destroy_plan(plan_backward1) if(.not.manycase) then - call fftwf_destroy_plan(plan_forward2) - call fftwf_destroy_plan(plan_backward2) - call fftwf_destroy_plan(plan_forward3) - call fftwf_destroy_plan(plan_backward3) - call fftwf_free(cbuffer2) - call fftwf_free(cbuffer3) + call fftw_destroy_plan(plan_forward2) + call fftw_destroy_plan(plan_backward2) + call fftw_destroy_plan(plan_forward3) + call fftw_destroy_plan(plan_backward3) + call fftw_free(cbuffer2) + call fftw_free(cbuffer3) endif - call fftwf_free(cbuffer1) - call fftwf_mpi_cleanup() + call fftw_free(cbuffer1) + call fftw_mpi_cleanup() deallocate(fft_resolution) deallocate(kx,ky,kz) if(rank==0) close(21) @@ -1006,7 +1006,7 @@ contains integer(C_INTPTR_T), intent(in) :: nbelem ! number of buffers used for fftw integer, optional,intent(in) :: howmany - complex(C_FLOAT_COMPLEX) :: memoryAllocated + complex(C_DOUBLE_COMPLEX) :: memoryAllocated integer :: nbFields if(present(howmany)) then diff --git a/HySoP/src/main/main.f90 b/HySoP/src/main/main.f90 index 91d63d97b2cddb5ca57c906ed330b45123e1163f..1bd5d985b46cdd86713910ce512d0f6da75f36c7 100755 --- a/HySoP/src/main/main.f90 +++ b/HySoP/src/main/main.f90 @@ -9,7 +9,7 @@ use vectorcalculus implicit none integer :: info -real(8) :: start, end +real(mk) :: start, end !complex(mk), dimension(resolution(1),resolution(2)) :: omega,velocity_x,velocity_y call MPI_Init(info) @@ -119,8 +119,7 @@ contains real(mk),dimension(3) :: lengths,step integer, dimension(3) :: ghosts_v, ghosts_w integer(C_INTPTR_T),dimension(3) :: nfft,offset - real(mk) :: error - real(8) :: start + real(mk) :: error,start logical :: ok if (rank==0) print *, " ======= Test 3D Poisson (r2c) solver for resolution ", resolution diff --git a/HySoP/src/scalesInterface/precision_tools.f90 b/HySoP/src/scalesInterface/precision_tools.f90 index 4ae1a6a665076f849e32b09ad77bfabdbc8e9e7d..5dacf3f39db303057276819266899d573633c1b9 100644 --- a/HySoP/src/scalesInterface/precision_tools.f90 +++ b/HySoP/src/scalesInterface/precision_tools.f90 @@ -20,15 +20,15 @@ !------------------------------------------------------------------------------ MODULE precision_tools - use mpi, only: MPI_REAL + use mpi, only: MPI_DOUBLE_PRECISION implicit None !> Floats precision INTEGER, PARAMETER :: SP = kind(1.0) INTEGER, PARAMETER :: DP = kind(1.0d0) - INTEGER, PARAMETER :: WP = SP + INTEGER, PARAMETER :: WP = DP !> the MPI type for REAL exchanges in simple or double precision - INTEGER, parameter :: MPI_REAL_WP = MPI_REAL + INTEGER, parameter :: MPI_REAL_WP = MPI_DOUBLE_PRECISION REAL(WP), PRIVATE :: sample_real_at_WP REAL(WP), PARAMETER :: MAX_REAL_WP = HUGE(sample_real_at_WP) INTEGER, PRIVATE :: sample_int