From cc90ac6861486a7b6212675bed36c87b683a00a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franck=20P=C3=A9rignon?= <franck.perignon@imag.fr> Date: Mon, 17 Dec 2012 13:29:17 +0000 Subject: [PATCH] Update f2py interf. to avoid hidden copies --- HySoP/hysop/f2py/fftw2py.f90 | 12 ++++-- HySoP/hysop/f2py/scales2py.f90 | 34 +++++++--------- HySoP/setup.py.in | 2 +- HySoP/src/fftw/fft2d.f90 | 22 ++++++---- HySoP/src/fftw/fft3d.f90 | 40 ++++++++++++++----- .../scalesInterface/layout/cart_topology.f90 | 4 +- 6 files changed, 69 insertions(+), 45 deletions(-) diff --git a/HySoP/hysop/f2py/fftw2py.f90 b/HySoP/hysop/f2py/fftw2py.f90 index 8d394a34e..ce5316448 100755 --- a/HySoP/hysop/f2py/fftw2py.f90 +++ b/HySoP/hysop/f2py/fftw2py.f90 @@ -70,7 +70,8 @@ contains subroutine solve_poisson_2d(omega,velocity_x,velocity_y) real(pk),dimension(:,:),intent(in):: omega real(pk),dimension(size(omega,1),size(omega,2)),intent(out) :: velocity_x,velocity_y - + !f2py intent(in,out) :: velocity_x,velocity_y + call r2c_2d(omega) call filter_poisson_2d() @@ -86,7 +87,7 @@ contains real(pk),dimension(:,:,:),intent(in):: omega_x,omega_y,omega_z real(pk),dimension(size(omega_x,1),size(omega_y,2),size(omega_z,3)),intent(out) :: velocity_x,velocity_y,velocity_z real(pk) :: start - + !f2py intent(in,out) :: velocity_x,velocity_y,velocity_z start = MPI_WTime() call r2c_3d(omega_x,omega_y,omega_z) @@ -103,7 +104,8 @@ contains subroutine solve_poisson_2d_c(omega,velocity_x,velocity_y) complex(pk),dimension(:,:),intent(in):: omega complex(pk),dimension(size(omega,1),size(omega,2)),intent(out) :: velocity_x,velocity_y - + !f2py intent(in,out) :: velocity_x,velocity_y + call c2c_2d(omega,velocity_x,velocity_y) end subroutine solve_poisson_2d_c @@ -114,7 +116,8 @@ contains subroutine solve_poisson_3d_c(omega_x,omega_y,omega_z,velocity_x,velocity_y,velocity_Z) complex(pk),dimension(:,:,:),intent(in):: omega_x,omega_y,omega_z complex(pk),dimension(size(omega_x,1),size(omega_y,2),size(omega_z,3)),intent(out) :: velocity_x,velocity_y,velocity_z - + !f2py intent(in,out) :: velocity_x,velocity_y,velocity_z + call c2c_3d(omega_x,omega_y,omega_z,velocity_x,velocity_y,velocity_z) end subroutine solve_poisson_3d_c @@ -126,6 +129,7 @@ contains real(pk), intent(in) :: nudt real(pk),dimension(:,:,:),intent(in):: velocity_x,velocity_y,velocity_z real(pk),dimension(size(velocity_x,1),size(velocity_y,2),size(velocity_z,3)),intent(out) :: omega_x,omega_y,omega_z + !f2py intent(in,out) :: omega_x,omega_y,omega_z call r2c_3d(velocity_x,velocity_y,velocity_z) diff --git a/HySoP/hysop/f2py/scales2py.f90 b/HySoP/hysop/f2py/scales2py.f90 index 40c8fff1e..f8a09d52a 100755 --- a/HySoP/hysop/f2py/scales2py.f90 +++ b/HySoP/hysop/f2py/scales2py.f90 @@ -11,14 +11,16 @@ implicit none contains !> Initialisation of advection solver (from scales) context : create topology and memory buffers - !! @param[in] number of cells in the global discrete domain + !! @param[in] ncells number of cells in the global discrete domain !! @param[in] lengths width of each side of the domain + !! @param[in] topodims number of mpi-processus in each dir !! @param[out] datashape local dimension of the input/output field !! @param[out] offset absolute index of the first component of the local field - subroutine init_advection_solver(ncells,lengths,datashape,offset,dim,order,stab_coeff) + subroutine init_advection_solver(ncells,lengths,topodims,datashape,offset,dim,order,stab_coeff) integer, intent(in) :: dim integer, dimension(dim),intent(in) :: ncells real(pk),dimension(dim), intent(in) :: lengths + integer, dimension(dim), intent(in) :: topodims integer(ik), dimension(dim), intent(out) :: datashape integer(ik), dimension(dim), intent(out) :: offset character(len=*), optional :: order @@ -26,27 +28,20 @@ contains !f2py optional , depend(ncells) :: dim=len(ncells) !f2py intent(hide) dim !f2py character(*) optional, intent(in) :: order = 'p_O2' - - integer :: rank, error, nbprocs, groupSize - integer,dimension(2) :: dims + !f2py, depends(dim), intent(in) :: topodims + integer :: error,groupsize !rank, nbprocs if(dim /= 3) then stop 'Scales advection solver initialisation failed : not yet implemented for 2d problem.' end if - + ! get current process rank - call MPI_COMM_RANK(MPI_COMM_WORLD,rank,error) - call MPI_COMM_SIZE(MPI_COMM_WORLD,nbprocs,error) - - dims = 1 - ! Create scales mpi-topology - if(nbprocs > 1) then - dims(1) = 2 - dims(2) = nbProcs / 2 - end if - groupsize = 5 - call cart_create(dims,error) - call set_group_size(groupSize) + !call MPI_COMM_RANK(MPI_COMM_WORLD,rank,error) + !call MPI_COMM_SIZE(MPI_COMM_WORLD,nbprocs,error) + + !groupsize = 5 + call cart_create(topodims,error) + !call set_group_size(groupSize) ! Create meshes call discretisation_create(ncells(1),ncells(2),ncells(3),lengths(1),lengths(2),lengths(3)) @@ -71,7 +66,8 @@ contains real(pk), intent(in) :: dt real(pk), dimension(:,:,:), intent(in) :: vx, vy, vz real(pk), dimension(size(vx,1),size(vx,2),size(vx,3)), intent(inout) :: scal - + !f2py intent(in,out), depend(size(vx,1)) :: scal + real(pk) :: t0 t0 = MPI_Wtime() diff --git a/HySoP/setup.py.in b/HySoP/setup.py.in index 49414e4c6..513dab02d 100644 --- a/HySoP/setup.py.in +++ b/HySoP/setup.py.in @@ -84,7 +84,7 @@ if(enable_fortran): include_dirs=inc_dir, library_dirs=parmes_libdir, libraries=parmeslib, - define_macros=[('F2PY_REPORT_ON_ARRAY_COPY', '1')]) + define_macros=[('F2PY_REPORT_ON_ARRAY_COPY', '1'),('F2PY_REPORT_ATEXIT','1')]) ext_modules.append(parpyModule) config = Configuration(name=name, diff --git a/HySoP/src/fftw/fft2d.f90 b/HySoP/src/fftw/fft2d.f90 index f01cb2f95..5f99d5e1a 100755 --- a/HySoP/src/fftw/fft2d.f90 +++ b/HySoP/src/fftw/fft2d.f90 @@ -127,9 +127,13 @@ contains call filter_poisson_2d() call fftw_mpi_execute_dft(plan_backward1, dataout1, datain1) - velocity_x(:,:) = datain1(:,:)*normFFT call fftw_mpi_execute_dft(plan_backward2,dataout2,datain2) - velocity_y(:,:) = datain2(:,:)*normFFT + do j = 1, local_resolution(c_Y) + do i = 1, fft_resolution(c_X) + velocity_x(i,j) = datain1(i,j)*normFFT + velocity_y(i,j) = datain2(i,j)*normFFT + end do + end do !!$ do i = 1, fft_resolution(c_X) !!$ write(*,'(a,i5,a,16f10.4)') 'vxx[',rank,'] ', velocity_x(i,1:local_resolution(c_Y)) @@ -230,15 +234,17 @@ contains !> Backward transform subroutine c2r_2d(velocity_x,velocity_y) real(mk),dimension(:,:),intent(inout) :: velocity_x,velocity_y - !!integer(C_INTPTR_T) :: i, j + integer(C_INTPTR_T) :: i, j call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) - - velocity_x(:,:) = rdatain1(:,:)/(fft_resolution(c_X)*fft_resolution(c_Y)) - call fftw_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2) - velocity_y(:,:) = rdatain2(:,:)/(fft_resolution(c_X)*fft_resolution(c_Y)) - + do j = 1, local_resolution(c_Y) + do i = 1, fft_resolution(c_X) + velocity_x(i,j) = rdatain1(i,j)*normFFT + velocity_y(i,j) = rdatain2(i,j)*normFFT + end do + end do + !!$ !!$ do i = 1, fft_resolution(c_X) !!$ write(*,'(a,i5,a,16f10.4)') 'xx[',rank,'] ', velocity_x(i,1:local_resolution(c_Y)) diff --git a/HySoP/src/fftw/fft3d.f90 b/HySoP/src/fftw/fft3d.f90 index adfbd0e94..e9802ad32 100755 --- a/HySoP/src/fftw/fft3d.f90 +++ b/HySoP/src/fftw/fft3d.f90 @@ -175,9 +175,15 @@ contains call fftw_mpi_execute_dft(plan_backward1, dataout1,datain1) call fftw_mpi_execute_dft(plan_backward2,dataout2,datain2) call fftw_mpi_execute_dft(plan_backward3,dataout3,datain3) - velocity_x(:,:,:) = datain1(:,:,:)*normFFT - velocity_y(:,:,:) = datain2(:,:,:)*normFFT - velocity_z(:,:,:) = datain3(:,:,:)*normFFT + do k =1, local_resolution(c_Z) + do j = 1, fft_resolution(c_Y) + do i = 1, fft_resolution(c_X) + velocity_x(i,j,k) = datain1(i,j,k)*normFFT + velocity_y(i,j,k) = datain2(i,j,k)*normFFT + velocity_z(i,j,k) = datain3(i,j,k)*normFFT + end do + end do + end do end subroutine c2c_3d @@ -289,15 +295,22 @@ contains subroutine c2r_3d(velocity_x,velocity_y,velocity_z) real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z real(mk) :: start + integer(C_INTPTR_T) :: i,j,k start = MPI_WTIME() call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1) call fftw_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2) call fftw_mpi_execute_dft_c2r(plan_backward3,dataout3,rdatain3) print *, "c2r time : ", MPI_WTIME() -start ! copy back to velocity and normalisation - velocity_x(:,:,:) = rdatain1(:,:,:)*normFFT - velocity_y(:,:,:) = rdatain2(:,:,:)*normFFT - velocity_z(:,:,:) = rdatain3(:,:,:)*normFFT + do k =1, local_resolution(c_Z) + do j = 1, fft_resolution(c_Y) + do i = 1, fft_resolution(c_X) + velocity_x(i,j,k) = rdatain1(i,j,k)*normFFT + velocity_y(i,j,k) = rdatain2(i,j,k)*normFFT + velocity_z(i,j,k) = rdatain3(i,j,k)*normFFT + end do + end do + end do end subroutine c2r_3d @@ -395,15 +408,20 @@ contains subroutine c2r_3d_many(velocity_x,velocity_y,velocity_z) real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z real(mk) :: start + integer(C_INTPTR_T) :: i,j,k start = MPI_WTIME() call fftw_mpi_execute_dft_c2r(plan_backward1,dataout_many,rdatain_many) print *, "c2r time : ", MPI_WTIME() -start - - ! copy back to velocity and normalisation - velocity_x(:,:,:) = rdatain_many(1,:,:,:)*normFFT - velocity_y(:,:,:) = rdatain_many(2,:,:,:)*normFFT - velocity_z(:,:,:) = rdatain_many(3,:,:,:)*normFFT + do k =1, local_resolution(c_Z) + do j = 1, fft_resolution(c_Y) + do i = 1, fft_resolution(c_X) + velocity_x(i,j,k) = rdatain_many(1,i,j,k)*normFFT + velocity_y(i,j,k) = rdatain_many(2,i,j,k)*normFFT + velocity_z(i,j,k) = rdatain_many(2,i,j,k)*normFFT + end do + end do + end do end subroutine c2r_3d_many diff --git a/HySoP/src/scalesInterface/layout/cart_topology.f90 b/HySoP/src/scalesInterface/layout/cart_topology.f90 index c7fa1e299..ee38a5708 100644 --- a/HySoP/src/scalesInterface/layout/cart_topology.f90 +++ b/HySoP/src/scalesInterface/layout/cart_topology.f90 @@ -366,10 +366,10 @@ subroutine discretisation_init() ! Group of line along X N_group(1,1) = N_proc(2)/group_size(1,1) N_group(1,2) = N_proc(3)/group_size(1,2) - ! Group of line along X + ! Group of line along Y N_group(2,1) = N_proc(1)/group_size(2,1) N_group(2,2) = N_proc(3)/group_size(2,2) - ! Group of line along X + ! Group of line along Z N_group(3,1) = N_proc(1)/group_size(3,1) N_group(3,2) = N_proc(2)/group_size(3,2) ! But not everything is done by groups !! -- GitLab