From cc90ac6861486a7b6212675bed36c87b683a00a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franck=20P=C3=A9rignon?= <franck.perignon@imag.fr>
Date: Mon, 17 Dec 2012 13:29:17 +0000
Subject: [PATCH] Update f2py interf. to avoid hidden copies

---
 HySoP/hysop/f2py/fftw2py.f90                  | 12 ++++--
 HySoP/hysop/f2py/scales2py.f90                | 34 +++++++---------
 HySoP/setup.py.in                             |  2 +-
 HySoP/src/fftw/fft2d.f90                      | 22 ++++++----
 HySoP/src/fftw/fft3d.f90                      | 40 ++++++++++++++-----
 .../scalesInterface/layout/cart_topology.f90  |  4 +-
 6 files changed, 69 insertions(+), 45 deletions(-)

diff --git a/HySoP/hysop/f2py/fftw2py.f90 b/HySoP/hysop/f2py/fftw2py.f90
index 8d394a34e..ce5316448 100755
--- a/HySoP/hysop/f2py/fftw2py.f90
+++ b/HySoP/hysop/f2py/fftw2py.f90
@@ -70,7 +70,8 @@ contains
   subroutine solve_poisson_2d(omega,velocity_x,velocity_y)
     real(pk),dimension(:,:),intent(in):: omega
     real(pk),dimension(size(omega,1),size(omega,2)),intent(out) :: velocity_x,velocity_y
-    
+    !f2py intent(in,out) :: velocity_x,velocity_y
+
     call r2c_2d(omega)
     
     call filter_poisson_2d()
@@ -86,7 +87,7 @@ contains
     real(pk),dimension(:,:,:),intent(in):: omega_x,omega_y,omega_z
     real(pk),dimension(size(omega_x,1),size(omega_y,2),size(omega_z,3)),intent(out) :: velocity_x,velocity_y,velocity_z
     real(pk) :: start
-    
+    !f2py intent(in,out) :: velocity_x,velocity_y,velocity_z    
     start = MPI_WTime()
     call r2c_3d(omega_x,omega_y,omega_z)
     
@@ -103,7 +104,8 @@ contains
   subroutine solve_poisson_2d_c(omega,velocity_x,velocity_y)
     complex(pk),dimension(:,:),intent(in):: omega
     complex(pk),dimension(size(omega,1),size(omega,2)),intent(out) :: velocity_x,velocity_y
-    
+    !f2py intent(in,out) :: velocity_x,velocity_y
+
     call c2c_2d(omega,velocity_x,velocity_y)
       
   end subroutine solve_poisson_2d_c
@@ -114,7 +116,8 @@ contains
   subroutine solve_poisson_3d_c(omega_x,omega_y,omega_z,velocity_x,velocity_y,velocity_Z)
     complex(pk),dimension(:,:,:),intent(in):: omega_x,omega_y,omega_z
     complex(pk),dimension(size(omega_x,1),size(omega_y,2),size(omega_z,3)),intent(out) :: velocity_x,velocity_y,velocity_z
-    
+    !f2py intent(in,out) :: velocity_x,velocity_y,velocity_z
+
     call c2c_3d(omega_x,omega_y,omega_z,velocity_x,velocity_y,velocity_z)
       
   end subroutine solve_poisson_3d_c
@@ -126,6 +129,7 @@ contains
     real(pk), intent(in) :: nudt
     real(pk),dimension(:,:,:),intent(in):: velocity_x,velocity_y,velocity_z
     real(pk),dimension(size(velocity_x,1),size(velocity_y,2),size(velocity_z,3)),intent(out) :: omega_x,omega_y,omega_z
+    !f2py intent(in,out) :: omega_x,omega_y,omega_z
     
     call r2c_3d(velocity_x,velocity_y,velocity_z)
     
diff --git a/HySoP/hysop/f2py/scales2py.f90 b/HySoP/hysop/f2py/scales2py.f90
index 40c8fff1e..f8a09d52a 100755
--- a/HySoP/hysop/f2py/scales2py.f90
+++ b/HySoP/hysop/f2py/scales2py.f90
@@ -11,14 +11,16 @@ implicit none
 contains
 
   !> Initialisation of advection solver (from scales) context : create topology and memory buffers
-  !! @param[in] number of cells in the global discrete domain
+  !! @param[in] ncells number of cells in the global discrete domain
   !! @param[in] lengths width of each side of the domain
+  !! @param[in] topodims number of mpi-processus in each dir
   !! @param[out] datashape local dimension of the input/output field
   !! @param[out] offset absolute index of the first component of the local field
-  subroutine init_advection_solver(ncells,lengths,datashape,offset,dim,order,stab_coeff)
+  subroutine init_advection_solver(ncells,lengths,topodims,datashape,offset,dim,order,stab_coeff)
     integer, intent(in) :: dim 
     integer, dimension(dim),intent(in) :: ncells
     real(pk),dimension(dim), intent(in) :: lengths
+    integer, dimension(dim), intent(in) :: topodims
     integer(ik), dimension(dim), intent(out) :: datashape
     integer(ik), dimension(dim), intent(out) :: offset
     character(len=*), optional ::  order
@@ -26,27 +28,20 @@ contains
     !f2py optional , depend(ncells) :: dim=len(ncells)
     !f2py intent(hide) dim
     !f2py character(*) optional, intent(in) :: order = 'p_O2'
-    
-    integer :: rank, error, nbprocs, groupSize
-    integer,dimension(2) :: dims
+    !f2py, depends(dim), intent(in) :: topodims
+    integer :: error,groupsize !rank, nbprocs
 
     if(dim /= 3) then
        stop 'Scales advection solver initialisation failed : not yet implemented for 2d problem.'
     end if
-    
+
     ! get current process rank
-    call MPI_COMM_RANK(MPI_COMM_WORLD,rank,error)
-    call MPI_COMM_SIZE(MPI_COMM_WORLD,nbprocs,error)
- 
-    dims = 1
-    ! Create scales mpi-topology 
-    if(nbprocs > 1) then
-       dims(1) = 2
-       dims(2) = nbProcs / 2
-    end if
-    groupsize = 5
-    call cart_create(dims,error)
-    call set_group_size(groupSize)
+    !call MPI_COMM_RANK(MPI_COMM_WORLD,rank,error)
+    !call MPI_COMM_SIZE(MPI_COMM_WORLD,nbprocs,error)
+
+    !groupsize = 5
+    call cart_create(topodims,error)
+    !call set_group_size(groupSize)
     ! Create meshes
     call discretisation_create(ncells(1),ncells(2),ncells(3),lengths(1),lengths(2),lengths(3))
     
@@ -71,7 +66,8 @@ contains
     real(pk), intent(in) :: dt
     real(pk), dimension(:,:,:), intent(in) :: vx, vy, vz
     real(pk), dimension(size(vx,1),size(vx,2),size(vx,3)), intent(inout) :: scal
-    
+    !f2py intent(in,out), depend(size(vx,1)) :: scal
+
     real(pk) :: t0
 
     t0 = MPI_Wtime()
diff --git a/HySoP/setup.py.in b/HySoP/setup.py.in
index 49414e4c6..513dab02d 100644
--- a/HySoP/setup.py.in
+++ b/HySoP/setup.py.in
@@ -84,7 +84,7 @@ if(enable_fortran):
                             include_dirs=inc_dir,
                             library_dirs=parmes_libdir,
                             libraries=parmeslib,
-                            define_macros=[('F2PY_REPORT_ON_ARRAY_COPY', '1')])
+                            define_macros=[('F2PY_REPORT_ON_ARRAY_COPY', '1'),('F2PY_REPORT_ATEXIT','1')])
     ext_modules.append(parpyModule)
 
 config = Configuration(name=name,
diff --git a/HySoP/src/fftw/fft2d.f90 b/HySoP/src/fftw/fft2d.f90
index f01cb2f95..5f99d5e1a 100755
--- a/HySoP/src/fftw/fft2d.f90
+++ b/HySoP/src/fftw/fft2d.f90
@@ -127,9 +127,13 @@ contains
     call filter_poisson_2d()
         
     call fftw_mpi_execute_dft(plan_backward1, dataout1, datain1)
-    velocity_x(:,:) = datain1(:,:)*normFFT
     call fftw_mpi_execute_dft(plan_backward2,dataout2,datain2)
-    velocity_y(:,:) = datain2(:,:)*normFFT
+    do j = 1, local_resolution(c_Y)
+       do i = 1, fft_resolution(c_X)
+          velocity_x(i,j) = datain1(i,j)*normFFT
+          velocity_y(i,j) = datain2(i,j)*normFFT
+       end do
+    end do
     
 !!$    do i = 1, fft_resolution(c_X)
 !!$       write(*,'(a,i5,a,16f10.4)') 'vxx[',rank,'] ', velocity_x(i,1:local_resolution(c_Y))
@@ -230,15 +234,17 @@ contains
   !> Backward transform
   subroutine c2r_2d(velocity_x,velocity_y)
     real(mk),dimension(:,:),intent(inout) :: velocity_x,velocity_y
-    !!integer(C_INTPTR_T) :: i, j
+    integer(C_INTPTR_T) :: i, j
 
     call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1)
-    
-    velocity_x(:,:) = rdatain1(:,:)/(fft_resolution(c_X)*fft_resolution(c_Y))
-
     call fftw_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2)
-    velocity_y(:,:) = rdatain2(:,:)/(fft_resolution(c_X)*fft_resolution(c_Y))
-    
+    do j = 1, local_resolution(c_Y)
+       do i = 1, fft_resolution(c_X)
+          velocity_x(i,j) = rdatain1(i,j)*normFFT
+          velocity_y(i,j) = rdatain2(i,j)*normFFT
+       end do
+    end do
+
 !!$
 !!$    do i = 1, fft_resolution(c_X)
 !!$       write(*,'(a,i5,a,16f10.4)') 'xx[',rank,'] ', velocity_x(i,1:local_resolution(c_Y))
diff --git a/HySoP/src/fftw/fft3d.f90 b/HySoP/src/fftw/fft3d.f90
index adfbd0e94..e9802ad32 100755
--- a/HySoP/src/fftw/fft3d.f90
+++ b/HySoP/src/fftw/fft3d.f90
@@ -175,9 +175,15 @@ contains
     call fftw_mpi_execute_dft(plan_backward1, dataout1,datain1)
     call fftw_mpi_execute_dft(plan_backward2,dataout2,datain2)
     call fftw_mpi_execute_dft(plan_backward3,dataout3,datain3)
-    velocity_x(:,:,:) = datain1(:,:,:)*normFFT
-    velocity_y(:,:,:) = datain2(:,:,:)*normFFT
-    velocity_z(:,:,:) = datain3(:,:,:)*normFFT
+    do k =1, local_resolution(c_Z)
+       do j = 1, fft_resolution(c_Y)
+          do i = 1, fft_resolution(c_X)
+             velocity_x(i,j,k) = datain1(i,j,k)*normFFT
+             velocity_y(i,j,k) = datain2(i,j,k)*normFFT
+             velocity_z(i,j,k) = datain3(i,j,k)*normFFT
+          end do
+       end do
+    end do
 
   end subroutine c2c_3d
 
@@ -289,15 +295,22 @@ contains
   subroutine c2r_3d(velocity_x,velocity_y,velocity_z)
     real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z
     real(mk) :: start
+    integer(C_INTPTR_T) :: i,j,k
     start = MPI_WTIME()
     call fftw_mpi_execute_dft_c2r(plan_backward1,dataout1,rdatain1)
     call fftw_mpi_execute_dft_c2r(plan_backward2,dataout2,rdatain2)
     call fftw_mpi_execute_dft_c2r(plan_backward3,dataout3,rdatain3)
     print *, "c2r time : ", MPI_WTIME() -start
     ! copy back to velocity and normalisation
-    velocity_x(:,:,:) = rdatain1(:,:,:)*normFFT
-    velocity_y(:,:,:) = rdatain2(:,:,:)*normFFT
-    velocity_z(:,:,:) = rdatain3(:,:,:)*normFFT
+    do k =1, local_resolution(c_Z)
+       do j = 1, fft_resolution(c_Y)
+          do i = 1, fft_resolution(c_X)
+             velocity_x(i,j,k) = rdatain1(i,j,k)*normFFT
+             velocity_y(i,j,k) = rdatain2(i,j,k)*normFFT
+             velocity_z(i,j,k) = rdatain3(i,j,k)*normFFT
+          end do
+       end do
+    end do
   
   end subroutine c2r_3d
 
@@ -395,15 +408,20 @@ contains
   subroutine c2r_3d_many(velocity_x,velocity_y,velocity_z)
     real(mk),dimension(:,:,:),intent(inout) :: velocity_x,velocity_y,velocity_z
     real(mk) :: start
+    integer(C_INTPTR_T) :: i,j,k
 
     start = MPI_WTIME()
     call fftw_mpi_execute_dft_c2r(plan_backward1,dataout_many,rdatain_many)
     print *, "c2r time : ", MPI_WTIME() -start
-    
-    ! copy back to velocity and normalisation
-    velocity_x(:,:,:) = rdatain_many(1,:,:,:)*normFFT
-    velocity_y(:,:,:) = rdatain_many(2,:,:,:)*normFFT
-    velocity_z(:,:,:) = rdatain_many(3,:,:,:)*normFFT
+    do k =1, local_resolution(c_Z)
+       do j = 1, fft_resolution(c_Y)
+          do i = 1, fft_resolution(c_X)
+             velocity_x(i,j,k) = rdatain_many(1,i,j,k)*normFFT
+             velocity_y(i,j,k) = rdatain_many(2,i,j,k)*normFFT
+             velocity_z(i,j,k) = rdatain_many(2,i,j,k)*normFFT
+          end do
+       end do
+    end do
 
   end subroutine c2r_3d_many
   
diff --git a/HySoP/src/scalesInterface/layout/cart_topology.f90 b/HySoP/src/scalesInterface/layout/cart_topology.f90
index c7fa1e299..ee38a5708 100644
--- a/HySoP/src/scalesInterface/layout/cart_topology.f90
+++ b/HySoP/src/scalesInterface/layout/cart_topology.f90
@@ -366,10 +366,10 @@ subroutine discretisation_init()
     ! Group of line along X
     N_group(1,1) = N_proc(2)/group_size(1,1)
     N_group(1,2) = N_proc(3)/group_size(1,2)
-    ! Group of line along X
+    ! Group of line along Y
     N_group(2,1) = N_proc(1)/group_size(2,1)
     N_group(2,2) = N_proc(3)/group_size(2,2)
-    ! Group of line along X
+    ! Group of line along Z
     N_group(3,1) = N_proc(1)/group_size(3,1)
     N_group(3,2) = N_proc(2)/group_size(3,2)
 ! But not everything is done by groups !!
-- 
GitLab