diff --git a/HySoP/src/scalesInterface/layout/cart_topology.f90 b/HySoP/src/scalesInterface/layout/cart_topology.f90
index 63554dee5376939eb3dadad690ca59ee66eb0799..b388f631fe81ad211c4b501a2860ba90d27c8b9b 100644
--- a/HySoP/src/scalesInterface/layout/cart_topology.f90
+++ b/HySoP/src/scalesInterface/layout/cart_topology.f90
@@ -74,7 +74,9 @@ module cart_topology
   !> Table of the previous communicators (ie comm devoted to 1D subgrids)
   integer, dimension(3), protected :: D_comm
   !> Rank of immediate neighbors
-  integer,dimension(3,-2:2),protected :: neighbors
+  integer,dimension(3,-4:4),protected :: neighbors
+  !> Rank of immediate neighbors in the 3D cartesian topology
+  integer,dimension(1:3,-1:1),protected :: neighbors_cart_topo=0
 
   ! ----- Information about current MPI processus and MPI topology
   !> number of processes in each direction
@@ -93,8 +95,6 @@ module cart_topology
   ! ------ Information about mesh subdivision and on the global grid -----
   !> information about local mesh - for scalar
   type(cartesian_mesh), protected :: mesh_sc
-  !> REcopy of mesh_sc%N_proc for python interface
-  integer, dimension(3) :: N_proc
   !> Computation are done by group of line. Here we define their size
   integer, dimension(3,2), protected :: group_size
   !> To check if group size is initialized
@@ -246,14 +246,19 @@ subroutine cart_create(dims, ierr, spec_comm, topology)
   ! --- Initialise information about the current processus ---
   call mpi_comm_rank(cart_comm, cart_rank, ierr)
   do direction = 1, 3
+    ! Neighbors on the 1D topologies
     call mpi_comm_rank(D_comm(direction), D_rank(direction), ierr)
     call mpi_cart_shift(D_comm(direction), 0, 1, neighbors(direction,-1), neighbors(direction,1), ierr)
     call mpi_cart_shift(D_comm(direction), 0, 2, neighbors(direction,-2), neighbors(direction,2), ierr)
+    call mpi_cart_shift(D_comm(direction), 0, 3, neighbors(direction,-3), neighbors(direction,3), ierr)
+    call mpi_cart_shift(D_comm(direction), 0, 4, neighbors(direction,-4), neighbors(direction,4), ierr)
     neighbors(direction,0) = D_rank(direction)
+    ! Neighbors on the 3D topology
+    call mpi_cart_shift(cart_comm, direction-1, 1, neighbors_cart_topo(direction,-1), neighbors_cart_topo(direction,1), ierr)
+    neighbors_cart_topo(direction,0) = cart_rank
   end do
   call mpi_cart_coords(cart_comm, cart_rank, 3, coord, ierr)
   coordYZ = (/ coord(2), coord(3) /)
-
   ! --- Spectral context ---
   ! Initialized the communicator used on which the spectral part
   ! will be based.
@@ -346,7 +351,6 @@ subroutine discretisation_create(Nx, Ny, Nz, Lx, Ly, Lz, verbosity)
   mesh_sc%length(3)= Lz
 
   mesh_sc%N_proc = mesh_sc%N / nb_proc_dim
-  N_proc = mesh_sc%N_proc
   mesh_sc%relative_extend(:,1) = 1
   mesh_sc%relative_extend(:,2) = mesh_sc%N_proc
@@ -378,7 +382,6 @@ subroutine discretisation_default(verbosity)
   mesh_sc%N = default_size
   mesh_sc%length = 1.
 
   mesh_sc%N_proc = mesh_sc%N / nb_proc_dim
-  N_proc = mesh_sc%N_proc
   mesh_sc%relative_extend(:,1) = 1
   mesh_sc%relative_extend(:,2) = mesh_sc%N_proc
diff --git a/HySoP/src/scalesInterface/particles/advec.f90 b/HySoP/src/scalesInterface/particles/advec.f90
index 5078f22c064b72a249ac3d1d3858e5d99a3abc54..5d15d869b6e0c5cf45b8014699c7a92f612c1a81 100644
--- a/HySoP/src/scalesInterface/particles/advec.f90
+++ b/HySoP/src/scalesInterface/particles/advec.f90
@@ -116,14 +116,10 @@ subroutine advec_init(order, stab_coeff, verbosity, dim_split)
 
   select case(dim_splitting)
   case('classic')
-    write(*,'(6x,a)') '========== CLASSIC Splitting ========='
-    write(*,'(6x,a)') '======================================'
     advec_step => advec_step_Torder1
    ! Compute stability coefficient
    if (present(stab_coeff)) stab_coeff = 1.0/(2.0*dble(bl_size))
  case default    ! Strang
-    write(*,'(6x,a)') '========== STRANG Splitting ========='
-    write(*,'(6x,a)') '======================================'
    advec_step => advec_step_Torder2
    ! Compute stability coefficient - as each dimension is solved in
    ! dt/2, stab_coef is 2 times bigger
@@ -140,8 +136,8 @@ subroutine advec_init(order, stab_coeff, verbosity, dim_split)
    advec_remesh => AC_remesh_limit_lambda_group ! limited and corrected lambda 2
  case('p_M4')
    advec_remesh => AC_remesh_Mprime_group ! Xremesh_Mprime4
-    ! Check if interface with parmes is done. Ok in Scales. But needs to
-    ! get diffusion information from Parmes.
+    ! Check if the interface is done. OK in Scales, but diffusion information is
+    ! still needed. OK for the advec_plus variant, but what about here?
  !case('d_M4')
  !  advec_remesh_plus => AC_remesh_Mprime_group ! Xremesh_Mprime4 with diffusion
  case('p_M6')
@@ -211,6 +207,96 @@ subroutine advec_setup_alongZ()
  line_dir = 3
 end subroutine advec_setup_alongZ
 
+!> Solve advection equation - order 2 - with basic velocity interpolation
+!! @param[in]     dt   = time step
+!! @param[in]     Vx   = velocity along x (can be discretised on a bigger mesh than the scalar)
+!! @param[in]     Vy   = velocity along y
+!! @param[in]     Vz   = velocity along z
+!! @param[in,out] scal = scalar field to advect
+subroutine advec_step_Inter_basic(dt, Vx, Vy, Vz, scal)
+
+  ! Input/Output
+  real(WP), intent(in) :: dt
+  real(WP), dimension(:,:,:), intent(in) :: Vx, Vy, Vz
+  real(WP), dimension(:,:,:), intent(inout) :: scal
+  ! Local
+  real(WP), dimension(:,:,:), allocatable :: Vx_f, Vy_f, Vz_f
+  integer :: ierr ! Error code.
+
+  allocate(Vx_f(mesh_sc%N_proc(1),mesh_sc%N_proc(2),mesh_sc%N_proc(3)),stat=ierr)
+  if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vx_f'
+  allocate(Vy_f(mesh_sc%N_proc(1),mesh_sc%N_proc(2),mesh_sc%N_proc(3)),stat=ierr)
+  if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vy_f'
+  allocate(Vz_f(mesh_sc%N_proc(1),mesh_sc%N_proc(2),mesh_sc%N_proc(3)),stat=ierr)
+  if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vz_f'
+
+  call Interpol_3D(Vx, mesh_V%dx, Vx_f, mesh_sc%dx)
+  call Interpol_3D(Vy, mesh_V%dx, Vy_f, mesh_sc%dx)
+  call Interpol_3D(Vz, mesh_V%dx, Vz_f, mesh_sc%dx)
+  if (cart_rank==0) write(6,'(a)') ' [INFO PARTICLES] Interpolation done'
+
+  call advec_step_Torder2(dt, Vx_f, Vy_f, Vz_f, scal)
+
+  deallocate(Vx_f)
+  deallocate(Vy_f)
+  deallocate(Vz_f)
+
+end subroutine advec_step_Inter_basic
+
+
+!> Solve advection equation - order 2 - with more complex velocity interpolation
+!! @param[in]     dt   = time step
+!! @param[in]     Vx   = velocity along x (can be discretised on a bigger mesh than the scalar)
+!! @param[in]     Vy   = velocity along y
+!! @param[in]     Vz   = velocity along z
+!! @param[in,out] scal = scalar field to advect
+subroutine advec_step_Inter_Two(dt, Vx, Vy, Vz, scal)
+
+  ! Input/Output
+  real(WP), intent(in) :: dt
+  real(WP), dimension(:,:,:), intent(in) :: Vx, Vy, Vz
+  real(WP), dimension(:,:,:), intent(inout) :: scal
+  ! Local
+  real(WP), dimension(:,:,:), allocatable :: Vx_c, Vy_c, Vz_c
+  real(WP), dimension(:,:,:), allocatable :: Vx_f, Vy_f, Vz_f
+  integer :: ierr ! Error code.
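+  ! Buffer layout (inferred from the allocations below, and presumably what
+  ! advec_1D_Vcoarse expects after each advec_setup_along* call): every velocity
+  ! component is stored with its own direction as first index; the *_c buffers keep
+  ! the coarse (velocity-grid) resolution along that direction while the *_f buffers
+  ! live entirely on the (finer) scalar grid.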
+
+  allocate(Vx_c(mesh_V%N_proc(1),mesh_sc%N_proc(2),mesh_sc%N_proc(3)),stat=ierr)
+  if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vx_c'
+  allocate(Vx_f(mesh_sc%N_proc(1),mesh_sc%N_proc(2),mesh_sc%N_proc(3)),stat=ierr)
+  if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vx_f'
+  allocate(Vy_c(mesh_V%N_proc(2),mesh_sc%N_proc(1),mesh_sc%N_proc(3)),stat=ierr)
+  if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vy_c'
+  allocate(Vy_f(mesh_sc%N_proc(2),mesh_sc%N_proc(1),mesh_sc%N_proc(3)),stat=ierr)
+  if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vy_f'
+  allocate(Vz_c(mesh_V%N_proc(3),mesh_sc%N_proc(1),mesh_sc%N_proc(2)),stat=ierr)
+  if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vz_c'
+  allocate(Vz_f(mesh_sc%N_proc(3),mesh_sc%N_proc(1),mesh_sc%N_proc(2)),stat=ierr)
+  if (ierr/=0) write(6,'(a,i0,a)') '[ERROR] on cart_rank ', cart_rank, ' - not enough memory for Vz_f'
+
+  call Interpol_2D_3D_vect(mesh_sc%dx, mesh_V%dx, Vx, Vy, Vz, Vx_c, Vx_f, Vy_c, Vy_f, Vz_c, Vz_f)
+
+  call advec_setup_alongX()
+  call advec_X_basic_no_com(dt/2.0, gsX, Vx_f, scal)
+  call advec_setup_alongY()
+  call advec_1D_Vcoarse(dt/2.0, gsY, Vy_c, Vy_f, scal)
+  call advec_setup_alongZ()
+  call advec_1D_Vcoarse(dt/2.0, gsZ, Vz_c, Vz_f, scal)
+  call advec_1D_Vcoarse(dt/2.0, gsZ, Vz_c, Vz_f, scal)
+  call advec_setup_alongY()
+  call advec_1D_Vcoarse(dt/2.0, gsY, Vy_c, Vy_f, scal)
+  call advec_setup_alongX()
+  call advec_X_basic_no_com(dt/2.0, gsX, Vx_f, scal)
+
+  deallocate(Vx_f)
+  deallocate(Vy_f)
+  deallocate(Vz_f)
+
+  deallocate(Vx_c)
+  deallocate(Vy_c)
+  deallocate(Vz_c)
+
+end subroutine advec_step_Inter_Two
 
 !> Solve advection equation - order 1 in time (order 2 dimensional splitting)
 !! @param[in] dt = time step
diff --git a/HySoP/src/scalesInterface/particles/interpolation_velo.F90 b/HySoP/src/scalesInterface/particles/interpolation_velo.F90
index 229573d1ab9c32f328297f624b605a76c465f35f..94b76df362bd6f38b2b74c153b846767b1a0f84f 100644
--- a/HySoP/src/scalesInterface/particles/interpolation_velo.F90
+++ b/HySoP/src/scalesInterface/particles/interpolation_velo.F90
@@ -53,7 +53,7 @@ module Interpolation_velo
  !> Generic subroutine (pointer intialize to the choosen formula)
  procedure(weight_M4), pointer, public :: get_weight => null()
  !> Specific interpolation formula
-  public :: weight_M4, weight_Mprime4, weight_Lambda4_4
+  public :: weight_M4, weight_Mprime4, weight_Lambda4_4, weight_linear
 
  ! ===== Private variables =====
  character(len=4), protected :: interpol = 'Mp4'
@@ -87,6 +87,13 @@ subroutine interpol_init(formula, verbose)
  if(present(verbose)) verbosity = verbose
 
  select case(trim(interpol))
+  case('lin')
+    stencil_size = 2
+    stencil_d = 1
+    stencil_g = 0
+    get_weight => weight_linear
+    if ((cart_rank==0).and.(verbosity)) &
+        & write(*,'(6x,a)') '============= Interpolation = linear ==========='
  case('L4_4')
    stencil_size = 6
    stencil_d = 3
@@ -189,7 +196,7 @@ end subroutine Interpol_3D
 
 ! ==================== 2D interpolation ====================
 ! ========================================================================
-!> Interpolate a field (ordannate along X,Y,Z) along X and Y-axis
+!> Interpolate a field (ordered along X,Y,Z) along X and Y-axis
 !! @param[in] V_coarse = velocity to interpolate along the last direction
 !! @param[in] dx_c     = space step on the coarse grid (for last direction)
 !! @param[in] V_fine   = interpolated velocity
@@ -404,49 +411,99 @@ subroutine Inter_LastDir_com(dir, V_coarse, dx_c, V_fine, dx_f)
  real(WP), dimension(:,:,:), allocatable :: V_beg, V_end ! ghost values of velocity
  real(WP), dimension(stencil_size) :: weight ! interpolation weight
  integer :: i,ind,i_bis, V_ind ! some loop indices
-  integer :: ind_max, ind_min, ind_limit
+  integer :: ind_max, ind_min, ind_limit, ind_limit_2
  real(WP) :: pos
  integer :: N_coarse, N_fine ! number of grid points
+  integer :: com_pos ! to deal with multiple communications - if (stencil size)/2 > local size of coarse grid
+                     ! = position where ghost values are received in V_beg and V_end
+  integer, dimension(2) :: com_nb ! number of communications (if (stencil size)/2 > local size of coarse grid)
  integer, dimension(2) :: com_size ! size of mpi communication for ghost points
-  integer, dimension(2) :: rece_request ! mpi communication request (handle) of nonblocking receive
-  integer, dimension(MPI_STATUS_SIZE) :: status ! mpi status (for mpi_wait)
  integer :: ierr ! mpi error code
+  integer, dimension(:),allocatable :: beg_request ! mpi communication request (handle) of nonblocking receive
+  integer, dimension(:),allocatable :: end_request ! mpi communication request (handle) of nonblocking receive
+  real(WP), parameter :: eps = 1e-6
 
  ! Initialisation
-  com_size = size(V_coarse,1)*size(V_coarse,2)
-  com_size(1) = com_size(1)*(stencil_g)
-  com_size(2) = com_size(2)*(stencil_d)
+  com_size(1) = size(V_coarse,1)*size(V_coarse,2)
  N_coarse = size(V_coarse,3)
  N_fine = size(V_fine,3)
-  ! ind_max = max(indice ind on fine grid as V_fine(i) can be computed without communnication)
-  !         = max{ind : V_ind=floor[(ind-1)*dx_f/dx_c]+1 <= (N_coarse-stencil_d)}
-  !         = max{ind : V_ind=floor[(ind-1)*dx_f/dx_c]+1 < (N_coarse-stencil_d+1)}
-  !         = max{ind : pos=(ind-1)*dx_f < [(N_coarse-stencil_d+1)-1]*dx_c}
-  !         = max{ind : pos=(ind-1) < [(N_coarse-stencil_d+1)-1]*dx_c/dx_f}
-  !         = max{ind : pos=ind < [(N_coarse-stencil_d+1)-1]*dx_c/dx_f + 1}
-  ind_max = ceiling((N_coarse-stencil_d)*dx_c/dx_f) - 1
-  ind_min = ceiling((stencil_g)*dx_c/dx_f)+1
+  ! ind_max = max(indice ind on fine grid as V_fine(i) can be computed without communication)
+  !         = max{ind : V_ind=floor[(ind-1)*dx_f/dx_c]+1 <= (N_coarse-stencil_d) }
+  !         = max{ind : V_ind=floor[(ind-1)*dx_f/dx_c] <= (N_coarse-stencil_d-1) }
+  !         = max{ind : pos = (ind-1)*dx_f/dx_c < (N_coarse-stencil_d-1)+1 }
+  !         = max{ind : (ind-1) < (N_coarse-stencil_d)*dx_c/dx_f }
+  !         = max{ind : ind < [(N_coarse-stencil_d)*(dx_c/dx_f)]+1}
+  ! Define real_max = [(N_coarse-stencil_d)*(dx_c/dx_f)] as a real. One gets:
+  !   ind_max = max{ind integer such that ind < real_max+1} and thus
+  !   ind_max = real_max          if real_max is an integer
+  !           = floor(real_max+1) otherwise
+  ! ie ind_max = ceiling(real_max)
+  !ind_max = ceiling((N_coarse-stencil_d)*dx_c/dx_f)
+  ind_max = ceiling(((N_coarse-stencil_d)*dx_c/dx_f)-eps) ! To avoid numerical error and thus segmentation fault
+  ! ind_min = min(indice ind on fine grid as V_fine(i) can be computed without communication)
+  !         = min{ind : V_ind=floor[(ind-1)*dx_f/dx_c]+1 - stencil_g > 0}
+  !         = min{ind : V_ind=floor[(ind-1)*dx_f/dx_c] > stencil_g - 1 }
+  !         = min{ind : V_ind=floor[(ind-1)*dx_f/dx_c] >= stencil_g }
+  !         = min{ind : (ind-1)*dx_f/dx_c >= stencil_g }
+  !         = min{ind : pos = (ind-1)*dx_f >= stencil_g*dx_c}
+  !         = min{ind : ind >= (stencil_g*dx_c/dx_f) + 1}
+  !         = ceiling[(stencil_g*dx_c/dx_f) + 1]
+  ind_min = ceiling((stencil_g)*dx_c/dx_f)+1 ! here numerical truncation cannot lead to a seg. fault
 
  ! ==== Communication ====
  if(stencil_g>0) then
    allocate(V_beg(size(V_coarse,1),size(V_coarse,2),stencil_g))
-    ! Initiate non blocking receive
-    call Mpi_Irecv(V_beg(1,1,1),com_size(1),MPI_DOUBLE_PRECISION, &
-        & neighbors(dir,-1), 1, D_comm(dir), rece_request(1), ierr)
-    ! Send data
-    call Mpi_Send(V_coarse(:,:,N_coarse-stencil_g+1),com_size(1),MPI_DOUBLE_PRECISION, &
-        & neighbors(dir,1), 1, D_comm(dir), ierr)
+    com_nb(1) = ceiling(real(stencil_g)/N_coarse) ! number of communications required to get the ghosts
+    allocate(beg_request(com_nb(1)))
+    com_pos = stencil_g+1 ! i = 1 + missing (or remaining) ghost lines
+    com_size(2) = com_size(1)*N_coarse
+    ! Except for the last communication, send all the local coarse data.
+    ! Note that this happens if the local coarse grid contains fewer than (stencil_size/2)
+    ! points along the current direction (ie if the coarse grid is very coarse)
+    do ind = 1, com_nb(1)-1
+      com_pos = com_pos - N_coarse ! = 1 + missing ghost lines after this step
+      ! Communication
+      call Mpi_Irecv(V_beg(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+          & neighbors(dir,-ind), 100+ind, D_comm(dir), beg_request(ind), ierr)
+      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+          & neighbors(dir,ind), 100+ind, D_comm(dir), ierr)
+    end do
+    ! Last communication to complete the "right" ghosts (beginning points)
+    ! We use that missing ghost lines = com_pos - 1
+    com_size(2) = com_size(1)*(com_pos-1)
+    call Mpi_Irecv(V_beg(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,-com_nb(1)), 1, D_comm(dir), beg_request(com_nb(1)), ierr)
+    call Mpi_Send(V_coarse(1,1,N_coarse-com_pos+2),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,com_nb(1)), 1, D_comm(dir), ierr)
  end if
  if(stencil_d>0) then
    allocate(V_end(size(V_coarse,1),size(V_coarse,2),stencil_d))
-    ! Initiate non blocking receive
-    call Mpi_Irecv(V_end(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
-        & neighbors(dir,1), 2, D_comm(dir), rece_request(2), ierr)
+    com_nb(2) = ceiling(real(stencil_d)/N_coarse) ! number of communications required to get the ghosts
+    allocate(end_request(com_nb(2)))
+    com_pos = 1 ! Reception from the next process is done in position 1
+    com_size(2) = com_size(1)*N_coarse
+    ! Except for the last communication, send all the local coarse data.
+    ! Note that this happens if the local coarse grid contains fewer than (stencil_size/2)
+    ! points along the current direction (ie if the coarse grid is very coarse)
+    do ind = 1, com_nb(2)-1
+      ! Communication
+      call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+          & neighbors(dir,ind), 200+ind, D_comm(dir), end_request(ind), ierr)
+      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+          & neighbors(dir,-ind), 200+ind, D_comm(dir), ierr)
+      ! next com_pos = (ind*N_coarse)+1 = com_pos + N_coarse
+      com_pos = com_pos + N_coarse
+    end do
+    ! Last step
+    ! Note that: missing ghost lines = stencil_d - (com_nb-1)*N_coarse
+    com_size(2) = com_size(1)*(stencil_d-((com_nb(2)-1)*N_coarse))
+    ! Perform communication
+    call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,com_nb(2)), 2, D_comm(dir), end_request(com_nb(2)), ierr)
    ! Send data
-    call Mpi_Send(V_coarse(:,:,1),com_size(2),MPI_DOUBLE_PRECISION, &
-        & neighbors(dir,-1), 2, D_comm(dir), ierr)
-  else
+    call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,-com_nb(2)), 2, D_comm(dir), ierr)
  end if
  ! ==== Interpolation ====
@@ -461,12 +518,20 @@ subroutine Inter_LastDir_com(dir, V_coarse, dx_c, V_fine, dx_f)
      V_fine(:,:,ind) = V_fine(:,:,ind) + weight(i+1)*V_coarse(:,:,V_ind+i)
    end do
  end do
+  ! -- Wait for communication completion before dealing with the end --
+  if(stencil_g>0) then
+    call mpi_waitall(com_nb(1),beg_request, MPI_STATUSES_IGNORE, ierr)
+    deallocate(beg_request)
+  end if
+  if(stencil_d>0) then
+    call mpi_waitall(com_nb(2),end_request, MPI_STATUSES_IGNORE, ierr)
+    deallocate(end_request)
+  end if
  ! -- For begining --
-  if(stencil_g>0) call mpi_wait(rece_request(1), status, ierr)
-  ! Use that interpolation formula are exact
+  ! Use the fact that the interpolation formulas are exact - no computation for the first point of each line
  V_fine(:,:,1) = V_coarse(:,:,1)
  ! For other first points
-  do ind = 2, ind_min-1
+  do ind = 2, min(ind_min-1, N_fine) ! Be careful: in some massively parallel contexts, ind_min can be bigger than N_fine+1
    pos = (ind-1)*dx_f/dx_c
    V_ind = floor(pos)+1
    call get_weight(pos-V_ind+1, weight)
@@ -478,14 +543,22 @@ subroutine Inter_LastDir_com(dir, V_coarse, dx_c, V_fine, dx_f)
      !V_fine(:,:,ind) = V_fine(:,:,ind) + weight(i)*V_beg(:,:,V_ind+stencil_g+i)
      V_fine(:,:,ind) = V_fine(:,:,ind) + weight(i)*V_beg(:,:,V_ind-1+i) ! first point in V_beg stands for 1-stencil_g position
    end do
-    ! We look for first local value of V_coarse at position (:,:,1) ! (array starts at 1)
-    do i_bis = ind_limit+1, stencil_size
+    ! If N_coarse < stencil_size, the last interpolation points do not belong to the local domain
+    ! but to the domain of the process of coordinates = (mine+1) inside the MPI topology.
+    ! Then we search V_end for the values at the last interpolation points.
+    ind_limit_2 = min(stencil_size, N_coarse+ind_limit) ! for a very coarse grid, stencil_size can be bigger than N_coarse
+    do i_bis = ind_limit+1, ind_limit_2
+      ! We look for the first local values of V_coarse, at position (:,:,1) (array starts at 1)
      V_fine(:,:,ind) = V_fine(:,:,ind) + weight(i_bis)*V_coarse(:,:,i_bis-ind_limit)
    end do
+    ! Values in V_end
+    do i_bis = ind_limit_2+1, stencil_size
+      V_fine(:,:,ind) = V_fine(:,:,ind) + weight(i_bis)*V_end(:,:,i_bis-ind_limit_2)
+    end do
  end do
  ! -- For point of at the end of a line along the current direction --
-  if(stencil_d>0) call mpi_wait(rece_request(2), status, ierr)
-  do ind = ind_max+1, N_fine
+  ! If ind_max <= ind_min (ie if stencil_size>N_coarse), computations are already done for the first points
+  do ind = max(ind_max+1,ind_min), N_fine
    pos = (ind-1)*dx_f/dx_c
    V_ind = floor(pos)+1
    call get_weight(pos-V_ind+1, weight)
@@ -534,40 +607,75 @@ subroutine Inter_LastDir_Permut_com(dir, V_coarse, dx_c, V_fine, dx_f)
  integer :: ind_max, ind_min, ind_limit, ind_limit_2
  real(WP) :: pos
  integer :: N_coarse, N_fine ! number of grid points
+  integer :: com_pos ! to deal with multiple communications - if (stencil size)/2 > local size of coarse grid
+                     ! = position where ghost values are received in V_beg and V_end
+  integer, dimension(2) :: com_nb ! number of communications (if (stencil size)/2 > local size of coarse grid)
  integer, dimension(2) :: com_size ! size of mpi communication for ghost points
-  integer, dimension(2) :: rece_request ! mpi communication request (handle) of nonblocking receive
-  integer, dimension(MPI_STATUS_SIZE) :: status ! mpi status (for mpi_wait)
  integer :: ierr ! mpi error code
+  integer, dimension(:),allocatable :: beg_request ! mpi communication request (handle) of nonblocking receive
+  integer, dimension(:),allocatable :: end_request ! mpi communication request (handle) of nonblocking receive
 
  ! Initialisation
-  com_size = size(V_coarse,1)*size(V_coarse,2)
-  com_size(1) = com_size(1)*(stencil_g)
-  com_size(2) = com_size(2)*(stencil_d)
+  com_size(1) = size(V_coarse,1)*size(V_coarse,2)
  N_coarse = size(V_coarse,3)
  N_fine = size(V_fine,2)
-  ind_max = ceiling((N_coarse-stencil_d)*dx_c/dx_f) - 1
+  ind_max = ceiling(((N_coarse-stencil_d)*dx_c/dx_f)-1e-6) ! To avoid numerical error and thus segmentation fault
  ind_min = ceiling((stencil_g)*dx_c/dx_f)+1
 
  ! ==== Communication ====
  if(stencil_g>0) then
    allocate(V_beg(size(V_coarse,1),size(V_coarse,2),stencil_g))
-    ! Initiate non blocking receive
-    call Mpi_Irecv(V_beg(1,1,1),com_size(1),MPI_DOUBLE_PRECISION, &
-        & neighbors(dir,-1), 1, D_comm(dir), rece_request(1), ierr)
-    ! Send data
-    call Mpi_Send(V_coarse(1,1,N_coarse-stencil_g+1),com_size(1),MPI_DOUBLE_PRECISION, &
-        & neighbors(dir,1), 1, D_comm(dir), ierr)
+    com_nb(1) = ceiling(real(stencil_g)/N_coarse) ! number of communications required to get the ghosts
+    allocate(beg_request(com_nb(1)))
+    com_pos = stencil_g+1 ! i = 1 + missing (or remaining) ghost lines
+    com_size(2) = com_size(1)*N_coarse
+    ! Except for the last communication, send all the local coarse data.
+    ! Note that this happens if the local coarse grid contains fewer than (stencil_size/2)
+    ! points along the current direction (ie if the coarse grid is very coarse)
+    do ind = 1, com_nb(1)-1
+      com_pos = com_pos - N_coarse ! = 1 + missing ghost lines after this step
+      ! Communication
+      call Mpi_Irecv(V_beg(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+          & neighbors(dir,-ind), 100+ind, D_comm(dir), beg_request(ind), ierr)
+      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+          & neighbors(dir,ind), 100+ind, D_comm(dir), ierr)
+    end do
+    ! Last communication to complete the "right" ghosts (beginning points)
+    ! We use that missing ghost lines = com_pos - 1
+    com_size(2) = com_size(1)*(com_pos-1)
+    call Mpi_Irecv(V_beg(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,-com_nb(1)), 1, D_comm(dir), beg_request(com_nb(1)), ierr)
+    call Mpi_Send(V_coarse(1,1,N_coarse-com_pos+2),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,com_nb(1)), 1, D_comm(dir), ierr)
  end if
  if(stencil_d>0) then
    allocate(V_end(size(V_coarse,1),size(V_coarse,2),stencil_d))
-    ! Initiate non blocking receive
-    call Mpi_Irecv(V_end(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
-        & neighbors(dir,1), 2, D_comm(dir), rece_request(2), ierr)
+    com_nb(2) = ceiling(real(stencil_d)/N_coarse) ! number of communications required to get the ghosts
+    allocate(end_request(com_nb(2)))
+    com_pos = 1 ! Reception from the next process is done in position 1
+    com_size(2) = com_size(1)*N_coarse
+    ! Except for the last communication, send all the local coarse data.
+    ! Note that this happens if the local coarse grid contains fewer than (stencil_size/2)
+    ! points along the current direction (ie if the coarse grid is very coarse)
+    do ind = 1, com_nb(2)-1
+      ! Communication
+      call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+          & neighbors(dir,ind), 200+ind, D_comm(dir), end_request(ind), ierr)
+      call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
+          & neighbors(dir,-ind), 200+ind, D_comm(dir), ierr)
+      ! next com_pos = (ind*N_coarse)+1 = com_pos + N_coarse
+      com_pos = com_pos + N_coarse
+    end do
+    ! Last step
+    ! Note that: missing ghost lines = stencil_d - (com_nb-1)*N_coarse
+    com_size(2) = com_size(1)*(stencil_d-((com_nb(2)-1)*N_coarse))
+    ! Perform communication
+    call Mpi_Irecv(V_end(1,1,com_pos),com_size(2),MPI_DOUBLE_PRECISION, &
+        & neighbors(dir,com_nb(2)), 2, D_comm(dir), end_request(com_nb(2)), ierr)
    ! Send data
    call Mpi_Send(V_coarse(1,1,1),com_size(2),MPI_DOUBLE_PRECISION, &
-        & neighbors(dir,-1), 2, D_comm(dir), ierr)
-  else
+        & neighbors(dir,-com_nb(2)), 2, D_comm(dir), ierr)
  end if
 
  ! ==== Interpolation ====
@@ -582,12 +690,22 @@ subroutine Inter_LastDir_Permut_com(dir, V_coarse, dx_c, V_fine, dx_f)
      V_fine(:,ind,:) = V_fine(:,ind,:) + weight(i+1)*V_coarse(:,:,V_ind+i)
    end do
  end do
+
+  ! -- Wait for communication completion before dealing with the end --
+  if(stencil_g>0) then
+    call mpi_waitall(com_nb(1),beg_request, MPI_STATUSES_IGNORE, ierr)
+    deallocate(beg_request)
+  end if
+  if(stencil_d>0) then
+    call mpi_waitall(com_nb(2),end_request, MPI_STATUSES_IGNORE, ierr)
+    deallocate(end_request)
+  end if
+
  ! -- For begining --
-  if(stencil_g>0) call mpi_wait(rece_request(1), status, ierr)
  ! Use that interpolation formula are exact
  V_fine(:,1,:) = V_coarse(:,:,1)
  ! For other first points
-  do ind = 2, ind_min-1
+  do ind = 2, min(ind_min-1, N_fine) ! Be careful: in some massively parallel contexts, ind_min can be bigger than N_fine+1
    pos = (ind-1)*(dx_f/dx_c)
    V_ind = floor(pos)+1
    call get_weight(pos-V_ind+1, weight)
@@ -601,11 +719,10 @@ subroutine Inter_LastDir_Permut_com(dir, V_coarse, dx_c, V_fine, dx_f)
      V_fine(:,ind,:) = V_fine(:,ind,:) + weight(i_bis)*V_coarse(:,:,i_bis-ind_limit)
    end do
    do i_bis = ind_limit_2+1, stencil_size
-      V_fine(:,ind,:) = V_fine(:,ind,:) + weight(i_bis)*V_end(:,:,i_bis-ind_limit-N_coarse)
+      V_fine(:,ind,:) = V_fine(:,ind,:) + weight(i_bis)*V_end(:,:,i_bis-ind_limit_2)
    end do
  end do
  ! -- For point of at the end of a line along the current direction --
-  if(stencil_d>0) call mpi_wait(rece_request(2), status, ierr)
  ! If ind_max<= ind_min (ie if stencil_size>N_coarse), computation are already done for first point
  do ind = max(ind_max+1,ind_min), N_fine
    pos = (ind-1)*(dx_f/dx_c)
    V_ind = floor(pos)+1
    call get_weight(pos-V_ind+1, weight)
@@ -651,6 +768,8 @@ subroutine Inter_FirstDir_no_com(V_coarse, dx_c, V_fine, dx_f)
  integer :: i, ind, V_ind ! some loop indices
  real(WP) :: pos
 
+  V_fine = 0.0_WP
+
  ! ==== Initialisation ====
  N_coarse = size(V_coarse,1)
  N_fine = size(V_fine,1)
@@ -861,6 +980,7 @@ subroutine weight_Mprime4(pos,weight)
 
 end subroutine weight_Mprime4
 
+!> Interpolation with the M4 kernel. Order 2 everywhere?
 subroutine weight_M4(pos,weight)
 
  real(WP), intent(in) :: pos
@@ -882,6 +1002,7 @@ subroutine weight_M4(pos,weight)
 
 end subroutine weight_M4
 
+!> Interpolation with the Lambda(4,4) kernel. Order 4 everywhere.
 subroutine weight_Lambda4_4(pos,weight)
 
  real(WP), intent(in) :: pos
@@ -899,5 +1020,18 @@ subroutine weight_Lambda4_4(pos,weight)
 
 end subroutine weight_Lambda4_4
 
+!> Basic linear interpolation formula. Be careful: this kernel may create unphysical
+!! low-frequency oscillations. It is mainly implemented to show the need for "better"
+!! interpolation kernels.
+subroutine weight_linear(pos,weight)
+
+  real(WP), intent(in) :: pos
+  real(WP), dimension(:), intent(out) :: weight
+
+  weight(1) = 1-pos
+  weight(2) = pos
+
+end subroutine weight_linear
+
 end module Interpolation_velo
 !> @}
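
Illustration (not part of the patch): a minimal, self-contained sketch of the round-off problem that the "-eps" guard on ind_max avoids. The names N_coarse, stencil_d, dx_c and dx_f mirror the variables of Inter_LastDir_com, but the numerical values are made up, and whether the overshoot actually appears depends on the compiler and the grid sizes.

! Standalone demo - not part of the library. Shows that ceiling() applied to a value
! that should be an exact integer can overshoot by one because of round-off, which is
! exactly what "ceiling(x - eps)" prevents when computing ind_max.
program demo_eps_guard
  implicit none
  integer, parameter  :: WP  = kind(1.0d0)
  real(WP), parameter :: eps = 1e-6_WP
  real(WP) :: dx_c, dx_f, x
  integer  :: N_coarse, stencil_d

  N_coarse  = 22            ! local number of coarse points (made-up value)
  stencil_d = 1             ! right half of the interpolation stencil (made-up value)
  dx_f = 1.0_WP/66.0_WP     ! fine grid step
  dx_c = 3.0_WP*dx_f        ! coarse grid step: the ratio is exactly 3 in theory

  x = (N_coarse-stencil_d)*dx_c/dx_f
  print *, 'raw value        : ', x              ! may be 63.000000000000007 instead of 63.0
  print *, 'ceiling(x)       : ', ceiling(x)     ! may give 64 -> one-past-the-end access
  print *, 'ceiling(x - eps) : ', ceiling(x-eps) ! gives the intended 63
end program demo_eps_guard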