From ad3d0649d877f3a6c6531ec57051e08f85778186 Mon Sep 17 00:00:00 2001
From: Jean-Baptiste Keck <Jean-Baptiste.Keck@imag.fr>
Date: Tue, 6 Mar 2018 17:13:15 +0100
Subject: [PATCH] ghost exchangers

---
 examples/scalar_advection/scalar_advection.py |   4 +
 hysop/constants.py                            |   2 +
 hysop/fields/ghost_exchangers.py              | 474 +++++++++++++-----
 3 files changed, 359 insertions(+), 121 deletions(-)

diff --git a/examples/scalar_advection/scalar_advection.py b/examples/scalar_advection/scalar_advection.py
index e37d36952..c18d3df77 100644
--- a/examples/scalar_advection/scalar_advection.py
+++ b/examples/scalar_advection/scalar_advection.py
@@ -20,6 +20,8 @@ def compute(args):
     def init_velocity(data, coords):
         for i,d in enumerate(data):
             d[...] = args.velocity[::-1][i]
+        # data[0][...] = np.arange(data[0].shape[0])[:,None]
+        # data[1][...] = -np.arange(data[0].shape[0])[:,None]
 
     ## Function to compute initial scalar values
     def init_scalar(data, coords):
@@ -101,6 +103,8 @@ def compute(args):
     dfields = problem.input_discrete_fields
     dfields[velo].initialize(formula=init_velocity)
     dfields[scalar].initialize(formula=init_scalar)
+    # import sys
+    # sys.exit(1)
     
     # Create a simulation and solve the problem 
     # (do not forget to specify the dt parameter here)
diff --git a/hysop/constants.py b/hysop/constants.py
index 3a213b9a7..a40f41443 100644
--- a/hysop/constants.py
+++ b/hysop/constants.py
@@ -102,6 +102,8 @@ def implementation_to_backend(implementation):
 
 GhostOperation = EnumFactory.create('GhostOperation',
         ['EXCHANGE', 'ACCUMULATE'])
+ExchangeMethod = EnumFactory.create('ExchangeMethod',
+        ['ISEND_IRECV', 'NEIGHBOR_ALL_TO_ALL_V', 'NEIGHBOR_ALL_TO_ALL_W'])
 
 Precision = EnumFactory.create('Precision',
     ['DEFAULT', 'SAME', 'QUAD', 'LONG_DOUBLE', 'DOUBLE', 'FLOAT', 'HALF'])
diff --git a/hysop/fields/ghost_exchangers.py b/hysop/fields/ghost_exchangers.py
index cd92299fa..d7ea4fceb 100644
--- a/hysop/fields/ghost_exchangers.py
+++ b/hysop/fields/ghost_exchangers.py
@@ -2,7 +2,7 @@
 from hysop.deps import np, hashlib
 from hysop.tools.types import check_instance, to_tuple, first_not_None
 from hysop.tools.mpi_utils import iter_mpi_requests, dtype_to_mpi_type
-from hysop.constants import GhostOperation, Backend, DirectionLabels
+from hysop.constants import GhostOperation, Backend, DirectionLabels, ExchangeMethod
 from hysop.topology.topology import Topology, TopologyView
 from hysop.topology.cartesian_topology import CartesianTopologyView
 from hysop.core.arrays.all import HostArray, OpenClArray
@@ -16,6 +16,16 @@ class GhostExchanger(object):
         check_instance(topology, TopologyView)
         check_instance(data, tuple, minsize=1)
         check_instance(ghost_op, GhostOperation)
+        
+        dtype = data[0].dtype
+        for d in data[1:]:
+            if (d.dtype != dtype):
+                msg='All data should have the same dtype, got {} and {}.'
+                msg=msg.format(dtype, d.dtype)
+                raise RuntimeError(msg)
+        base_dtype    = dtype
+        base_mpi_type = dtype_to_mpi_type(dtype)
+
         self.topology = topology
         self.data = data
         self.mesh = topology.mesh
@@ -23,6 +33,8 @@ class GhostExchanger(object):
         self.base_tag = int(hashlib.sha1(name).hexdigest(), 16) % (104729)
         self.name = name
         self.ghost_op = ghost_op
+        self.base_dtype = base_dtype
+        self.base_mpi_type = base_mpi_type
 
 class CartesianDiscreteFieldGhostExchanger(GhostExchanger):
 
@@ -68,8 +80,11 @@ class CartesianDiscreteFieldGhostExchanger(GhostExchanger):
 
     def build_python_ghost_exchanger(self):
         ghost_op        = self.ghost_op
+        comm            = self.topology.comm
         local_rank      = self.topology.cart_rank
         neighbour_ranks = self.topology.proc_neighbour_ranks
+        base_dtype      = self.base_dtype
+        base_mpi_type   = self.base_mpi_type
 
         def msg_tag(i, local_rank, target_rank, d, direction):
             tag = self.base_tag
@@ -79,20 +94,29 @@ class CartesianDiscreteFieldGhostExchanger(GhostExchanger):
             tag += (d + 1) * 97
             tag += (direction+1)*17
             return tag
-        
+
         local_exchange  = []
-        global_send     = []
-        global_recv     = []
         target_buffers  = []
+        
+        # Call kwds depends on exchange method used (Send-Recv, Window', 'Neighbor_AlltoAll')
+        exchange_method    = ExchangeMethod.NEIGHBOR_ALL_TO_ALL_V
+        isend_kwds        = []    # one call per direction, per dimension, per data buffer
+        irecv_kwds        = []    # one call per direction, per dimension, per data buffer
+        all_to_all_v_kwds = None  # only one call
+        all_to_all_w_kwds = []    # one cell per data buffer
+        
+        has_mpi_exchange = False
+        all_to_all_v_send_requests = {}
+        all_to_all_v_recv_requests = {}
         for (i,buf) in enumerate(self.data):
-            dtype = buf.dtype
-            base_mpi_type = dtype_to_mpi_type(dtype)
+            all_to_all_w_send_requests = {}
+            all_to_all_w_recv_requests = {}
             for d in self.directions:
                 if self.ghosts[d]==0:
                     continue
                 inner_left, inner_right, _ = self.inner_ghosts[d]
                 outer_left, outer_right, _ = self.outer_ghosts[d]
-
+                has_mpi_exchange |= (self.nprocs[d] > 1)
                 if (self.nprocs[d] == 1):
                     local_exchange.append((buf,outer_right,inner_left))
                     local_exchange.append((buf,outer_left,inner_right))
@@ -101,133 +125,334 @@ class CartesianDiscreteFieldGhostExchanger(GhostExchanger):
                     #   INNER GHOSTS ---> OUTER GHOSTS
                     # OPERATION IS
                     #   OUTER_GHOSTS[...] = INNER_GHOSTS
-                    
-                    # Exchanges with left neighour
-                    left_rank = neighbour_ranks[0,d]
-                    assert (left_rank != local_rank) and (left_rank != -1)
-                    
-                    # send inner left to left rank
-                    sendtag  = msg_tag(i, local_rank, left_rank, d, 0)
-                    mpi_type = TopoTools.create_subarray(inner_left, buf.shape,
-                                                             mpi_type=base_mpi_type)
-                    send_kwds = {'buf':[buf.handle, mpi_type], 
-                                 'dest':left_rank, 
-                                 'tag':sendtag}
-                    global_send.append(send_kwds)
-                    
-                    # receive outer right from left rank
-                    recvtag = msg_tag(i, left_rank, local_rank, d, 1)
-                    mpi_type = TopoTools.create_subarray(outer_right, buf.shape,
-                                                                mpi_type=base_mpi_type)
-                    recv_kwds = {'buf':[buf.handle, mpi_type], 
-                                 'source':left_rank, 
-                                 'tag':recvtag}
-                    global_recv.append(recv_kwds)
-                    
-                    # Exchanges with right neighour
-                    right_rank = neighbour_ranks[1,d]
-                    assert (right_rank != local_rank) and (right_rank != -1)
+                    if (exchange_method is ExchangeMethod.NEIGHBOR_ALL_TO_ALL_V):
+                        # Send and receive every buffer at once to neighbours 
+                        # /!\  we send and receive all data components at once
+                        left_rank  = neighbour_ranks[0,d]
+                        right_rank = neighbour_ranks[1,d]
+                        all_to_all_v_send_requests.setdefault(('left',left_rank), [])   \
+                                                  .append(buf[inner_left])
+                        all_to_all_v_send_requests.setdefault(('right',right_rank), []) \
+                                                  .append(buf[inner_right])
+                        all_to_all_v_recv_requests.setdefault(('left',left_rank), [])   \
+                                                  .append(buf[outer_left])
+                        all_to_all_v_recv_requests.setdefault(('right',right_rank), []) \
+                                                  .append(buf[outer_right])
+                    elif (exchange_method is ExchangeMethod.NEIGHBOR_ALL_TO_ALL_W):
+                        # Send and receive to and from every neigbours
+                        # /!\ only one buffer at a time
+                        left_rank  = neighbour_ranks[0,d]
+                        right_rank = neighbour_ranks[1,d]
+                        all_to_all_w_send_requests.setdefault(('left',left_rank), [])   \
+                                                  .append((buf, inner_left))
+                        all_to_all_w_send_requests.setdefault(('right',right_rank), []) \
+                                                  .append((buf, inner_right))
+                        all_to_all_w_recv_requests.setdefault(('left',left_rank), [])   \
+                                                  .append((buf, outer_left))
+                        all_to_all_w_recv_requests.setdefault(('right',right_rank), []) \
+                                                  .append((buf, outer_right))
+                    elif (exchange_method is ExchangeMethod.ISEND_IRECV):
+                        # Exchanges with left neighour
+                        left_rank = neighbour_ranks[0,d]
+                        assert (left_rank != local_rank) and (left_rank != -1)
+                        
+                        # send inner left to left rank
+                        sendtag  = msg_tag(i, local_rank, left_rank, d, 0)
+                        mpi_type = TopoTools.create_subarray(inner_left, buf.shape,
+                                                                 mpi_type=base_mpi_type)
+                        send_kwds = {'buf':[buf.handle, mpi_type], 
+                                     'dest':left_rank, 
+                                     'tag':sendtag}
+                        isend_kwds.append(send_kwds)
+                        
+                        # receive outer right from left rank
+                        recvtag = msg_tag(i, left_rank, local_rank, d, 1)
+                        mpi_type = TopoTools.create_subarray(outer_right, buf.shape,
+                                                                    mpi_type=base_mpi_type)
+                        recv_kwds = {'buf':[buf.handle, mpi_type], 
+                                     'source':left_rank, 
+                                     'tag':recvtag}
+                        irecv_kwds.append(recv_kwds)
+                        
+                        # Exchanges with right neighour
+                        right_rank = neighbour_ranks[1,d]
+                        assert (right_rank != local_rank) and (right_rank != -1)
 
-                    # send inner right to right rank
-                    sendtag = msg_tag(i, local_rank, right_rank, d, 1)
-                    mpi_type = TopoTools.create_subarray(inner_right, buf.shape,
-                                                            mpi_type=base_mpi_type)
-                    send_kwds = {'buf':[buf.handle, mpi_type], 
-                                 'dest':right_rank, 
-                                 'tag':sendtag}
-                    global_send.append(send_kwds)
-                    
-                    # receive outer left from right rank
-                    recvtag = msg_tag(i, right_rank, local_rank, d, 0)
-                    mpi_type = TopoTools.create_subarray(outer_left, buf.shape,
-                                                            mpi_type=base_mpi_type)
-                    recv_kwds = {'buf':[buf.handle, mpi_type], 
-                                 'source':right_rank, 
-                                 'tag':recvtag}
-                    global_recv.append(recv_kwds)
+                        # send inner right to right rank
+                        sendtag = msg_tag(i, local_rank, right_rank, d, 1)
+                        mpi_type = TopoTools.create_subarray(inner_right, buf.shape,
+                                                                mpi_type=base_mpi_type)
+                        send_kwds = {'buf':[buf.handle, mpi_type], 
+                                     'dest':right_rank, 
+                                     'tag':sendtag}
+                        isend_kwds.append(send_kwds)
+                        
+                        # receive outer left from right rank
+                        recvtag = msg_tag(i, right_rank, local_rank, d, 0)
+                        mpi_type = TopoTools.create_subarray(outer_left, buf.shape,
+                                                                mpi_type=base_mpi_type)
+                        recv_kwds = {'buf':[buf.handle, mpi_type], 
+                                     'source':right_rank, 
+                                     'tag':recvtag}
+                        irecv_kwds.append(recv_kwds)
+                    else:
+                        msg='Unknown MPI exchange method {}.'.format(exchange_method)
+                        raise NotImplementedError(msg)
                 elif (ghost_op in (GhostOperation.ACCUMULATE,)):
                     # SEND DIRECTION IS
                     #    OUTER GHOSTS ---> TMP BUFFER
                     # OPERATION IS
                     #    INNER GHOSTS = OP(INNER_GHOSTS, OUTER_GHOSTS)
-                    
-                    # Exchanges with left neighour
-                    left_rank = neighbour_ranks[0,d]
-                    assert (left_rank != local_rank) and (left_rank != -1)
-                    
-                    # send outer left to left rank
-                    sendtag  = msg_tag(i, local_rank, left_rank, d, 0)
-                    mpi_type = TopoTools.create_subarray(outer_left, buf.shape,
-                                                             mpi_type=base_mpi_type)
-                    send_kwds = {'buf':[buf.handle, mpi_type], 
-                                 'dest':left_rank, 
-                                 'tag':sendtag}
-                    global_send.append(send_kwds)
-                    
-                    # receive outer right ghosts data from left rank in a tmp buffer
-                    recvtag = msg_tag(i, left_rank, local_rank, d, 1)
-                    mpi_type = base_mpi_type.Create_contiguous(buf[inner_left].size) 
-                    mpi_type.Commit()
-                    tmp = self.backend.empty_like(buf[inner_left])
-                    recv_kwds = {'buf':[tmp.handle, mpi_type], 
-                                 'source':left_rank, 
-                                 'tag':recvtag}
-                    global_recv.append(recv_kwds)
-                    target_buffers.append((tmp, buf, inner_left))
-                    
-                    # Exchanges with right neighour
-                    right_rank = neighbour_ranks[1,d]
-                    assert (right_rank != local_rank) and (right_rank != -1)
+                    if (exchange_method is ExchangeMethod.NEIGHBOR_ALL_TO_ALL_V):
+                        # Send and receive every buffer at once to neighbours 
+                        # /!\  we send and receive all data components at once
+                        left_rank  = neighbour_ranks[0,d]
+                        right_rank = neighbour_ranks[1,d]
+                        all_to_all_v_send_requests.setdefault(('left',left_rank), [])   \
+                                                  .append(buf[outer_left])
+                        all_to_all_v_send_requests.setdefault(('right',right_rank), []) \
+                                                  .append(buf[outer_right])
+                        all_to_all_v_recv_requests.setdefault(('left',left_rank), [])   \
+                                                  .append(buf[inner_left])
+                        all_to_all_v_recv_requests.setdefault(('right',right_rank), []) \
+                                                  .append(buf[inner_right])
+                    elif (exchange_method is ExchangeMethod.NEIGHBOR_ALL_TO_ALL_W):
+                        # Send and receive to and from every neigbours
+                        # /!\ only one buffer at a time
+                        left_rank  = neighbour_ranks[0,d]
+                        right_rank = neighbour_ranks[1,d]
+                        all_to_all_w_send_requests.setdefault(('left',left_rank), [])   \
+                                                  .append((buf, outer_left))
+                        all_to_all_w_send_requests.setdefault(('right',right_rank), []) \
+                                                  .append((buf, outer_right))
+                        all_to_all_w_recv_requests.setdefault(('left',left_rank), [])   \
+                                                  .append((buf, inner_left))
+                        all_to_all_w_recv_requests.setdefault(('right',right_rank), []) \
+                                                  .append((buf, inner_right))
+                    elif (exchange_method is ExchangeMethod.ISEND_IRECV):
+                        # Exchanges with left neighour
+                        left_rank = neighbour_ranks[0,d]
+                        assert (left_rank != local_rank) and (left_rank != -1)
+                        
+                        # send outer left to left rank
+                        sendtag  = msg_tag(i, local_rank, left_rank, d, 0)
+                        mpi_type = TopoTools.create_subarray(outer_left, buf.shape,
+                                                                 mpi_type=base_mpi_type)
+                        send_kwds = {'buf':[buf.handle, mpi_type], 
+                                     'dest':left_rank, 
+                                     'tag':sendtag}
+                        isend_kwds.append(send_kwds)
+                        
+                        # receive outer right ghosts data from left rank in a tmp buffer
+                        recvtag = msg_tag(i, left_rank, local_rank, d, 1)
+                        mpi_type = base_mpi_type.Create_contiguous(buf[inner_left].size) 
+                        mpi_type.Commit()
+                        tmp = self.backend.empty_like(buf[inner_left])
+                        recv_kwds = {'buf':[tmp.handle, mpi_type], 
+                                     'source':left_rank, 
+                                     'tag':recvtag}
+                        irecv_kwds.append(recv_kwds)
+                        target_buffers.append((tmp, buf, inner_left))
+                        
+                        # Exchanges with right neighour
+                        right_rank = neighbour_ranks[1,d]
+                        assert (right_rank != local_rank) and (right_rank != -1)
 
-                    # send outer right to right rank
-                    sendtag = msg_tag(i, local_rank, right_rank, d, 1)
-                    mpi_type = TopoTools.create_subarray(outer_right, buf.shape,
-                                                            mpi_type=base_mpi_type)
-                    send_kwds = {'buf':[buf.handle, mpi_type], 
-                                 'dest':right_rank, 
-                                 'tag':sendtag}
-                    global_send.append(send_kwds)
-                    
-                    # receive outer left ghosts data from right rank in a tmp buffer
-                    recvtag = msg_tag(i, right_rank, local_rank, d, 0)
-                    mpi_type = base_mpi_type.Create_contiguous(buf[inner_right].size) 
-                    mpi_type.Commit()
-                    tmp = self.backend.empty_like(buf[inner_right])
-                    recv_kwds = {'buf':[tmp.handle, mpi_type], 
-                                 'source':right_rank, 
-                                 'tag':recvtag}
-                    global_recv.append(recv_kwds)
-                    target_buffers.append((tmp, buf, inner_right))
+                        # send outer right to right rank
+                        sendtag = msg_tag(i, local_rank, right_rank, d, 1)
+                        mpi_type = TopoTools.create_subarray(outer_right, buf.shape,
+                                                                mpi_type=base_mpi_type)
+                        send_kwds = {'buf':[buf.handle, mpi_type], 
+                                     'dest':right_rank, 
+                                     'tag':sendtag}
+                        isend_kwds.append(send_kwds)
+                        
+                        # receive outer left ghosts data from right rank in a tmp buffer
+                        recvtag = msg_tag(i, right_rank, local_rank, d, 0)
+                        mpi_type = base_mpi_type.Create_contiguous(buf[inner_right].size) 
+                        mpi_type.Commit()
+                        tmp = self.backend.empty_like(buf[inner_right])
+                        recv_kwds = {'buf':[tmp.handle, mpi_type], 
+                                     'source':right_rank, 
+                                     'tag':recvtag}
+                        irecv_kwds.append(recv_kwds)
+                        target_buffers.append((tmp, buf, inner_right))
+                    else:
+                        msg='Unknown MPI exchange method {}.'.format(exchange_method)
+                        raise NotImplementedError(msg)
                 else:
                     msg='Unknown GhostOperation {}.'.format(ghost_op)
                     raise NotImplementedError(msg)
+
+            # handle all_to_all_w kwds (one call for all neighbours per buffer)
+            if (exchange_method is ExchangeMethod.NEIGHBOR_ALL_TO_ALL_W):
+                sendtypes, recvtypes = (),()   # subarray types
+                sendcounts, recvcounts = (),() # in type counts
+                sdispls, rdispls = (),()       # in bytes
+                for direction in xrange(comm.Get_dim()):
+                    for (tag,rank) in zip(('left','right'), comm.Shift(direction, 1)):
+                        if ((tag,rank) in all_to_all_w_send_requests):
+                            requests = all_to_all_w_send_requests[(tag,rank)]
+                            assert len(requests)==1
+                            (src,slc) = requests[0]
+                            assert src is buf
+                            assert src.dtype == base_dtype
+                            mpi_type = TopoTools.create_subarray(slc, src.shape, 
+                                                                    mpi_type=base_mpi_type)
+                            send_count = 1
+                        else:
+                            mpi_type  = base_mpi_type
+                            send_count = 0
+                        sendtypes  += (mpi_type,)
+                        sendcounts += (send_count,)
+                        sdispls    += (0,)
+                        if ((tag,rank) in all_to_all_w_recv_requests):
+                            requests = all_to_all_w_recv_requests[(tag,rank)]
+                            assert len(requests)==1
+                            (dst,slc) = requests[0]
+                            assert dst is buf
+                            assert dst.dtype == base_dtype
+                            mpi_type = TopoTools.create_subarray(slc, dst.shape, 
+                                                                    mpi_type=base_mpi_type)
+                            recv_count = 1
+                        else:
+                            mpi_type  = base_mpi_type
+                            recv_count = 0
+                        recvtypes  += (mpi_type,)
+                        recvcounts += (recv_count,)
+                        rdispls    += (0,)
+                all_to_all_w_kwds.append( {
+                    'sendbuf': [buf.handle, sendcounts, sdispls, sendtypes],
+                    'recvbuf': [buf.handle, recvcounts, rdispls, recvtypes]
+                } )
         
-        comm = self.topology.comm
+        # handle all_to_all_v kwds (one call for all buffers and all neighbours)
+        all_to_all_v_send_buffers = ()
+        all_to_all_v_recv_buffers = ()
+        all_to_all_v_send_buffer = None
+        all_to_all_v_recv_buffer = None
+        if has_mpi_exchange and (exchange_method is ExchangeMethod.NEIGHBOR_ALL_TO_ALL_V):
+            assert all_to_all_v_send_requests
+            assert all_to_all_v_recv_requests
+            sendtype, recvtype = base_mpi_type, base_mpi_type 
+            sendcounts, recvcounts = (),() # in type counts
+            sdispls, rdispls = (),()       # in type counts
+            send_disp, recv_disp = 0,0     # in type counts
+            send_buffers, recv_buffers = (), ()
+            for direction in xrange(comm.Get_dim()):
+                for (tag,rank) in zip(('left','right'), comm.Shift(direction, 1)):
+                    send_count, recv_count = 0, 0
+                    if ((tag,rank) in all_to_all_v_send_requests):
+                        for src in all_to_all_v_send_requests[(tag,rank)]:
+                            assert src.dtype == base_dtype
+                            slc = slice(send_disp+send_count, send_disp+send_count+src.size)
+                            send_buffers += ((src,slc),)
+                            send_count += src.size
+                    sendcounts += (send_count,)
+                    sdispls    += (send_disp,)
+                    send_disp  += send_count
+                    if ((tag,rank) in all_to_all_v_recv_requests):
+                        for dst in all_to_all_v_recv_requests[(tag,rank)]:
+                            assert dst.dtype == base_dtype
+                            slc = slice(recv_disp+recv_count, recv_disp+recv_count+dst.size)
+                            recv_buffers += ((dst,slc),)
+                            recv_count   += dst.size
+                    recvcounts += (recv_count,)
+                    rdispls    += (recv_disp,)
+                    recv_disp  += recv_count
+            assert send_disp == sum(sendcounts) > 0, send_disp
+            send_size = send_disp
+            send_buffer = self.backend.empty(shape=(send_size,), dtype=base_dtype)
+            assert recv_disp == sum(recvcounts) > 0, recv_disp
+            recv_size = recv_disp
+            recv_buffer = self.backend.empty(shape=(recv_size,), dtype=base_dtype)
+            assert send_buffer.dtype==base_dtype
+            assert recv_buffer.dtype==recv_buffer
+            all_to_all_v_kwds = {
+                'sendbuf': [send_buffer.handle, sendcounts, sdispls, sendtype],
+                'recvbuf': [recv_buffer.handle, recvcounts, rdispls, recvtype]
+            }
+            all_to_all_v_send_buffers = send_buffers
+            all_to_all_v_recv_buffers = recv_buffers
+            all_to_all_v_send_buffer = send_buffer
+            all_to_all_v_recv_buffer = recv_buffer
         
         if ghost_op is GhostOperation.EXCHANGE:
-            def python_launcher():
-                evts = []
-                for recv_kwds in global_recv:
-                    evt = comm.Irecv(**recv_kwds)
-                    evts.append(evt)
-                for send_kwds in global_send:
-                    comm.Isend(**send_kwds)
-                for (buf,outer,inner) in local_exchange:
-                    buf[outer] = buf[inner]
-                MPI.Request.Waitall(evts)
+            if has_mpi_exchange:
+                if exchange_method is ExchangeMethod.NEIGHBOR_ALL_TO_ALL_W:
+                    def python_launcher():
+                        evts = []
+                        for kwds in all_to_all_w_kwds:
+                            evt = comm.Ineighbor_alltoallw(**kwds)
+                            evts.append(evt)
+                        for (buf,outer,inner) in local_exchange:
+                            buf[outer] = buf[inner]
+                        MPI.Request.Waitall(evts)
+                elif exchange_method is ExchangeMethod.NEIGHBOR_ALL_TO_ALL_V:
+                    def python_launcher():
+                        if all_to_all_v_kwds:
+                            for (src,slc) in all_to_all_v_send_buffers:
+                                all_to_all_v_send_buffer[slc] = src.ravel()
+                            evt = comm.Ineighbor_alltoallv(**all_to_all_v_kwds)
+                        for (buf,outer,inner) in local_exchange:
+                            buf[outer] = buf[inner]
+                        if all_to_all_v_kwds:
+                            evt.wait()
+                            for (dst,slc) in all_to_all_v_recv_buffers:
+                                dst[...] = all_to_all_v_recv_buffer[slc].reshape(dst.shape)
+                elif exchange_method is ExchangeMethod.ISEND_IRECV:
+                    def python_launcher():
+                        evts = []
+                        for recv_kwds in irecv_kwds:
+                            evt = comm.Irecv(**recv_kwds)
+                            evts.append(evt)
+                        for send_kwds in isend_kwds:
+                            comm.Isend(**send_kwds)
+                        for (buf,outer,inner) in local_exchange:
+                            buf[outer] = buf[inner]
+                        MPI.Request.Waitall(evts)
+                else:
+                    msg='Unknown MPI exchange method {}.'.format(exchange_method)
+                    raise NotImplementedError(msg)
+            else:
+                def python_launcher():
+                    for (buf,outer,inner) in local_exchange:
+                        buf[outer] = buf[inner]
         elif ghost_op is GhostOperation.ACCUMULATE:
-            def python_launcher():
-                recv_evts = []
-                for recv_kwds in global_recv:
-                    evt = comm.Irecv(**recv_kwds)
-                    recv_evts.append(evt)
-                for send_kwds in global_send:
-                    comm.Isend(**send_kwds)
-                for (buf,outer,inner) in local_exchange:
-                    buf[inner] += buf[outer]
-                for idx in iter_mpi_requests(recv_evts):
-                    (tmp, buf, inner) = target_buffers[idx]
-                    buf[inner] += tmp
+            if has_mpi_exchange:
+                if exchange_method is ExchangeMethod.NEIGHBOR_ALL_TO_ALL_W:
+                    raise NotImplementedError()
+                elif exchange_method is ExchangeMethod.NEIGHBOR_ALL_TO_ALL_V:
+                    assert all_to_all_v_kwds:
+                    def python_launcher():
+                        for (src,slc) in all_to_all_v_send_buffers:
+                            all_to_all_v_send_buffer[slc] = src.ravel()
+                        evt = comm.Ineighbor_alltoallv(**all_to_all_v_kwds)
+                        for (buf,outer,inner) in local_exchange:
+                            buf[inner] += buf[outer]
+                        evt.wait()
+                        for (dst,slc) in all_to_all_v_recv_buffers:
+                            dst[...] += all_to_all_v_recv_buffer[slc].reshape(dst.shape)
+                elif exchange_method is ExchangeMethod.ISEND_IRECV:
+                    def python_launcher():
+                        recv_evts = []
+                        for recv_kwds in irecv_kwds:
+                            evt = comm.Irecv(**recv_kwds)
+                            recv_evts.append(evt)
+                        for send_kwds in isend_kwds:
+                            comm.Isend(**send_kwds)
+                        for (buf,outer,inner) in local_exchange:
+                            buf[inner] += buf[outer]
+                        for idx in iter_mpi_requests(recv_evts):
+                            (tmp, buf, inner) = target_buffers[idx]
+                            buf[inner] += tmp
+                else:
+                    msg='Unknown MPI exchange method {}.'.format(exchange_method)
+                    raise NotImplementedError(msg)
+            else:
+                def python_launcher():
+                    for (buf,outer,inner) in local_exchange:
+                        buf[inner] += buf[outer]
         else:
             msg='Unknown GhostOperation {}.'.format(ghost_op)
             raise NotImplementedError(msg)
@@ -237,9 +462,15 @@ class CartesianDiscreteFieldGhostExchanger(GhostExchanger):
 
     def exchange_ghosts_on_host_backend(self):
         """Exchange ghosts on specified host data."""
+        # print 'EXCHANGE GHOSTS'
+        # print self.ghost_op, self.directions, self.ghosts
+        # print 'BEFORE EXCHANGE'
+        # print self.data[0]
         if (self._python_launcher is None):
             self.build_python_ghost_exchanger()
-        return self._python_launcher()
+        self._python_launcher()
+        # print 'AFTER EXCHANGE'
+        # print self.data[0]
 
     def build_opencl_kernel_launcher(self):
         from hysop.backend.device.opencl.opencl_kernel_launcher import OpenClKernelListLauncher
@@ -289,3 +520,4 @@ class CartesianDiscreteFieldGhostExchanger(GhostExchanger):
             self.build_opencl_kernel_launcher()
         return self._cl_launcher(queue=queue)
 
+
-- 
GitLab