From 1aeb648a06e77c30089df423150dfb969cbead05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franck=20P=C3=A9rignon?= <franck.perignon@imag.fr>
Date: Sat, 21 May 2016 17:19:26 +0200
Subject: [PATCH] -Review topo creation - fix tests + pep8 things + docstrings

---
 CMakeLists.txt                                |   6 +
 hysop/domain/domain.py                        |  30 +-
 hysop/gpu/gpu_diffusion.py                    |   6 +-
 hysop/gpu/gpu_discrete.py                     |   2 +-
 hysop/gpu/gpu_multiphase_baroclinic_rhs.py    |   6 +-
 hysop/gpu/gpu_particle_advection.py           |  74 +++--
 hysop/gpu/multi_gpu_particle_advection.py     |  36 +--
 .../tests/test_gpu_multiresolution_filter.py  |   3 +-
 hysop/gpu/tools.py                            | 277 +++++++++++-------
 hysop/mpi/__init__.py                         |  35 +--
 hysop/mpi/main_var.py                         |  16 -
 hysop/mpi/tests/test_bridge.py                |   5 +-
 hysop/mpi/tests/utils.py                      |  92 +++++-
 hysop/mpi/topology.py                         | 110 +++++--
 hysop/numerics/remeshing.py                   |   2 +-
 hysop/operator/computational.py               |   2 +-
 hysop/operator/curlAndDiffusion.py            |   2 +-
 .../operator/tests/test_adaptive_time_step.py |   3 +-
 .../tests/test_multiresolution_filter.py      |   3 +-
 hysop/operator/tests/test_redistribute.py     |  17 +-
 hysop/problem/problem_tasks.py                |   7 +-
 hysop/tools/parameters.py                     |   2 +-
 hysop/tools/problem2dot.py                    |   2 +-
 hysop/tools/profiler.py                       |   2 +-
 24 files changed, 465 insertions(+), 275 deletions(-)
 delete mode 100644 hysop/mpi/main_var.py

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9e85aa59a..3b5fb6a5d 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -348,6 +348,12 @@ if(EXISTS ${CMAKE_SOURCE_DIR}/hysop/__init__.py.in)
   configure_file(hysop/__init__.py.in ${CMAKE_SOURCE_DIR}/hysop/__init__.py)
 endif()
 
+if(EXISTS ${CMAKE_SOURCE_DIR}/hysop/mpi/__init__.py.in)
+  message(STATUS "Generate mpi/__init__.py file ...")
+  file(REMOVE ${CMAKE_SOURCE_DIR}/hysop/mpi/__init__.py)
+  configure_file(hysop/mpi/__init__.py.in ${CMAKE_SOURCE_DIR}/hysop/mpi/__init__.py)
+endif()
+
 # Hysop C++ library is generated in setup.py by swig
 # --- C++ main and tests  ---
 if(WITH_LIB_CXX)
diff --git a/hysop/domain/domain.py b/hysop/domain/domain.py
index 6868c724d..7ba343ab6 100644
--- a/hysop/domain/domain.py
+++ b/hysop/domain/domain.py
@@ -46,13 +46,11 @@ class Domain(object):
 
         Notes
         -----
-        About boundary conditions
-        ==========================
+        * About boundary conditions:
         At the time, only periodic boundary conditions are implemented.
         Do not use this parameter (i.e. let default values)
 
-        About MPI Tasks
-        ================
+        *About MPI Tasks
         proc_tasks[n] = 12 means that task 12 owns proc n.
 
         """
@@ -134,7 +132,8 @@ class Domain(object):
         return self._tasks_on_proc[main_rank]
 
     def create_topology(self, discretization, dim=None, mpi_params=None,
-                        shape=None, cutdir=None):
+                        shape=None, cutdir=None, isperiodic=None,
+                        cartesian_topology=None):
         """Create or return an existing :class:`~hysop.mpi.topology.Cartesian`.
 
         Parameters
@@ -152,6 +151,12 @@ class Domain(object):
         cutdir : list or array of bool
             set which directions must (may) be distributed,
             cutdir[dir] = True to distribute data along dir.
+        isperiodic : list or array of bool, optional
+            mpi grid periodicity
+        cartesian_topology : MPI.Cartcomm, optional
+            a predefined mpi cartesian topology. Use this
+            when you need the same communicator for two
+            different meshes/space distribution of data.
 
         Returns
         -------
@@ -161,6 +166,18 @@ class Domain(object):
             :class:`~hysop.mpi.topology.Cartesian`)
             or it creates a new topology and register it in the topology dict.
 
+        Notes:
+        ------
+        * Almost all parameters above are optional.
+          Only one must be chosen among dim, cutdir and shape.
+          See :ref:`topologies` in the Hysop User Guide for details.
+        * when cartesian_topology is given, dim, shape and cutdir parameters,
+          if set, are not used to build the mpi topology, but compared with
+          cartesian_topology parameters. If they do not fit, error may occur.
+        * See hysop.domain.domain.Domain.create_plane_topology_from_mesh
+          details to build a plane topology from a given local discretization
+          (e.g. from fftw or scales precomputation).
+
         """
         # set task number
         tid = self.current_task()
@@ -172,7 +189,8 @@ class Domain(object):
             assert mpi_params.task_id == tid, msg
         new_topo = Cartesian(self, discretization, dim=dim,
                              mpi_params=mpi_params, shape=shape,
-                             cutdir=cutdir)
+                             cutdir=cutdir, isperiodic=isperiodic,
+                             cartesian_topology=cartesian_topology)
         newid = new_topo.get_id()
         return self.topologies[newid]
 
diff --git a/hysop/gpu/gpu_diffusion.py b/hysop/gpu/gpu_diffusion.py
index 7496ea541..0f2084b76 100644
--- a/hysop/gpu/gpu_diffusion.py
+++ b/hysop/gpu/gpu_diffusion.py
@@ -13,7 +13,7 @@ from hysop.gpu.gpu_operator import GPUOperator
 from hysop.gpu.gpu_kernel import KernelLauncher
 from hysop.gpu.gpu_discrete import GPUDiscreteField
 from hysop.tools.profiler import FProfiler
-from hysop.mpi.main_var import MPI
+from hysop.mpi import Wtime
 
 
 class GPUDiffusion(DiscreteOperator, GPUOperator):
@@ -185,7 +185,7 @@ class GPUDiffusion(DiscreteOperator, GPUOperator):
     def _compute_diffusion_comm(self, simulation):
         assert self.field_tmp is not None
         # Compute OpenCL transfer parameters
-        tc = MPI.Wtime()
+        tc = Wtime()
         topo = self.field.topology
         first_cut_dir = topo.cutdir.tolist().index(True)
         wait_evt = []
@@ -249,7 +249,7 @@ class GPUDiffusion(DiscreteOperator, GPUOperator):
                                             self._to_recv_buf[d],
                                             self._to_recv[d],
                                             is_blocking=False))
-        self.profiler['comm_diffusion'] += MPI.Wtime() - tc
+        self.profiler['comm_diffusion'] += Wtime() - tc
 
         if len(self._cutdir_list) == 1:
             d_evt = self.num_diffusion(
diff --git a/hysop/gpu/gpu_discrete.py b/hysop/gpu/gpu_discrete.py
index 0dd5a8596..d51a5fa23 100644
--- a/hysop/gpu/gpu_discrete.py
+++ b/hysop/gpu/gpu_discrete.py
@@ -98,7 +98,7 @@ class GPUDiscreteField(DiscreteField):
         # By default, all mpi process are take, otherwise, user create and
         # gives his own topologies.
         if topology is None:
-            from hysop.mpi.main_var import main_rank
+            from hysop.mpi import main_rank
             self._rank = main_rank
         else:
             self._rank = topology.rank
diff --git a/hysop/gpu/gpu_multiphase_baroclinic_rhs.py b/hysop/gpu/gpu_multiphase_baroclinic_rhs.py
index b14706fbb..25500f61d 100644
--- a/hysop/gpu/gpu_multiphase_baroclinic_rhs.py
+++ b/hysop/gpu/gpu_multiphase_baroclinic_rhs.py
@@ -13,7 +13,7 @@ from hysop.gpu.gpu_operator import GPUOperator
 from hysop.gpu.gpu_kernel import KernelListLauncher
 from hysop.gpu.gpu_discrete import GPUDiscreteField
 from hysop.tools.profiler import FProfiler
-from hysop.mpi.main_var import MPI
+from hysop.mpi import Wtime
 from hysop.methods_keys import SpaceDiscretisation
 from hysop.numerics.finite_differences import FD_C_4
 
@@ -275,7 +275,7 @@ class BaroclinicRHS(DiscreteOperator, GPUOperator):
 
     def _compute_baroclinic_rhs_comm(self, simulation):
         """Compute operator with communications"""
-        tc = MPI.Wtime()
+        tc = Wtime()
         topo = self.rho.topology
         first_cut_dir = topo.cutdir.tolist().index(True)
         wait_evt = []
@@ -339,7 +339,7 @@ class BaroclinicRHS(DiscreteOperator, GPUOperator):
                                             self._to_recv_buf[d],
                                             self._to_recv[d],
                                             is_blocking=False))
-        self.profiler['comm_baroclinic_rhs_comm'] += MPI.Wtime() - tc
+        self.profiler['comm_baroclinic_rhs_comm'] += Wtime() - tc
         self.rhs.events.append(self._call_kernel(wait_evt))
 
     def _call_kernel_one_ghost(self, wait_evt):
diff --git a/hysop/gpu/gpu_particle_advection.py b/hysop/gpu/gpu_particle_advection.py
index 9a09a6767..3b065ee71 100644
--- a/hysop/gpu/gpu_particle_advection.py
+++ b/hysop/gpu/gpu_particle_advection.py
@@ -1,11 +1,8 @@
+"""Discrete advection for GPU
 """
-@file gpu_particle_advection.py
-
-Discrete advection representation
-"""
-from abc import ABCMeta, abstractmethod
+from abc import abstractmethod
 from hysop import __VERBOSE__
-from hysop.constants import np, debug, S_DIR
+from hysop.constants import np, debug, S_DIR, HYSOP_REAL
 from hysop.methods_keys import TimeIntegrator, Remesh, \
     Support, Splitting, MultiScale
 from hysop.numerics.integrators.euler import Euler
@@ -22,37 +19,34 @@ from hysop.numerics.update_ghosts import UpdateGhostsFull
 
 
 class GPUParticleAdvection(ParticleAdvection, GPUOperator):
-    """
-    Particle advection operator representation on GPU.
+    """Particle advection solver on GPU
 
     """
     __metaclass__ = ABCMeta
 
     @debug
     def __init__(self, **kwds):
-        """
-        Create a Advection operator.
-        Work on a given field (scalar or vector) at a given velocity to compute
-        advected values.
+        """Particular advection of field(s) in a given direction,
+        on GPU.
+
         OpenCL kernels are build once per dimension in order to handle
         directional splitting with resolution non uniform in directions.
 
-        @param velocity : Velocity field
-        @param fields_on_grid : Advected fields
-        @param d : Direction to advect
-        @param device_type : OpenCL device type (default = 'gpu').
-        @param method : the method to use. {'m4prime', 'm6prime', 'm8prime',
-        'l6star'}
-        @param user_src : User OpenCL sources.
-        @param splittingConfig : Directional splitting configuration
-        (hysop.numerics.splitting.Splitting.__init__)
+        Parameters
+        ----------
+        kwds : base classes parameters
+
+        Note
+        ----
+        * warning : this operator is derived from ParticleAdvection AND
+        GPUOperator. kwds must then handle argument of both classes.
         """
         # Set default method if unknown
         if 'method' not in kwds:
             kwds['method'] = default.ADVECTION
             kwds['method'][Support] = 'gpu_2k'
 
-        # init base class
+        # init base class (i.e. ParticleAdvection)
         super(GPUParticleAdvection, self).__init__(**kwds)
         self.fields_topo = self.fields_on_grid[0].topology
         self.velocity_topo = self.velocity.topology
@@ -67,6 +61,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator):
         # [ParticleAdvection, GPUOperator], i.e. ParticleAdvection.__init__)
         # the GPUOperator.__init__ must be explicitely called.
         # see http://stackoverflow.com/questions/3277367/how-does-pythons-super-work-with-multiple-inheritance
+        # init second base class (i.e GPUOperator)
         GPUOperator.__init__(
             self,
             platform_id=get_extra_args_from_method(self, 'platform_id', None),
@@ -191,11 +186,11 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator):
         if self.direction == 0:
             self._buffer_initialisations()
 
-        ## List of executions
+        # List of executions
         self.exec_list = None
         self._build_exec_list()
 
-        ## Particle initialisation OpenCL events for each field:
+        # Particle initialisation OpenCL events for each field:
         self._init_events = {self.fields_on_grid[0]: []}
 
     @abstractmethod
@@ -208,6 +203,8 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator):
         pass
 
     def _build_exec_list(self):
+        """Prepare GPU kernels sequence
+        """
         # Build execution list regarding splitting:
         # Splitting Strang 2nd order:
         #   3D: X(dt/2), Y(dt/2), Z(dt), Y(dt/2), X(dt/2)
@@ -253,7 +250,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator):
                 #[self._init_copy, self._init_copy_r],  # X(dt)
                 ]
         else:
-            raise ValueError('Not yet implemeted Splitting on GPU : ' +
+            raise ValueError('Splitting type not yet implemeted on GPU: ' +
                              self.method[Splitting])
 
     def globalMemoryUsagePreview(self, v_shape, shape):
@@ -269,17 +266,16 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator):
         pass
 
     def _buffer_allocations(self):
+        """Allocate OpenCL buffers for velocity and advected field.
         """
-        Allocate OpenCL buffers for velocity and advected field.
-        """
-        ## Velocity.
+        # Velocity.
         alloc = not isinstance(self.velocity, GPUDiscreteField)
         GPUDiscreteField.fromField(self.cl_env, self.velocity,
                                    self.gpu_precision, simple_layout=False)
         if alloc:
             self.size_global_alloc += self.velocity.mem_size
 
-        ## Transported field.
+        # Transported field.
         alloc = not isinstance(self.fields_on_grid[0], GPUDiscreteField)
         GPUDiscreteField.fromField(self.cl_env,
                                    self.fields_on_grid[0],
@@ -288,7 +284,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator):
         if alloc:
             self.size_global_alloc += self.fields_on_grid[0].mem_size
 
-        ## Fields on particles
+        # Fields on particles
         self.fields_on_part = {}
         start = 0
         for f in self.fields_on_grid:
@@ -324,10 +320,10 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator):
                             print gpudf.name, '-> OpenCL Kernel', k_name
                         if gpudf == self.velocity:
                             workItemNumber, gwi, lwi = \
-                                self.cl_env.get_WorkItems(self.v_resol_dir)
+                                self.cl_env.get_work_items(self.v_resol_dir)
                         else:
                             workItemNumber, gwi, lwi = \
-                                self.cl_env.get_WorkItems(self.resol_dir)
+                                self.cl_env.get_work_items(self.resol_dir)
                         gpudf.setInitializationKernel(KernelLauncher(
                             cl.Kernel(self.prg, k_name), self.cl_env.queue,
                             gwi, lwi))
@@ -476,8 +472,8 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator):
         """
         if usr_src is not None:
             build_options = self.build_options + self._size_constants
-            workItemNb, gwi, lwi = self.cl_env.get_WorkItems(self.resol_dir)
-            v_workItemNb, gwi, lwi = self.cl_env.get_WorkItems(self.v_resol_dir)
+            workItemNb, gwi, lwi = self.cl_env.get_work_items(self.resol_dir)
+            v_workItemNb, gwi, lwi = self.cl_env.get_work_items(self.v_resol_dir)
             build_options += " -D WI_NB=" + str(workItemNb)
             build_options += " -D V_WI_NB=" + str(v_workItemNb)
             self.prg = self.cl_env.build_src(usr_src, build_options, 1)
@@ -500,10 +496,10 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator):
         build_options += " -D WI_NB=" + str(WINb)
         build_options += " -D PART_NB_PER_WI="
         build_options += str(self.resol_dir[0] / WINb)
-        ## Build code
+        # Build code
         src = [s.replace('RKN', self.method[TimeIntegrator].__name__.lower())
                for s in src]
-        ## Euler integrator
+        # Euler integrator
         if self.method[TimeIntegrator] is Euler:
             if not self._isMultiScale:
                 src = [s for s in src if s.find(Euler.__name__.lower()) < 0]
@@ -539,10 +535,10 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator):
         # Build code
         src = [s.replace('RKN', self.method[TimeIntegrator].__name__.lower())
                for s in src]
-        ## Adding remeshing weights for the multiscale advection
+        # Adding remeshing weights for the multiscale advection
         if self._isMultiScale:
             src.insert(1, self._kernel_cfg['remesh'][0][1])
-        ## Euler integrator
+        # Euler integrator
         if self.method[TimeIntegrator] is Euler:
             if not self._isMultiScale:
                 src = [s for s in src if s.find(Euler.__name__.lower()) < 0]
@@ -569,7 +565,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator):
         build_options += " -D WI_NB=" + str(WINb)
         build_options += " -D PART_NB_PER_WI="
         build_options += str(self.resol_dir[0] / WINb)
-        ## Build code
+        # Build code
         prg = self.cl_env.build_src(
             src, build_options, vec,
             nb_remesh_components=self.fields_on_grid[0].nb_components)
diff --git a/hysop/gpu/multi_gpu_particle_advection.py b/hysop/gpu/multi_gpu_particle_advection.py
index 6febad7ff..41023d6b1 100644
--- a/hysop/gpu/multi_gpu_particle_advection.py
+++ b/hysop/gpu/multi_gpu_particle_advection.py
@@ -14,7 +14,7 @@ from hysop.numerics.remeshing import Linear as Linear_rmsh
 from hysop.gpu.gpu_kernel import KernelLauncher
 from hysop.tools.profiler import FProfiler, profile
 from hysop.gpu import cl, CL_PROFILE
-from hysop.mpi.main_var import MPI
+from hysop.mpi import Wtime
 import hysop.tools.numpywrappers as npw
 
 
@@ -213,7 +213,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
         ## Python remeshing formula for the multiscale interpolation
         self._py_ms_formula = self.method[MultiScale]
         if self._isMultiScale:
-            if not self._py_ms_formula is Linear_rmsh:
+            if self._py_ms_formula is not Linear_rmsh:
                 raise ValueError('Not yet implemented' +
                                  str(self.method[MultiScale]))
         ## Python remeshing formula
@@ -608,7 +608,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
         _evt_r_v.wait()
 
     def _exchange_velocity_buffers(self, dt):
-        ctime = MPI.Wtime()
+        ctime = Wtime()
         ghosts = self.velocity_topo.ghosts()[self.direction]
         if self.direction == 0:
             max_velo_p = np.max(self.velocity.data[0][-ghosts - 1:, :, :])
@@ -621,9 +621,9 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
             max_velo_m = np.max(self.velocity.data[2][:, :, :ghosts + 1])
         self._recompute_scal_buffers(max_velo_m, max_velo_p, dt)
         self._get_velocity_buffers(ghosts)
-        self.profiler['comm_cpu_advec_get'] += MPI.Wtime() - ctime
+        self.profiler['comm_cpu_advec_get'] += Wtime() - ctime
 
-        ctime = MPI.Wtime()
+        ctime = Wtime()
         for b in xrange(self._v_n_blocks):
             self._l_recv_v[b] = self._comm.Irecv(
                 [self._v_l_buff_flat[self._v_block_slice[b]],
@@ -646,7 +646,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
             for b in xrange(self._v_n_blocks):
                 self._l_recv_v[b].Wait()
                 self._r_recv_v[b].Wait()
-        self.profiler['comm_cpu_advec'] += MPI.Wtime() - ctime
+        self.profiler['comm_cpu_advec'] += Wtime() - ctime
 
     def _todevice_velocity_buffers(self):
         for b in xrange(self._v_n_blocks):
@@ -870,14 +870,14 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
                 wait_for=[evt_comm_l])
 
         # Send the left buffer
-        ctime = MPI.Wtime()
+        ctime = Wtime()
         for b in xrange(self._s_n_blocks_to_l):
             self._evt_get_l[b].wait()
             self._l_send[b] = self._comm.Issend(
                 [self._s_l_buff[self._s_block_slice_to_l[b]],
                  self._s_elem_block_to_l, HYSOP_MPI_REAL],
                 dest=self._L_rk, tag=333 + self._comm_rank + 17 * b)
-        ctime_send_l = MPI.Wtime() - ctime
+        ctime_send_l = Wtime() - ctime
 
         # Fill and get the right buffer
         evt_comm_r = self._num_comm_r(wait_evts, dt)
@@ -895,14 +895,14 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
                 is_blocking=False,
                 wait_for=[evt_comm_r])
         # Send the right buffer
-        ctime = MPI.Wtime()
+        ctime = Wtime()
         for b in xrange(self._s_n_blocks_to_r):
             self._evt_get_r[b].wait()
             self._r_send[b] = self._comm.Issend(
                 [self._s_r_buff[self._s_block_slice_to_r[b]],
                  self._s_elem_block_to_r, HYSOP_MPI_REAL],
                 dest=self._R_rk, tag=888 + self._comm_rank + 19 * b)
-        ctime_send_r = MPI.Wtime() - ctime
+        ctime_send_r = Wtime() - ctime
 
         # remesh in-domain particles and get left-right layer
         evt = self._num_comm(wait_evts, dt)
@@ -929,7 +929,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
             is_blocking=False,
             wait_for=[evt])
 
-        ctime = MPI.Wtime()
+        ctime = Wtime()
         # Wait MPI transfer of data from left, add them to local
         # data and send back to device
         for b in xrange(self._s_n_blocks_to_r):
@@ -937,11 +937,11 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
         for b in xrange(self._s_n_blocks_from_l):
             self._l_recv[b].Wait()
         evt_get_locl.wait()
-        ctime_wait_l = MPI.Wtime() - ctime
+        ctime_wait_l = Wtime() - ctime
 
-        calctime = MPI.Wtime()
+        calctime = Wtime()
         self._s_locl_buff += self._s_froml_buff
-        self.profiler['comm_calc_remesh'] += MPI.Wtime() - calctime
+        self.profiler['comm_calc_remesh'] += Wtime() - calctime
         evt_set_locl = cl.enqueue_copy(
             self.cl_env.queue,
             self.fields_on_grid[0].gpu_data[0],
@@ -955,16 +955,16 @@ class MultiGPUParticleAdvection(GPUParticleAdvection):
 
         # Wait MPI transfer of data from right, add them to local
         # data and send back to device
-        ctime = MPI.Wtime()
+        ctime = Wtime()
         for b in xrange(self._s_n_blocks_to_l):
             self._l_send[b].Wait()
         for b in xrange(self._s_n_blocks_from_r):
             self._r_recv[b].Wait()
         evt_get_locr.wait()
-        ctime_wait_r = MPI.Wtime() - ctime
-        calctime = MPI.Wtime()
+        ctime_wait_r = Wtime() - ctime
+        calctime = Wtime()
         self._s_locr_buff += self._s_fromr_buff
-        self.profiler['comm_calc_remesh'] += MPI.Wtime() - calctime
+        self.profiler['comm_calc_remesh'] += Wtime() - calctime
         evt_set_locr = cl.enqueue_copy(
             self.cl_env.queue,
             self.fields_on_grid[0].gpu_data[0],
diff --git a/hysop/gpu/tests/test_gpu_multiresolution_filter.py b/hysop/gpu/tests/test_gpu_multiresolution_filter.py
index cf1ffe130..814d2c85b 100755
--- a/hysop/gpu/tests/test_gpu_multiresolution_filter.py
+++ b/hysop/gpu/tests/test_gpu_multiresolution_filter.py
@@ -8,7 +8,7 @@ import numpy as np
 from hysop.methods_keys import Remesh, Support, ExtraArgs
 from hysop.methods import Rmsh_Linear, L2_1
 # In parallel we need to use as many threads as gpu
-from hysop.mpi.main_var import main_size, main_rank
+from hysop.mpi import main_size, main_rank
 import pyopencl as cl
 n_gpu = len(cl.get_platforms()[0].get_devices(
     device_type=cl.device_type.GPU))
@@ -16,7 +16,6 @@ PROC_TASKS = [0, ] * main_size
 if main_rank < n_gpu:
     PROC_TASKS[main_rank] = 1
 
-
 L = [1., 1., 1.]
 O = [0., 0., 0.]
 simu = Simulation(start=0., end=0.1, nb_iter=1)
diff --git a/hysop/gpu/tools.py b/hysop/gpu/tools.py
index da279766e..d0ac14247 100644
--- a/hysop/gpu/tools.py
+++ b/hysop/gpu/tools.py
@@ -1,17 +1,15 @@
-"""
-@file tools.py
+"""Misc. tools for gpu/OpenCL management.
 
-Tools for gpu management.
 """
 from hysop import __VERBOSE__, __DEFAULT_PLATFORM_ID__, __DEFAULT_DEVICE_ID__
-from hysop.constants import np, HYSOP_REAL, ORDER
-from hysop.gpu import cl, clTools, GPU_SRC, CL_PROFILE
-import hysop.tools.numpywrappers as npw
+from hysop.constants import np, HYSOP_REAL
+from hysop.gpu import cl, GPU_SRC, CL_PROFILE
 import re
-import mpi4py.MPI as mpi
+from hysop.mpi import MPI
 FLOAT_GPU, DOUBLE_GPU = np.float32, np.float64
 
-## Global variable handling an OpenCL Environment instance
+
+# Global variable handling the OpenCL Environment instance
 __cl_env = None
 
 
@@ -21,32 +19,45 @@ class OpenCLEnvironment(object):
 
     def __init__(self, platform_id, device_id, device_type,
                  precision, gl_sharing=False, comm=None):
-        """Create environment.
-        @param platform_id : OpenCL platform id
-        @param device_id : OpenCL device id
-        @param device_type : OpenCL device type
-        @param precision : Recquired precision
-        @param resolution : Global resolution
-        @param gl_sharing : Flag to build a OpenGL shared OpenCL context
+        """Initialize an OpenCL environment
+
+        Parameters
+        ----------
+        platform_id : int
+            chosen platform id.
+        device_id : int
+            chosen device id.
+        device_type : string
+            chosen device type.
+        precision : int
+            required precision for real data.
+            Must be FLOAT_GPU or DOUBLE_GPU.
+        gl_sharing : bool, optional
+            True to build a context shared between OpenGL and OpenCL.
+            Default=False.
+        comm : mpi communicator, optional
+            Communicator which handles the OpenCL env.
+            Default = hysop.mpi.main_comm
+
         """
         self._platform_id = platform_id
         self._device_id = device_id
         self._device_type = device_type
         self._gl_sharing = gl_sharing
-        ## OpenCL platform
+        # OpenCL platform
         self.platform = self._get_platform(platform_id)
-        ## OpenCL device
+        # OpenCL device
         self.device = self._get_device(self.platform, device_id, device_type)
-        ## Device available memory
+        # Device available memory
         self.available_mem = self.device.global_mem_size
-        ## OpenCL context
+        # OpenCL context
         self.ctx = self._get_context(self.device, gl_sharing)
-        ## OpenCL queue
+        # OpenCL queue
         self.queue = self._get_queue(self.ctx)
 
-        ## MPI sub-communicator for all processes attached to the same device
+        # MPI sub-communicator for all processes attached to the same device
         if comm is None:
-            from hysop.mpi.main_var import main_comm
+            from hysop.mpi import main_comm
         else:
             main_comm = comm
         # Splitting the mpi communicator by the device id is not enough:
@@ -55,44 +66,53 @@ class OpenCLEnvironment(object):
         import hashlib
         # The md5 sum of the proc name is tuncated to obtain an integer
         # for fortran (32bit)
-        hash_name = hashlib.md5(mpi.Get_processor_name()).hexdigest()[-7:]
+        hash_name = hashlib.md5(MPI.Get_processor_name()).hexdigest()[-7:]
         self.gpu_comm = main_comm.Split(
             color=int(hash_name, 16) + device_id,
             key=main_comm.Get_rank())
 
-        ## Floating point precision
+        # Floating point precision
         self.precision = precision
         if self.precision is FLOAT_GPU:
             self.prec_size = 4
         elif self.precision is DOUBLE_GPU:
             self.prec_size = 8
+        self.macros = {}
         self.default_build_opts = ""
         if CL_PROFILE and self.device.vendor.find('NVIDIA') >= 0:
             self.default_build_opts += " -cl-nv-verbose"
         self.default_build_opts += " -Werror" + self._get_precision_opts()
 
-        ## Kernels configuration dictionary
+        # Kernels configuration dictionary
         if self.device.name == "Cayman":
-            from config_cayman import kernels_config as kernel_cfg
+            from hysop.gpu.config_cayman import kernels_config as kernel_cfg
         elif self.device.name == "Tesla K20m" or \
                 self.device.name == "Tesla K20Xm":
-            from config_k20m import kernels_config as kernel_cfg
+            from hysop.gpu.config_k20m import kernels_config as kernel_cfg
         else:
             print "/!\\ Get a defautl kernels config for", self.device.name
-            from config_default import kernels_config as kernel_cfg
+            from hysop.gpu.config_default import kernels_config as kernel_cfg
         self.kernels_config = kernel_cfg
         self._locMem_Buffers = {}
 
     def modify(self, platform_id, device_id, device_type,
                precision, gl_sharing=False):
-        """
-        Modify OpenCL environment parameters.
-        @param platform_id : OpenCL platform id
-        @param device_id : OpenCL device id
-        @param device_type : OpenCL device type
-        @param precision : Recquired precision
-        @param resolution : Global resolution
-        @param gl_sharing : Flag to build a OpenGL shared OpenCL context
+        """Modify OpenCL environment parameters.
+
+        Parameters
+        ----------
+        platform_id : int
+            chosen platform id.
+        device_id : int
+            chosen device id.
+        device_type : string
+            chosen device type.
+        precision : int
+            required precision for real data.
+            Must be FLOAT_GPU or DOUBLE_GPU.
+        gl_sharing : bool, optional
+            True to build a context shared between OpenGL and OpenCL.
+            Default=False.
         """
         platform_changed, device_changed = False, False
         if not platform_id == self._platform_id:
@@ -110,14 +130,14 @@ class OpenCLEnvironment(object):
             self.available_mem = self.device.global_mem_size
             device_changed = True
         if platform_changed or device_changed or \
-                (not self._gl_sharing and not gl_sharing is self._gl_sharing):
+                (not self._gl_sharing and gl_sharing is not self._gl_sharing):
             if self._gl_sharing and not gl_sharing:
                 print ("Warning: Loosing Gl shared context.")
             self._gl_sharing = gl_sharing
             self.ctx = self._get_context(self.device, gl_sharing)
             self.queue = self._get_queue(self.ctx)
-        if not self.precision is precision and not precision is None:
-            if not self.precision is None:
+        if self.precision is not precision and precision is not None:
+            if self.precision is not None:
                 print ("Warning, GPU precision is overrided from",)
                 print (self.precision, 'to', precision)
             self.precision = precision
@@ -127,10 +147,9 @@ class OpenCLEnvironment(object):
             self.default_build_opts += "-Werror" + self._get_precision_opts()
 
     def _get_platform(self, platform_id):
-        """
-        Get an OpenCL platform.
-        @param platform_id : OpenCL platform id
-        @return OpenCL platform
+        """Returns an OpenCL platform
+        :param platform_id : OpenCL platform id
+
         """
         try:
             # OpenCL platform
@@ -147,12 +166,16 @@ class OpenCLEnvironment(object):
         return platform
 
     def _get_device(self, platform, device_id, device_type):
-        """
-        Get an OpenCL device.
-        @param platform : OpenCL platform
-        @param device_id : OpenCL device id
-        @param device_type : OpenCL device type
-        @return OpenCL device
+        """Returns an OpenCL device
+
+        Parameters
+        ----------
+        platform_id : int
+            chosen platform id.
+        device_id : int
+            chosen device id.
+        device_type : string
+            chosen device type.
 
         Try to use given parameters and in case of fails, use pyopencl context
         creation function.
@@ -199,11 +222,15 @@ class OpenCLEnvironment(object):
         return device
 
     def _get_context(self, device, gl_sharing):
-        """
-        Get an OpenCL context.
-        @param device : OpenCL device
-        @param gl_sharing : Flag to build a OpenGL shared OpenCL context
-        @return OpenCL context
+        """Returns OpenCL context
+
+        Parameters
+        ----------
+        device : OpenCL device
+            which handles the context.
+        gl_sharing : bool
+            True to build a context shared between OpenGL and OpenCL.
+            Default=False.
 
         """
         props = None
@@ -223,15 +250,15 @@ class OpenCLEnvironment(object):
             ctx = cl.Context([device])
         if __VERBOSE__:
             print " Context:"
-            if not props is None:
+            if props is not None:
                 print "  - properties           :", props
         return ctx
 
     def _get_queue(self, ctx):
-        """
-        Get OpenCL queue from context
-        @param ctx : OpenCL context
-        @return OpenCL queue
+        """Returns OpenCL queue from context
+
+        :param ctx : OpenCL context
+
         """
         props = None
         if CL_PROFILE:
@@ -241,20 +268,31 @@ class OpenCLEnvironment(object):
             queue = cl.CommandQueue(ctx)
         if __VERBOSE__:
             print " Queue"
-            if not props is None:
+            if props is not None:
                 print "  - properties           :", props
             print "==="
         return queue
 
     def create_other_queue(self):
+        """Create OpenCL queue from current context
+        """
         return self._get_queue(self.ctx)
 
-    def get_WorkItems(self, resolution, vector_width=1):
-        """
-        Set the optimal work-item number and OpenCL space index.
-        @param resolution : Problem resolution
-        @param vector_width : OpenCL vector types width
-        @return work-item number, global space index and local space index
+    def get_work_items(self, resolution, vector_width=1):
+        """Set the optimal work-item number and OpenCL space index.
+
+        Parameters
+        ----------
+        resolution : tuple
+            local mesh resolution
+        vector_width : int
+            OpenCL vector types width
+
+        Returns
+        -------
+        int : work-item number
+        tuple : global space index
+        tuple : local space index
 
         Use 64 work-items in 3D and 256 in 2D.
         \todo Use Both the number from device capability
@@ -297,7 +335,7 @@ class OpenCLEnvironment(object):
 
     def _get_precision_opts(self):
         """Check if device is capable regarding given precision
-        @return build options regarding precision recquired
+        and returns build options regarding the required precision.
         """
         opts = ""
         # Precision supported
@@ -336,14 +374,21 @@ class OpenCLEnvironment(object):
         return opts
 
     def build_src(self, files, options="", vector_width=4,
-                  nb_remesh_components=1, macros=None):
-        """
-        Build OpenCL sources
-        @param files: Source files
-        @param options : Compiler options to use for buildind
-        @param vector_width : OpenCL vector type width
-        @param nb_remesh_components : number of remeshed components
-        @return OpenCL binaries
+                  nb_remesh_components=1):
+        """Build OpenCL sources
+
+        Parameters
+        ----------
+        files : file or list of files
+            sources
+        options : string
+            Compiler options to use for buildind
+        vector_width : int
+            OpenCL vector type width
+        nb_remesh_components : int
+            number of remeshed components
+
+        Returns OpenCL binaries
 
         Parse the sources to handle single and double precision.
         """
@@ -371,9 +416,8 @@ class OpenCLEnvironment(object):
                 self.parse_file(f, vector_width, nb_remesh_components))
             f.close()
             #print gpu_src
-        if macros is not None:
-            for k in macros:
-                gpu_src = gpu_src.replace(k, str(macros[k]))
+        for k in self.macros:
+            gpu_src = gpu_src.replace(k, str(self.macros[k]))
         if self.precision is FLOAT_GPU:
             # Rexexp to add 'f' suffix to float constants
             # Match 1.2, 1.234, 1.2e3, 1.2E-05
@@ -413,12 +457,19 @@ class OpenCLEnvironment(object):
         return build
 
     def parse_file(self, f, n=8, nb_remesh_components=1):
-        """
-        Parse a file containing OpenCL sources.
-        @param f : source file
-        @param n : vector width
-        @param nb_remesh_components : number of remeshed components
-        @return parsed sources as string.
+        """Parse a file containing OpenCL sources.
+
+        Parameters
+        ----------
+        f : file
+        n : int, optional
+            vector width, default=8
+        nb_remesh_components : int
+            number of remeshed components
+
+        Returns
+        -------
+        string, the parsed sources.
 
         - <code>__N__</code> is expanded as an integer corresponding to a
         vector with
@@ -569,14 +620,27 @@ def get_opengl_shared_environment(platform_id=None,
                                   device_id=None,
                                   device_type=None, precision=HYSOP_REAL,
                                   comm=None):
-    """
-    Get an OpenCL environment with OpenGL shared enable.
-
-    @param platform_id :OpenCL platform id
-    @param device_id : OpenCL device id
-    @param device_type : OpenCL device type
-    @param precision : Required precision
-    @return OpenCL platform, device, context and queue
+    """Build or get an OpenCL environment.
+
+    Parameters
+    ----------
+    platform_id : int
+        chosen platform id.
+    device_id : int
+       chosen device id.
+    device_type : string
+       chosen device type.
+    precision : int, optional
+       required precision for real data.
+       Default : HYSOP_REAL
+    comm : mpi communicator, optional
+        Communicator which handles the OpenCL env.
+        Default = hysop.mpi.main_comm
+
+    Returns
+    -------
+    :class:`~hysop.gpu.tools.OpenCLEnvironment`
+    handling OpenCL platform, device, context and queue
 
     The context is obtained with gl-shared properties depending on the OS.
     """
@@ -598,14 +662,27 @@ def get_opencl_environment(platform_id=None,
                            device_id=None,
                            device_type=None, precision=HYSOP_REAL,
                            comm=None):
-    """
-    Get an OpenCL environment.
-
-    @param platform_id :OpenCL platform id
-    @param device_id : OpenCL device id
-    @param device_type : OpenCL device type
-    @param precision : Required precision
-    @return OpenCL platform, device, context and queue
+    """Build or get an OpenCL environment.
+
+    Parameters
+    ----------
+    platform_id : int
+        chosen platform id.
+    device_id : int
+       chosen device id.
+    device_type : string
+       chosen device type.
+    precision : int, optional
+       required precision for real data.
+       Default : HYSOP_REAL
+    comm : mpi communicator, optional
+        Communicator which handles the OpenCL env.
+        Default = hysop.mpi.main_comm
+
+    Returns
+    -------
+    :class:`~hysop.gpu.tools.OpenCLEnvironment`
+    handling OpenCL platform, device, context and queue
 
     """
     if platform_id is None:
diff --git a/hysop/mpi/__init__.py b/hysop/mpi/__init__.py
index c10d709c6..f4f4d7a38 100644
--- a/hysop/mpi/__init__.py
+++ b/hysop/mpi/__init__.py
@@ -11,29 +11,30 @@ in order to make any change of this interface, if required, easiest.
 
 At the time we use mpi4py : http://mpi4py.scipy.org
 
-
 """
 
-# Everything concerning the chosen mpi implementation is hidden in main_var
+# Everything concerning the chosen mpi implementation is hidden from hysop
+# main interface.
 # Why? --> to avoid that things like mpi4py. ... spread everywhere in the
 # soft so to ease a change of this implementation (if needed).
-from hysop.mpi import main_var
-
-MPI = main_var.MPI
-"""MPI underlying implementation
-"""
+from mpi4py import MPI as MPI
+"""MPI interface"""
 
-main_comm = main_var.main_comm
-"""Main communicator (copy of comm_world)
-"""
+main_comm = MPI.COMM_WORLD.Dup()
+"""Main communicator"""
 
-main_rank = main_var.main_rank
-"""Rank of the current process in the main communicator
-"""
+main_rank = main_comm.Get_rank()
+"""Rank of the current process in main communicator"""
 
-main_size = main_var.main_size
-"""Size of the main communicator
-"""
+main_size = main_comm.Get_size()
+"""Number of mpi process in main communicator"""
 
 Wtime = MPI.Wtime
-"""Timer for MPI calls"""
+"""Function to return elapsed time since some time in the past.
+
+Usage:
+tref = Wtime()
+# proceed with some computations ...
+elapsed = Wtime() - tref
+# -> elapsed == time for 'some computations' on the current mpi process
+"""
diff --git a/hysop/mpi/main_var.py b/hysop/mpi/main_var.py
deleted file mode 100644
index 0840b4d5e..000000000
--- a/hysop/mpi/main_var.py
+++ /dev/null
@@ -1,16 +0,0 @@
-"""
-@file main_var.py
-
-Global parameters related to mpi, for hysop package.
-
-"""
-
-# Set the underlying implementation of MPI
-# mpi4py must be hidden from user
-import mpi4py.MPI
-MPI = mpi4py.MPI
-
-# Create hysop main communicator from COMM_WORLD
-main_comm = MPI.COMM_WORLD.Dup()
-main_rank = main_comm.Get_rank()
-main_size = main_comm.Get_size()
diff --git a/hysop/mpi/tests/test_bridge.py b/hysop/mpi/tests/test_bridge.py
index ac28390f7..5bfe802d8 100755
--- a/hysop/mpi/tests/test_bridge.py
+++ b/hysop/mpi/tests/test_bridge.py
@@ -1,6 +1,7 @@
 from hysop.domain.box import Box
 from hysop.tools.parameters import Discretization
 from hysop.mpi.bridge import Bridge
+from hysop.mpi import main_size, main_comm
 from hysop.mpi.bridge_overlap import BridgeOverlap
 from hysop.mpi.bridge_inter import BridgeInter
 import math
@@ -42,10 +43,6 @@ def test_bridge3D():
     print bridge
 
 
-from hysop.mpi.main_var import main_size, main_comm
-from hysop.mpi.tests.utils import create_subtopos, create_inter_topos
-
-
 def test_bridge_overlap():
     """
     Try the pathologic case where source and target do not apply on
diff --git a/hysop/mpi/tests/utils.py b/hysop/mpi/tests/utils.py
index 94b56d619..6de9749e4 100644
--- a/hysop/mpi/tests/utils.py
+++ b/hysop/mpi/tests/utils.py
@@ -1,8 +1,7 @@
-"""
-Functions used through mpi-related tests.
+"""Functions used in mpi-related tests.
 """
 
-from hysop.mpi.main_var import main_comm, main_rank, main_size
+from hysop.mpi import main_comm, main_rank, main_size
 from hysop.tools.parameters import MPIParams
 import hysop as pp
 GPU = 4
@@ -11,10 +10,30 @@ OTHER = 12
 
 
 def create_multitask_context(dim, discr):
-    # MPI procs are distributed among three tasks
-    # Two tasks:
-    # proc 0 and last proc work on 'GPU' task.
-    # The others on 'CPU' task.
+    """Create a domain and a topology in a multi-task context
+
+    Parameters
+    ----------
+    dim : int
+       domain dimension
+    discr : :class:`~hysop.tools.parameters.Discretization
+       space discretization
+
+    Returns
+    -------
+    dom : :class:`~hysop.domain.box.Box`
+        a domain (box-shaped) defined on three differents tasks
+    topo : :class:`~hysop.mpi.topology.Cartesian`
+        topology associated to the domain
+
+    Notes
+    -----
+
+    This function has sense only if the number of mpi processe
+    is is greater than or equal to 3. In that case, 3 tasks are defined:
+    proc 0 and last proc handle 'GPU' task, proc 1 'OTHER' task and all other
+    procs, 'CPU' task.
+    """
     proc_tasks = [CPU, ] * main_size
     if main_size > 2:
         proc_tasks[-1] = GPU
@@ -30,6 +49,22 @@ def create_multitask_context(dim, discr):
 
 
 def create_subtopos(domain, discr_source, discr_target):
+    """Split main mpi-communicator into two topologies.
+
+    Parameters
+    ----------
+    dom : :class:`~hysop.domain.box.Box`
+        a domain (box-shaped)
+    discr_source, discr_target : :class:`~hysop.tools.parameters.Discretization
+       space discretizations for each sub-topo
+
+
+    Returns
+    -------
+    source_topo, target_topo: class:`~hysop.mpi.topology.Cartesian`
+        topologies associated with rank 0 and last rank for 'target'
+        and other ranks for 'source'.
+    """
     # split main comm into two groups
     rk_source = [i for i in xrange(main_size)]
     rk_target = list(rk_source)
@@ -57,7 +92,22 @@ def create_subtopos(domain, discr_source, discr_target):
 
 
 def create_nonoverlap_topos(domain, discr_source, discr_target):
-    # split main comm into two groups
+    """Split main mpi-communicator into two topologies.
+
+    Parameters
+    ----------
+    dom : :class:`~hysop.domain.box.Box`
+        a domain (box-shaped)
+    discr_source, discr_target : :class:`~hysop.tools.parameters.Discretization
+       space discretizations for each sub-topo
+
+
+    Returns
+    -------
+    source_topo, target_topo: class:`~hysop.mpi.topology.Cartesian`
+        topologies associated with even ranks for 'source'
+        and odd ranks for 'target'.
+    """
     rk_source = [i for i in xrange(main_size) if i % 2 == 0]
     rk_target = [i for i in xrange(main_size) if i % 2 != 0]
     g_source = main_comm.group.Incl(rk_source)
@@ -82,10 +132,30 @@ def create_nonoverlap_topos(domain, discr_source, discr_target):
 
 
 def create_inter_topos(dim, discr1, discr2):
-    # MPI procs are distributed among two tasks
+    """Create a domain and two topologies in a multi-task context
+
+    Parameters
+    ----------
+    dim : int
+       domain dimension
+    discr1, discr2 : :class:`~hysop.tools.parameters.Discretization
+        space discretizations for topo1 and topo2
+
+    Returns
+    -------
+    dom : :class:`~hysop.domain.box.Box`
+        a domain (box-shaped) defined on three differents tasks
+    topo1, topo2 : :class:`~hysop.mpi.topology.Cartesian`
+        topologies associated to the domain
+
+    Notes
+    -----
 
-    # proc 0 and last proc work on 'GPU' task.
-    # The others on 'CPU' task.
+    This function has sense only if the number of mpi processe
+    is is greater than or equal to 3. In that case, 2 tasks are defined:
+    proc 0 and last proc handle 'GPU' task and all other
+    procs, 'CPU' task.
+    """
     proc_tasks = [CPU, ] * main_size
     if main_size > 2:
         proc_tasks[-1] = GPU
diff --git a/hysop/mpi/topology.py b/hysop/mpi/topology.py
index 11a8057b1..f21282fdc 100644
--- a/hysop/mpi/topology.py
+++ b/hysop/mpi/topology.py
@@ -9,7 +9,7 @@
 from hysop.constants import debug, ORDER, PERIODIC
 from hysop.domain.mesh import Mesh
 from itertools import count
-from hysop.mpi.main_var import MPI
+from hysop.mpi import MPI
 from hysop.tools.parameters import Discretization, MPIParams
 import numpy as np
 import hysop.tools.numpywrappers as npw
@@ -43,18 +43,19 @@ class Cartesian(object):
     @debug
     def __new__(cls, *args, **kw):
         return object.__new__(cls, *args, **kw)
-#
+    #
     # Counter of topology.Cartesian instances to set a unique id for each
     # Cartesian topology instance.
     __ids = count(0)
 
     @debug
     def __init__(self, domain, discretization, dim=None, mpi_params=None,
-                 isperiodic=None, cutdir=None, shape=None, mesh=None):
+                 isperiodic=None, cutdir=None, shape=None, mesh=None,
+                 cartesian_topology=None):
         """
 
         Parameters
-        -----------
+        ----------
         domain : :class:`~hysop.domain.domain.Domain`
            the geometry on which the topology is defined.
         discretization : :class:`~hysop.tools.parameters.Discretization`
@@ -65,7 +66,7 @@ class Cartesian(object):
         mpi_params : :class:`~hysop.tools.parameters.MPIParams`, optional
             mpi setup (comm, task ...).
             If None, comm = main_comm, task = DEFAULT_TASK_ID.
-        isperiodic : list or array of bool, optional
+        isperiodic : tuple, list or array of bool, optional
             mpi grid periodicity
         cutdir : list or array of bool
             set which directions must (may) be distributed,
@@ -74,22 +75,28 @@ class Cartesian(object):
             mpi grid layout
         mesh : :class:`~hysop.domain.mesh.Mesh`, optional
             a predifined mesh (it includes local and global grids.)
+        cartesian_topology : MPI.Cartcomm, optional
+            a predefined mpi cartesian topology. Use this
+            when you need the same communicator for two
+            different meshes/space distribution of data.
 
         Notes:
         ------
-        Almost all parameters above are optional.
-        Only one must be chosen among dim, cutdir and shape.
-        See :ref:`topologies` in the Hysop User Guide for details.
-
-        See hysop.mpi.topology.Cartesian.plane_precomputed
-        details to build a plane topology from a given local discretization
-        (e.g. from fftw or scales precomputation).
+        * Almost all parameters above are optional.
+          Only one must be chosen among dim, cutdir and shape.
+          See :ref:`topologies` in the Hysop User Guide for details.
+        * when cartesian_topology is given, dim, shape and cutdir parameters,
+          if set, are not used to build the mpi topology, but compared with
+          cartesian_topology parameters. If they do not fit, error may occur.
+        * See hysop.mpi.topology.Cartesian.plane_precomputed
+          details to build a plane topology from a given local discretization
+          (e.g. from fftw or scales precomputation).
         """
         # ===== 1 - Required parameters : domain and mpi (comm, task) ====
         # An id for the topology
         self.__id = self.__ids.next()
 
-        ## Associated domain
+        # Associated domain
         self.domain = domain
         # - MPI topo params :
         if mpi_params is None:
@@ -121,7 +128,11 @@ class Cartesian(object):
         # mpi communicator (cartesian topo)
         self.comm = None
 
-        self._build_mpi_topo(shape, cutdir, dim, isperiodic)
+        if cartesian_topology is None:
+            self._build_mpi_topo(shape, cutdir, dim, isperiodic)
+        else:
+            self._set_with_external_topo(cartesian_topology, shape,
+                                         cutdir, dim, isperiodic)
 
         # ===== 3 - Get features of the mpi processes grid =====
         # Size of the topology (i.e. total number of mpi processes)
@@ -136,8 +147,9 @@ class Cartesian(object):
         # --> proc_coords has values even for directions that
         # are not distributed. If cutdir = [False, False, True]
         # then reduced_coords = [ nx ] and proc_coords = [0, 0, nx]
-        self.proc_coords = npw.dim_zeros(self.domain.dimension)
-        self.proc_coords[self.cutdir] = reduced_coords
+        # self.proc_coords = npw.dim_zeros(self.domain.dimension)
+        # self.proc_coords[self.cutdir] = reduced_coords
+        self.proc_coords = reduced_coords
         # Neighbours : self.neighbours[0,i] (resp. [1,i])
         # previous (resp. next) neighbour in direction i
         # (warning : direction in the grid of process).
@@ -178,10 +190,49 @@ class Cartesian(object):
         """ default builder
         See :ref:`topologies` for details.
 
+        """
+        self._check_topo_parameters(shape, cutdir, dim, isperiodic)
+
+        # From this point, the following parameters must be properly set:
+        # - self.shape
+        # - self.cutdir
+        # - self._isperiodic
+        # Then, we can create the mpi topology.
+        #self.comm = \
+        self._isperiodic = tuple([True, ] * self.domain.dimension)
+        self.comm = self._mpis.comm.Create_cart(self.shape,
+                                                periods=self._isperiodic,
+                                                reorder=True)
+
+    def _set_with_external_topo(self, cartesian_topology, shape=None,
+                                cutdir=None, dim=None, isperiodic=None):
+        """Use a pre-defined mpi cartesian topology to set self.comm.
+        Check compatibility whith other (optional) parameters.
+        """
+        msg = 'Wrong type for input communicator.'
+        assert isinstance(cartesian_topology, MPI.Cartcomm), msg
+        self._check_topo_parameters(shape, cutdir, dim, isperiodic)
+        msg = 'The given cartesian topology does not fit with the others'
+        msg += ' input parameters.'
+        assert cartesian_topology.size == np.prod(self.shape), msg
+        assert cartesian_topology.dim == self.dimension, msg
+        assert (self.shape ==
+                npw.asdimarray(cartesian_topology.dims)).all(), msg
+        assert self._isperiodic == cartesian_topology.periods, msg
+        self.comm = cartesian_topology
+
+    def _check_topo_parameters(self, shape=None, cutdir=None,
+                               dim=None, isperiodic=None):
+        """Check compatibility between input parameters provided
+        to build the mpi topology
+
+        After this call, the following attributes must be properly set:
+        # - self.shape
+        # - self.cutdir
+        # - self._isperiodic
         """
         # number of process in parent comm
         origin_size = self._mpis.comm.Get_size()
-
         if shape is not None:
             # method (a)
             msg = ' parameter is useless when shape is provided.'
@@ -198,7 +249,7 @@ class Cartesian(object):
             msg = ' parameter is useless when cutdir is provided.'
             assert shape is None, 'shape ' + msg
             assert dim is None, 'dim ' + msg
-            self.cutdir = npw.asboolarray(cutdir)
+            self.cutdir = npw.asboolarray(cutdir).copy()
             self.dimension = self.cutdir[self.cutdir].size
             shape = npw.asdimarray(MPI.Compute_dims(origin_size,
                                                     self.dimension))
@@ -237,11 +288,11 @@ class Cartesian(object):
 
         # MPI processes grid periodicity. Default is true.
         if isperiodic is None:
-            self._isperiodic = npw.ones((self.domain.dimension),
-                                        dtype=npw.bool)
+            self._isperiodic = tuple([True, ] * self.domain.dimension)
+
         else:
-            self._isperiodic = npw.asboolarray(isperiodic)
-            assert isperiodic.size == self.domain.dimension
+            assert len(isperiodic) == self.domain.dimension
+            self._isperiodic = tuple(isperiodic)
 
         # compute real dim ...
         self.dimension = self.shape[self.cutdir].size
@@ -251,16 +302,6 @@ class Cartesian(object):
             self.dimension = 1
             self.cutdir[-1] = True
 
-        # From this point, the following parameters must be properly set:
-        # - self.shape
-        # - self.cutdir
-        # - self._isperiodic
-        # Then, we can create the mpi topology.
-        self.comm = \
-            self._mpis.comm.Create_cart(self.shape[self.cutdir],
-                                        periods=self._isperiodic[self.cutdir],
-                                        reorder=True)
-
     @staticmethod
     def _optimizeshape(shape):
         """Reorder 'shape' according to the chosen
@@ -452,10 +493,15 @@ class Cartesian(object):
 
     @staticmethod
     def reset_counter():
+        """Set global counter of cartesian topologies to 0.
+        """
         Cartesian.__ids = count(0)
 
 
 class TopoTools(object):
+    """Static class providing tools to handle set of indices, message tags,
+    and so on.
+    """
 
     @staticmethod
     def gather_global_indices(topo, toslice=True, root=None, comm=None):
diff --git a/hysop/numerics/remeshing.py b/hysop/numerics/remeshing.py
index 4cdc0ea7b..5492bc3e2 100644
--- a/hysop/numerics/remeshing.py
+++ b/hysop/numerics/remeshing.py
@@ -391,7 +391,7 @@ def polynomial_optimisation():
             tt += (MPI.Wtime() - t)
         print tt, s
 
-    from hysop.mpi.main_var import MPI
+    from hysop.mpi import MPI
     nb = 128
     a = npw.asrealarray(np.random.random((nb, nb, nb)))
     r = np.zeros_like(a)
diff --git a/hysop/operator/computational.py b/hysop/operator/computational.py
index 6af67b9d2..ee6db6c9f 100755
--- a/hysop/operator/computational.py
+++ b/hysop/operator/computational.py
@@ -327,7 +327,7 @@ class Computational(Operator):
             self.discrete_op.computation_time()
             self.time_info = self.discrete_op.time_info
         else:
-            from hysop.mpi.main_var import main_rank
+            from hysop.mpi import main_rank
             shortName = str(self.__class__).rpartition('.')[-1][0:-2]
             s = '[' + str(main_rank) + '] ' + shortName
             s += " : operator not discretized --> no computation, time = 0."
diff --git a/hysop/operator/curlAndDiffusion.py b/hysop/operator/curlAndDiffusion.py
index b171bd647..8b86d2bff 100644
--- a/hysop/operator/curlAndDiffusion.py
+++ b/hysop/operator/curlAndDiffusion.py
@@ -50,7 +50,7 @@ class CurlDiffusion(Operator):
         Create a discrete Diffusion operator from given specifications.
         """
         if self._comm is None:
-            from hysop.mpi.main_var import main_comm as comm
+            from hysop.mpi import main_comm as comm
         else:
             comm = self._comm
 
diff --git a/hysop/operator/tests/test_adaptive_time_step.py b/hysop/operator/tests/test_adaptive_time_step.py
index 81e1016e2..54284a67b 100755
--- a/hysop/operator/tests/test_adaptive_time_step.py
+++ b/hysop/operator/tests/test_adaptive_time_step.py
@@ -3,7 +3,7 @@ import hysop as pp
 from hysop.operator.adapt_timestep import AdaptTimeStep
 from hysop.problem.simulation import Simulation
 from hysop.tools.parameters import Discretization
-from hysop.mpi import main_comm
+from hysop.mpi import main_comm, main_size
 import numpy as np
 import hysop.tools.numpywrappers as npw
 import os
@@ -98,7 +98,6 @@ def test_adapt_4():
     GPU = 4
     CPU = 1
     VISU = 12
-    from hysop.mpi.main_var import main_size
     proc_tasks = [CPU, ] * main_size
 
     if main_size > 4:
diff --git a/hysop/operator/tests/test_multiresolution_filter.py b/hysop/operator/tests/test_multiresolution_filter.py
index 04c5b0514..198d8ce4a 100755
--- a/hysop/operator/tests/test_multiresolution_filter.py
+++ b/hysop/operator/tests/test_multiresolution_filter.py
@@ -7,8 +7,7 @@ import hysop.tools.numpywrappers as npw
 import numpy as np
 from hysop.methods_keys import Remesh
 from hysop.methods import Rmsh_Linear, L2_1
-from hysop.mpi.main_var import main_size
-
+from hysop.mpi import main_size
 
 L = [1., 1., 1.]
 O = [0., 0., 0.]
diff --git a/hysop/operator/tests/test_redistribute.py b/hysop/operator/tests/test_redistribute.py
index 917df64a1..ce9cadda0 100755
--- a/hysop/operator/tests/test_redistribute.py
+++ b/hysop/operator/tests/test_redistribute.py
@@ -1,7 +1,6 @@
 from hysop.operator.redistribute_intra import RedistributeIntra
 from hysop.operator.redistribute_inter import RedistributeInter
 from hysop.operator.redistribute_overlap import RedistributeOverlap
-#from hysop.mpi.main_var import main_size, main_rank, main_comm
 from hysop.tools.parameters import Discretization, MPIParams
 from hysop import testsenv
 import hysop as pp
@@ -9,11 +8,11 @@ import numpy as np
 import math
 from hysop import Simulation
 from hysop.operator.analytic import Analytic
-from hysop.mpi.main_var import main_comm, main_size
+from hysop.mpi import main_comm, main_size
 from hysop.mpi.tests.utils import create_inter_topos, CPU, GPU, OTHER,\
     create_multitask_context
 from hysop.fields.tests.func_for_tests import v3d, v2d, v3dbis
-from hysop.operator.poisson import Poisson
+from hysop.operators import EnergyEnstrophy
 from hysop.mpi.tests.test_bridge import create_subtopos
 sin = np.sin
 pi = math.pi
@@ -602,8 +601,9 @@ def test_distribute_inter_5():
         op_init.setup()
         op_init.apply(simu)
         # An empty operator for velocity
-        op = Poisson(output_field=velocity, input_field=reference,
-                     discretization=r_ng)
+        op = EnergyEnstrophy(
+            velocity=velocity, vorticity=reference,
+            discretization=r_ng)
         op.discretize()
         op.setup()
 
@@ -618,7 +618,7 @@ def test_distribute_inter_5():
     if domtasks.is_on_task(GPU):
         toporef = op.variables[reference]
         vd = velocity.discretize(toporef)
-        wd = velocity.discretize(toporef)
+        wd = reference.discretize(toporef)
         for d in xrange(domtasks.dimension):
             assert np.allclose(wd.data[d], vd.data[d])
 
@@ -653,8 +653,9 @@ def test_distribute_inter_2d():
         op_init.setup()
         op_init.apply(simu)
         # An empty operator for velocity
-        op = Poisson(output_field=velocity, input_field=vort,
-                     discretization=r_2d)
+        op = EnergyEnstrophy(velocity=velocity,
+                             vorticity=vort,
+                             discretization=r_2d)
         op.discretize()
         op.setup()
 
diff --git a/hysop/problem/problem_tasks.py b/hysop/problem/problem_tasks.py
index aaab86c4e..2a5b2904b 100644
--- a/hysop/problem/problem_tasks.py
+++ b/hysop/problem/problem_tasks.py
@@ -1,7 +1,4 @@
-"""
-@file problem_tasks.py
-
-Extending problem description to handle tasks parallelism.
+"""Extending problem description to handle tasks parallelism.
 Each operator owns a task id that define a process group that are sharing the
 same tasks.
 """
@@ -49,7 +46,7 @@ class ProblemTasks(Problem):
                          domain=domain, dumpFreq=dumpFreq, name=name)
         self.tasks_list = tasks_list
         if main_comm is None:
-            from hysop.mpi.main_var import main_comm
+            from hysop.mpi import main_comm
         self.main_comm = main_comm
         self._main_rank = self.main_comm.Get_rank()
         assert self.main_comm.Get_size() == len(self.tasks_list), \
diff --git a/hysop/tools/parameters.py b/hysop/tools/parameters.py
index 3977a3424..1d3f0a801 100755
--- a/hysop/tools/parameters.py
+++ b/hysop/tools/parameters.py
@@ -8,7 +8,7 @@
 """
 
 from collections import namedtuple
-from hysop.mpi.main_var import main_comm, main_rank, MPI
+from hysop.mpi import main_comm, main_rank, MPI
 from hysop.constants import DEFAULT_TASK_ID
 import hysop.tools.numpywrappers as npw
 
diff --git a/hysop/tools/problem2dot.py b/hysop/tools/problem2dot.py
index f0949d8a4..6eab223eb 100644
--- a/hysop/tools/problem2dot.py
+++ b/hysop/tools/problem2dot.py
@@ -4,7 +4,7 @@
 from hysop.operator.advection import Advection
 from hysop.operator.redistribute import Redistribute
 from hysop.operator.redistribute_inter import RedistributeInter
-from hysop.mpi.main_var import main_rank
+from hysop.mpi import main_rank
 import pydot
 colors = [
     "#dc322f",
diff --git a/hysop/tools/profiler.py b/hysop/tools/profiler.py
index 28e1f3a5c..11fee02cd 100644
--- a/hysop/tools/profiler.py
+++ b/hysop/tools/profiler.py
@@ -2,7 +2,7 @@
 for hysop classes.
 """
 from hysop.mpi import Wtime as ftime
-from hysop.mpi.main_var import main_rank
+from hysop.mpi import main_rank
 import hysop.tools.numpywrappers as npw
 import numpy as np
 
-- 
GitLab