From 1aeb648a06e77c30089df423150dfb969cbead05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franck=20P=C3=A9rignon?= <franck.perignon@imag.fr> Date: Sat, 21 May 2016 17:19:26 +0200 Subject: [PATCH] -Review topo creation - fix tests + pep8 things + docstrings --- CMakeLists.txt | 6 + hysop/domain/domain.py | 30 +- hysop/gpu/gpu_diffusion.py | 6 +- hysop/gpu/gpu_discrete.py | 2 +- hysop/gpu/gpu_multiphase_baroclinic_rhs.py | 6 +- hysop/gpu/gpu_particle_advection.py | 74 +++-- hysop/gpu/multi_gpu_particle_advection.py | 36 +-- .../tests/test_gpu_multiresolution_filter.py | 3 +- hysop/gpu/tools.py | 277 +++++++++++------- hysop/mpi/__init__.py | 35 +-- hysop/mpi/main_var.py | 16 - hysop/mpi/tests/test_bridge.py | 5 +- hysop/mpi/tests/utils.py | 92 +++++- hysop/mpi/topology.py | 110 +++++-- hysop/numerics/remeshing.py | 2 +- hysop/operator/computational.py | 2 +- hysop/operator/curlAndDiffusion.py | 2 +- .../operator/tests/test_adaptive_time_step.py | 3 +- .../tests/test_multiresolution_filter.py | 3 +- hysop/operator/tests/test_redistribute.py | 17 +- hysop/problem/problem_tasks.py | 7 +- hysop/tools/parameters.py | 2 +- hysop/tools/problem2dot.py | 2 +- hysop/tools/profiler.py | 2 +- 24 files changed, 465 insertions(+), 275 deletions(-) delete mode 100644 hysop/mpi/main_var.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e85aa59a..3b5fb6a5d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -348,6 +348,12 @@ if(EXISTS ${CMAKE_SOURCE_DIR}/hysop/__init__.py.in) configure_file(hysop/__init__.py.in ${CMAKE_SOURCE_DIR}/hysop/__init__.py) endif() +if(EXISTS ${CMAKE_SOURCE_DIR}/hysop/mpi/__init__.py.in) + message(STATUS "Generate mpi/__init__.py file ...") + file(REMOVE ${CMAKE_SOURCE_DIR}/hysop/mpi/__init__.py) + configure_file(hysop/mpi/__init__.py.in ${CMAKE_SOURCE_DIR}/hysop/mpi/__init__.py) +endif() + # Hysop C++ library is generated in setup.py by swig # --- C++ main and tests --- if(WITH_LIB_CXX) diff --git a/hysop/domain/domain.py b/hysop/domain/domain.py index 6868c724d..7ba343ab6 100644 --- a/hysop/domain/domain.py +++ b/hysop/domain/domain.py @@ -46,13 +46,11 @@ class Domain(object): Notes ----- - About boundary conditions - ========================== + * About boundary conditions: At the time, only periodic boundary conditions are implemented. Do not use this parameter (i.e. let default values) - About MPI Tasks - ================ + *About MPI Tasks proc_tasks[n] = 12 means that task 12 owns proc n. """ @@ -134,7 +132,8 @@ class Domain(object): return self._tasks_on_proc[main_rank] def create_topology(self, discretization, dim=None, mpi_params=None, - shape=None, cutdir=None): + shape=None, cutdir=None, isperiodic=None, + cartesian_topology=None): """Create or return an existing :class:`~hysop.mpi.topology.Cartesian`. Parameters @@ -152,6 +151,12 @@ class Domain(object): cutdir : list or array of bool set which directions must (may) be distributed, cutdir[dir] = True to distribute data along dir. + isperiodic : list or array of bool, optional + mpi grid periodicity + cartesian_topology : MPI.Cartcomm, optional + a predefined mpi cartesian topology. Use this + when you need the same communicator for two + different meshes/space distribution of data. Returns ------- @@ -161,6 +166,18 @@ class Domain(object): :class:`~hysop.mpi.topology.Cartesian`) or it creates a new topology and register it in the topology dict. + Notes: + ------ + * Almost all parameters above are optional. + Only one must be chosen among dim, cutdir and shape. + See :ref:`topologies` in the Hysop User Guide for details. + * when cartesian_topology is given, dim, shape and cutdir parameters, + if set, are not used to build the mpi topology, but compared with + cartesian_topology parameters. If they do not fit, error may occur. + * See hysop.domain.domain.Domain.create_plane_topology_from_mesh + details to build a plane topology from a given local discretization + (e.g. from fftw or scales precomputation). + """ # set task number tid = self.current_task() @@ -172,7 +189,8 @@ class Domain(object): assert mpi_params.task_id == tid, msg new_topo = Cartesian(self, discretization, dim=dim, mpi_params=mpi_params, shape=shape, - cutdir=cutdir) + cutdir=cutdir, isperiodic=isperiodic, + cartesian_topology=cartesian_topology) newid = new_topo.get_id() return self.topologies[newid] diff --git a/hysop/gpu/gpu_diffusion.py b/hysop/gpu/gpu_diffusion.py index 7496ea541..0f2084b76 100644 --- a/hysop/gpu/gpu_diffusion.py +++ b/hysop/gpu/gpu_diffusion.py @@ -13,7 +13,7 @@ from hysop.gpu.gpu_operator import GPUOperator from hysop.gpu.gpu_kernel import KernelLauncher from hysop.gpu.gpu_discrete import GPUDiscreteField from hysop.tools.profiler import FProfiler -from hysop.mpi.main_var import MPI +from hysop.mpi import Wtime class GPUDiffusion(DiscreteOperator, GPUOperator): @@ -185,7 +185,7 @@ class GPUDiffusion(DiscreteOperator, GPUOperator): def _compute_diffusion_comm(self, simulation): assert self.field_tmp is not None # Compute OpenCL transfer parameters - tc = MPI.Wtime() + tc = Wtime() topo = self.field.topology first_cut_dir = topo.cutdir.tolist().index(True) wait_evt = [] @@ -249,7 +249,7 @@ class GPUDiffusion(DiscreteOperator, GPUOperator): self._to_recv_buf[d], self._to_recv[d], is_blocking=False)) - self.profiler['comm_diffusion'] += MPI.Wtime() - tc + self.profiler['comm_diffusion'] += Wtime() - tc if len(self._cutdir_list) == 1: d_evt = self.num_diffusion( diff --git a/hysop/gpu/gpu_discrete.py b/hysop/gpu/gpu_discrete.py index 0dd5a8596..d51a5fa23 100644 --- a/hysop/gpu/gpu_discrete.py +++ b/hysop/gpu/gpu_discrete.py @@ -98,7 +98,7 @@ class GPUDiscreteField(DiscreteField): # By default, all mpi process are take, otherwise, user create and # gives his own topologies. if topology is None: - from hysop.mpi.main_var import main_rank + from hysop.mpi import main_rank self._rank = main_rank else: self._rank = topology.rank diff --git a/hysop/gpu/gpu_multiphase_baroclinic_rhs.py b/hysop/gpu/gpu_multiphase_baroclinic_rhs.py index b14706fbb..25500f61d 100644 --- a/hysop/gpu/gpu_multiphase_baroclinic_rhs.py +++ b/hysop/gpu/gpu_multiphase_baroclinic_rhs.py @@ -13,7 +13,7 @@ from hysop.gpu.gpu_operator import GPUOperator from hysop.gpu.gpu_kernel import KernelListLauncher from hysop.gpu.gpu_discrete import GPUDiscreteField from hysop.tools.profiler import FProfiler -from hysop.mpi.main_var import MPI +from hysop.mpi import Wtime from hysop.methods_keys import SpaceDiscretisation from hysop.numerics.finite_differences import FD_C_4 @@ -275,7 +275,7 @@ class BaroclinicRHS(DiscreteOperator, GPUOperator): def _compute_baroclinic_rhs_comm(self, simulation): """Compute operator with communications""" - tc = MPI.Wtime() + tc = Wtime() topo = self.rho.topology first_cut_dir = topo.cutdir.tolist().index(True) wait_evt = [] @@ -339,7 +339,7 @@ class BaroclinicRHS(DiscreteOperator, GPUOperator): self._to_recv_buf[d], self._to_recv[d], is_blocking=False)) - self.profiler['comm_baroclinic_rhs_comm'] += MPI.Wtime() - tc + self.profiler['comm_baroclinic_rhs_comm'] += Wtime() - tc self.rhs.events.append(self._call_kernel(wait_evt)) def _call_kernel_one_ghost(self, wait_evt): diff --git a/hysop/gpu/gpu_particle_advection.py b/hysop/gpu/gpu_particle_advection.py index 9a09a6767..3b065ee71 100644 --- a/hysop/gpu/gpu_particle_advection.py +++ b/hysop/gpu/gpu_particle_advection.py @@ -1,11 +1,8 @@ +"""Discrete advection for GPU """ -@file gpu_particle_advection.py - -Discrete advection representation -""" -from abc import ABCMeta, abstractmethod +from abc import abstractmethod from hysop import __VERBOSE__ -from hysop.constants import np, debug, S_DIR +from hysop.constants import np, debug, S_DIR, HYSOP_REAL from hysop.methods_keys import TimeIntegrator, Remesh, \ Support, Splitting, MultiScale from hysop.numerics.integrators.euler import Euler @@ -22,37 +19,34 @@ from hysop.numerics.update_ghosts import UpdateGhostsFull class GPUParticleAdvection(ParticleAdvection, GPUOperator): - """ - Particle advection operator representation on GPU. + """Particle advection solver on GPU """ __metaclass__ = ABCMeta @debug def __init__(self, **kwds): - """ - Create a Advection operator. - Work on a given field (scalar or vector) at a given velocity to compute - advected values. + """Particular advection of field(s) in a given direction, + on GPU. + OpenCL kernels are build once per dimension in order to handle directional splitting with resolution non uniform in directions. - @param velocity : Velocity field - @param fields_on_grid : Advected fields - @param d : Direction to advect - @param device_type : OpenCL device type (default = 'gpu'). - @param method : the method to use. {'m4prime', 'm6prime', 'm8prime', - 'l6star'} - @param user_src : User OpenCL sources. - @param splittingConfig : Directional splitting configuration - (hysop.numerics.splitting.Splitting.__init__) + Parameters + ---------- + kwds : base classes parameters + + Note + ---- + * warning : this operator is derived from ParticleAdvection AND + GPUOperator. kwds must then handle argument of both classes. """ # Set default method if unknown if 'method' not in kwds: kwds['method'] = default.ADVECTION kwds['method'][Support] = 'gpu_2k' - # init base class + # init base class (i.e. ParticleAdvection) super(GPUParticleAdvection, self).__init__(**kwds) self.fields_topo = self.fields_on_grid[0].topology self.velocity_topo = self.velocity.topology @@ -67,6 +61,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): # [ParticleAdvection, GPUOperator], i.e. ParticleAdvection.__init__) # the GPUOperator.__init__ must be explicitely called. # see http://stackoverflow.com/questions/3277367/how-does-pythons-super-work-with-multiple-inheritance + # init second base class (i.e GPUOperator) GPUOperator.__init__( self, platform_id=get_extra_args_from_method(self, 'platform_id', None), @@ -191,11 +186,11 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): if self.direction == 0: self._buffer_initialisations() - ## List of executions + # List of executions self.exec_list = None self._build_exec_list() - ## Particle initialisation OpenCL events for each field: + # Particle initialisation OpenCL events for each field: self._init_events = {self.fields_on_grid[0]: []} @abstractmethod @@ -208,6 +203,8 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): pass def _build_exec_list(self): + """Prepare GPU kernels sequence + """ # Build execution list regarding splitting: # Splitting Strang 2nd order: # 3D: X(dt/2), Y(dt/2), Z(dt), Y(dt/2), X(dt/2) @@ -253,7 +250,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): #[self._init_copy, self._init_copy_r], # X(dt) ] else: - raise ValueError('Not yet implemeted Splitting on GPU : ' + + raise ValueError('Splitting type not yet implemeted on GPU: ' + self.method[Splitting]) def globalMemoryUsagePreview(self, v_shape, shape): @@ -269,17 +266,16 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): pass def _buffer_allocations(self): + """Allocate OpenCL buffers for velocity and advected field. """ - Allocate OpenCL buffers for velocity and advected field. - """ - ## Velocity. + # Velocity. alloc = not isinstance(self.velocity, GPUDiscreteField) GPUDiscreteField.fromField(self.cl_env, self.velocity, self.gpu_precision, simple_layout=False) if alloc: self.size_global_alloc += self.velocity.mem_size - ## Transported field. + # Transported field. alloc = not isinstance(self.fields_on_grid[0], GPUDiscreteField) GPUDiscreteField.fromField(self.cl_env, self.fields_on_grid[0], @@ -288,7 +284,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): if alloc: self.size_global_alloc += self.fields_on_grid[0].mem_size - ## Fields on particles + # Fields on particles self.fields_on_part = {} start = 0 for f in self.fields_on_grid: @@ -324,10 +320,10 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): print gpudf.name, '-> OpenCL Kernel', k_name if gpudf == self.velocity: workItemNumber, gwi, lwi = \ - self.cl_env.get_WorkItems(self.v_resol_dir) + self.cl_env.get_work_items(self.v_resol_dir) else: workItemNumber, gwi, lwi = \ - self.cl_env.get_WorkItems(self.resol_dir) + self.cl_env.get_work_items(self.resol_dir) gpudf.setInitializationKernel(KernelLauncher( cl.Kernel(self.prg, k_name), self.cl_env.queue, gwi, lwi)) @@ -476,8 +472,8 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): """ if usr_src is not None: build_options = self.build_options + self._size_constants - workItemNb, gwi, lwi = self.cl_env.get_WorkItems(self.resol_dir) - v_workItemNb, gwi, lwi = self.cl_env.get_WorkItems(self.v_resol_dir) + workItemNb, gwi, lwi = self.cl_env.get_work_items(self.resol_dir) + v_workItemNb, gwi, lwi = self.cl_env.get_work_items(self.v_resol_dir) build_options += " -D WI_NB=" + str(workItemNb) build_options += " -D V_WI_NB=" + str(v_workItemNb) self.prg = self.cl_env.build_src(usr_src, build_options, 1) @@ -500,10 +496,10 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): build_options += " -D WI_NB=" + str(WINb) build_options += " -D PART_NB_PER_WI=" build_options += str(self.resol_dir[0] / WINb) - ## Build code + # Build code src = [s.replace('RKN', self.method[TimeIntegrator].__name__.lower()) for s in src] - ## Euler integrator + # Euler integrator if self.method[TimeIntegrator] is Euler: if not self._isMultiScale: src = [s for s in src if s.find(Euler.__name__.lower()) < 0] @@ -539,10 +535,10 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): # Build code src = [s.replace('RKN', self.method[TimeIntegrator].__name__.lower()) for s in src] - ## Adding remeshing weights for the multiscale advection + # Adding remeshing weights for the multiscale advection if self._isMultiScale: src.insert(1, self._kernel_cfg['remesh'][0][1]) - ## Euler integrator + # Euler integrator if self.method[TimeIntegrator] is Euler: if not self._isMultiScale: src = [s for s in src if s.find(Euler.__name__.lower()) < 0] @@ -569,7 +565,7 @@ class GPUParticleAdvection(ParticleAdvection, GPUOperator): build_options += " -D WI_NB=" + str(WINb) build_options += " -D PART_NB_PER_WI=" build_options += str(self.resol_dir[0] / WINb) - ## Build code + # Build code prg = self.cl_env.build_src( src, build_options, vec, nb_remesh_components=self.fields_on_grid[0].nb_components) diff --git a/hysop/gpu/multi_gpu_particle_advection.py b/hysop/gpu/multi_gpu_particle_advection.py index 6febad7ff..41023d6b1 100644 --- a/hysop/gpu/multi_gpu_particle_advection.py +++ b/hysop/gpu/multi_gpu_particle_advection.py @@ -14,7 +14,7 @@ from hysop.numerics.remeshing import Linear as Linear_rmsh from hysop.gpu.gpu_kernel import KernelLauncher from hysop.tools.profiler import FProfiler, profile from hysop.gpu import cl, CL_PROFILE -from hysop.mpi.main_var import MPI +from hysop.mpi import Wtime import hysop.tools.numpywrappers as npw @@ -213,7 +213,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection): ## Python remeshing formula for the multiscale interpolation self._py_ms_formula = self.method[MultiScale] if self._isMultiScale: - if not self._py_ms_formula is Linear_rmsh: + if self._py_ms_formula is not Linear_rmsh: raise ValueError('Not yet implemented' + str(self.method[MultiScale])) ## Python remeshing formula @@ -608,7 +608,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection): _evt_r_v.wait() def _exchange_velocity_buffers(self, dt): - ctime = MPI.Wtime() + ctime = Wtime() ghosts = self.velocity_topo.ghosts()[self.direction] if self.direction == 0: max_velo_p = np.max(self.velocity.data[0][-ghosts - 1:, :, :]) @@ -621,9 +621,9 @@ class MultiGPUParticleAdvection(GPUParticleAdvection): max_velo_m = np.max(self.velocity.data[2][:, :, :ghosts + 1]) self._recompute_scal_buffers(max_velo_m, max_velo_p, dt) self._get_velocity_buffers(ghosts) - self.profiler['comm_cpu_advec_get'] += MPI.Wtime() - ctime + self.profiler['comm_cpu_advec_get'] += Wtime() - ctime - ctime = MPI.Wtime() + ctime = Wtime() for b in xrange(self._v_n_blocks): self._l_recv_v[b] = self._comm.Irecv( [self._v_l_buff_flat[self._v_block_slice[b]], @@ -646,7 +646,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection): for b in xrange(self._v_n_blocks): self._l_recv_v[b].Wait() self._r_recv_v[b].Wait() - self.profiler['comm_cpu_advec'] += MPI.Wtime() - ctime + self.profiler['comm_cpu_advec'] += Wtime() - ctime def _todevice_velocity_buffers(self): for b in xrange(self._v_n_blocks): @@ -870,14 +870,14 @@ class MultiGPUParticleAdvection(GPUParticleAdvection): wait_for=[evt_comm_l]) # Send the left buffer - ctime = MPI.Wtime() + ctime = Wtime() for b in xrange(self._s_n_blocks_to_l): self._evt_get_l[b].wait() self._l_send[b] = self._comm.Issend( [self._s_l_buff[self._s_block_slice_to_l[b]], self._s_elem_block_to_l, HYSOP_MPI_REAL], dest=self._L_rk, tag=333 + self._comm_rank + 17 * b) - ctime_send_l = MPI.Wtime() - ctime + ctime_send_l = Wtime() - ctime # Fill and get the right buffer evt_comm_r = self._num_comm_r(wait_evts, dt) @@ -895,14 +895,14 @@ class MultiGPUParticleAdvection(GPUParticleAdvection): is_blocking=False, wait_for=[evt_comm_r]) # Send the right buffer - ctime = MPI.Wtime() + ctime = Wtime() for b in xrange(self._s_n_blocks_to_r): self._evt_get_r[b].wait() self._r_send[b] = self._comm.Issend( [self._s_r_buff[self._s_block_slice_to_r[b]], self._s_elem_block_to_r, HYSOP_MPI_REAL], dest=self._R_rk, tag=888 + self._comm_rank + 19 * b) - ctime_send_r = MPI.Wtime() - ctime + ctime_send_r = Wtime() - ctime # remesh in-domain particles and get left-right layer evt = self._num_comm(wait_evts, dt) @@ -929,7 +929,7 @@ class MultiGPUParticleAdvection(GPUParticleAdvection): is_blocking=False, wait_for=[evt]) - ctime = MPI.Wtime() + ctime = Wtime() # Wait MPI transfer of data from left, add them to local # data and send back to device for b in xrange(self._s_n_blocks_to_r): @@ -937,11 +937,11 @@ class MultiGPUParticleAdvection(GPUParticleAdvection): for b in xrange(self._s_n_blocks_from_l): self._l_recv[b].Wait() evt_get_locl.wait() - ctime_wait_l = MPI.Wtime() - ctime + ctime_wait_l = Wtime() - ctime - calctime = MPI.Wtime() + calctime = Wtime() self._s_locl_buff += self._s_froml_buff - self.profiler['comm_calc_remesh'] += MPI.Wtime() - calctime + self.profiler['comm_calc_remesh'] += Wtime() - calctime evt_set_locl = cl.enqueue_copy( self.cl_env.queue, self.fields_on_grid[0].gpu_data[0], @@ -955,16 +955,16 @@ class MultiGPUParticleAdvection(GPUParticleAdvection): # Wait MPI transfer of data from right, add them to local # data and send back to device - ctime = MPI.Wtime() + ctime = Wtime() for b in xrange(self._s_n_blocks_to_l): self._l_send[b].Wait() for b in xrange(self._s_n_blocks_from_r): self._r_recv[b].Wait() evt_get_locr.wait() - ctime_wait_r = MPI.Wtime() - ctime - calctime = MPI.Wtime() + ctime_wait_r = Wtime() - ctime + calctime = Wtime() self._s_locr_buff += self._s_fromr_buff - self.profiler['comm_calc_remesh'] += MPI.Wtime() - calctime + self.profiler['comm_calc_remesh'] += Wtime() - calctime evt_set_locr = cl.enqueue_copy( self.cl_env.queue, self.fields_on_grid[0].gpu_data[0], diff --git a/hysop/gpu/tests/test_gpu_multiresolution_filter.py b/hysop/gpu/tests/test_gpu_multiresolution_filter.py index cf1ffe130..814d2c85b 100755 --- a/hysop/gpu/tests/test_gpu_multiresolution_filter.py +++ b/hysop/gpu/tests/test_gpu_multiresolution_filter.py @@ -8,7 +8,7 @@ import numpy as np from hysop.methods_keys import Remesh, Support, ExtraArgs from hysop.methods import Rmsh_Linear, L2_1 # In parallel we need to use as many threads as gpu -from hysop.mpi.main_var import main_size, main_rank +from hysop.mpi import main_size, main_rank import pyopencl as cl n_gpu = len(cl.get_platforms()[0].get_devices( device_type=cl.device_type.GPU)) @@ -16,7 +16,6 @@ PROC_TASKS = [0, ] * main_size if main_rank < n_gpu: PROC_TASKS[main_rank] = 1 - L = [1., 1., 1.] O = [0., 0., 0.] simu = Simulation(start=0., end=0.1, nb_iter=1) diff --git a/hysop/gpu/tools.py b/hysop/gpu/tools.py index da279766e..d0ac14247 100644 --- a/hysop/gpu/tools.py +++ b/hysop/gpu/tools.py @@ -1,17 +1,15 @@ -""" -@file tools.py +"""Misc. tools for gpu/OpenCL management. -Tools for gpu management. """ from hysop import __VERBOSE__, __DEFAULT_PLATFORM_ID__, __DEFAULT_DEVICE_ID__ -from hysop.constants import np, HYSOP_REAL, ORDER -from hysop.gpu import cl, clTools, GPU_SRC, CL_PROFILE -import hysop.tools.numpywrappers as npw +from hysop.constants import np, HYSOP_REAL +from hysop.gpu import cl, GPU_SRC, CL_PROFILE import re -import mpi4py.MPI as mpi +from hysop.mpi import MPI FLOAT_GPU, DOUBLE_GPU = np.float32, np.float64 -## Global variable handling an OpenCL Environment instance + +# Global variable handling the OpenCL Environment instance __cl_env = None @@ -21,32 +19,45 @@ class OpenCLEnvironment(object): def __init__(self, platform_id, device_id, device_type, precision, gl_sharing=False, comm=None): - """Create environment. - @param platform_id : OpenCL platform id - @param device_id : OpenCL device id - @param device_type : OpenCL device type - @param precision : Recquired precision - @param resolution : Global resolution - @param gl_sharing : Flag to build a OpenGL shared OpenCL context + """Initialize an OpenCL environment + + Parameters + ---------- + platform_id : int + chosen platform id. + device_id : int + chosen device id. + device_type : string + chosen device type. + precision : int + required precision for real data. + Must be FLOAT_GPU or DOUBLE_GPU. + gl_sharing : bool, optional + True to build a context shared between OpenGL and OpenCL. + Default=False. + comm : mpi communicator, optional + Communicator which handles the OpenCL env. + Default = hysop.mpi.main_comm + """ self._platform_id = platform_id self._device_id = device_id self._device_type = device_type self._gl_sharing = gl_sharing - ## OpenCL platform + # OpenCL platform self.platform = self._get_platform(platform_id) - ## OpenCL device + # OpenCL device self.device = self._get_device(self.platform, device_id, device_type) - ## Device available memory + # Device available memory self.available_mem = self.device.global_mem_size - ## OpenCL context + # OpenCL context self.ctx = self._get_context(self.device, gl_sharing) - ## OpenCL queue + # OpenCL queue self.queue = self._get_queue(self.ctx) - ## MPI sub-communicator for all processes attached to the same device + # MPI sub-communicator for all processes attached to the same device if comm is None: - from hysop.mpi.main_var import main_comm + from hysop.mpi import main_comm else: main_comm = comm # Splitting the mpi communicator by the device id is not enough: @@ -55,44 +66,53 @@ class OpenCLEnvironment(object): import hashlib # The md5 sum of the proc name is tuncated to obtain an integer # for fortran (32bit) - hash_name = hashlib.md5(mpi.Get_processor_name()).hexdigest()[-7:] + hash_name = hashlib.md5(MPI.Get_processor_name()).hexdigest()[-7:] self.gpu_comm = main_comm.Split( color=int(hash_name, 16) + device_id, key=main_comm.Get_rank()) - ## Floating point precision + # Floating point precision self.precision = precision if self.precision is FLOAT_GPU: self.prec_size = 4 elif self.precision is DOUBLE_GPU: self.prec_size = 8 + self.macros = {} self.default_build_opts = "" if CL_PROFILE and self.device.vendor.find('NVIDIA') >= 0: self.default_build_opts += " -cl-nv-verbose" self.default_build_opts += " -Werror" + self._get_precision_opts() - ## Kernels configuration dictionary + # Kernels configuration dictionary if self.device.name == "Cayman": - from config_cayman import kernels_config as kernel_cfg + from hysop.gpu.config_cayman import kernels_config as kernel_cfg elif self.device.name == "Tesla K20m" or \ self.device.name == "Tesla K20Xm": - from config_k20m import kernels_config as kernel_cfg + from hysop.gpu.config_k20m import kernels_config as kernel_cfg else: print "/!\\ Get a defautl kernels config for", self.device.name - from config_default import kernels_config as kernel_cfg + from hysop.gpu.config_default import kernels_config as kernel_cfg self.kernels_config = kernel_cfg self._locMem_Buffers = {} def modify(self, platform_id, device_id, device_type, precision, gl_sharing=False): - """ - Modify OpenCL environment parameters. - @param platform_id : OpenCL platform id - @param device_id : OpenCL device id - @param device_type : OpenCL device type - @param precision : Recquired precision - @param resolution : Global resolution - @param gl_sharing : Flag to build a OpenGL shared OpenCL context + """Modify OpenCL environment parameters. + + Parameters + ---------- + platform_id : int + chosen platform id. + device_id : int + chosen device id. + device_type : string + chosen device type. + precision : int + required precision for real data. + Must be FLOAT_GPU or DOUBLE_GPU. + gl_sharing : bool, optional + True to build a context shared between OpenGL and OpenCL. + Default=False. """ platform_changed, device_changed = False, False if not platform_id == self._platform_id: @@ -110,14 +130,14 @@ class OpenCLEnvironment(object): self.available_mem = self.device.global_mem_size device_changed = True if platform_changed or device_changed or \ - (not self._gl_sharing and not gl_sharing is self._gl_sharing): + (not self._gl_sharing and gl_sharing is not self._gl_sharing): if self._gl_sharing and not gl_sharing: print ("Warning: Loosing Gl shared context.") self._gl_sharing = gl_sharing self.ctx = self._get_context(self.device, gl_sharing) self.queue = self._get_queue(self.ctx) - if not self.precision is precision and not precision is None: - if not self.precision is None: + if self.precision is not precision and precision is not None: + if self.precision is not None: print ("Warning, GPU precision is overrided from",) print (self.precision, 'to', precision) self.precision = precision @@ -127,10 +147,9 @@ class OpenCLEnvironment(object): self.default_build_opts += "-Werror" + self._get_precision_opts() def _get_platform(self, platform_id): - """ - Get an OpenCL platform. - @param platform_id : OpenCL platform id - @return OpenCL platform + """Returns an OpenCL platform + :param platform_id : OpenCL platform id + """ try: # OpenCL platform @@ -147,12 +166,16 @@ class OpenCLEnvironment(object): return platform def _get_device(self, platform, device_id, device_type): - """ - Get an OpenCL device. - @param platform : OpenCL platform - @param device_id : OpenCL device id - @param device_type : OpenCL device type - @return OpenCL device + """Returns an OpenCL device + + Parameters + ---------- + platform_id : int + chosen platform id. + device_id : int + chosen device id. + device_type : string + chosen device type. Try to use given parameters and in case of fails, use pyopencl context creation function. @@ -199,11 +222,15 @@ class OpenCLEnvironment(object): return device def _get_context(self, device, gl_sharing): - """ - Get an OpenCL context. - @param device : OpenCL device - @param gl_sharing : Flag to build a OpenGL shared OpenCL context - @return OpenCL context + """Returns OpenCL context + + Parameters + ---------- + device : OpenCL device + which handles the context. + gl_sharing : bool + True to build a context shared between OpenGL and OpenCL. + Default=False. """ props = None @@ -223,15 +250,15 @@ class OpenCLEnvironment(object): ctx = cl.Context([device]) if __VERBOSE__: print " Context:" - if not props is None: + if props is not None: print " - properties :", props return ctx def _get_queue(self, ctx): - """ - Get OpenCL queue from context - @param ctx : OpenCL context - @return OpenCL queue + """Returns OpenCL queue from context + + :param ctx : OpenCL context + """ props = None if CL_PROFILE: @@ -241,20 +268,31 @@ class OpenCLEnvironment(object): queue = cl.CommandQueue(ctx) if __VERBOSE__: print " Queue" - if not props is None: + if props is not None: print " - properties :", props print "===" return queue def create_other_queue(self): + """Create OpenCL queue from current context + """ return self._get_queue(self.ctx) - def get_WorkItems(self, resolution, vector_width=1): - """ - Set the optimal work-item number and OpenCL space index. - @param resolution : Problem resolution - @param vector_width : OpenCL vector types width - @return work-item number, global space index and local space index + def get_work_items(self, resolution, vector_width=1): + """Set the optimal work-item number and OpenCL space index. + + Parameters + ---------- + resolution : tuple + local mesh resolution + vector_width : int + OpenCL vector types width + + Returns + ------- + int : work-item number + tuple : global space index + tuple : local space index Use 64 work-items in 3D and 256 in 2D. \todo Use Both the number from device capability @@ -297,7 +335,7 @@ class OpenCLEnvironment(object): def _get_precision_opts(self): """Check if device is capable regarding given precision - @return build options regarding precision recquired + and returns build options regarding the required precision. """ opts = "" # Precision supported @@ -336,14 +374,21 @@ class OpenCLEnvironment(object): return opts def build_src(self, files, options="", vector_width=4, - nb_remesh_components=1, macros=None): - """ - Build OpenCL sources - @param files: Source files - @param options : Compiler options to use for buildind - @param vector_width : OpenCL vector type width - @param nb_remesh_components : number of remeshed components - @return OpenCL binaries + nb_remesh_components=1): + """Build OpenCL sources + + Parameters + ---------- + files : file or list of files + sources + options : string + Compiler options to use for buildind + vector_width : int + OpenCL vector type width + nb_remesh_components : int + number of remeshed components + + Returns OpenCL binaries Parse the sources to handle single and double precision. """ @@ -371,9 +416,8 @@ class OpenCLEnvironment(object): self.parse_file(f, vector_width, nb_remesh_components)) f.close() #print gpu_src - if macros is not None: - for k in macros: - gpu_src = gpu_src.replace(k, str(macros[k])) + for k in self.macros: + gpu_src = gpu_src.replace(k, str(self.macros[k])) if self.precision is FLOAT_GPU: # Rexexp to add 'f' suffix to float constants # Match 1.2, 1.234, 1.2e3, 1.2E-05 @@ -413,12 +457,19 @@ class OpenCLEnvironment(object): return build def parse_file(self, f, n=8, nb_remesh_components=1): - """ - Parse a file containing OpenCL sources. - @param f : source file - @param n : vector width - @param nb_remesh_components : number of remeshed components - @return parsed sources as string. + """Parse a file containing OpenCL sources. + + Parameters + ---------- + f : file + n : int, optional + vector width, default=8 + nb_remesh_components : int + number of remeshed components + + Returns + ------- + string, the parsed sources. - <code>__N__</code> is expanded as an integer corresponding to a vector with @@ -569,14 +620,27 @@ def get_opengl_shared_environment(platform_id=None, device_id=None, device_type=None, precision=HYSOP_REAL, comm=None): - """ - Get an OpenCL environment with OpenGL shared enable. - - @param platform_id :OpenCL platform id - @param device_id : OpenCL device id - @param device_type : OpenCL device type - @param precision : Required precision - @return OpenCL platform, device, context and queue + """Build or get an OpenCL environment. + + Parameters + ---------- + platform_id : int + chosen platform id. + device_id : int + chosen device id. + device_type : string + chosen device type. + precision : int, optional + required precision for real data. + Default : HYSOP_REAL + comm : mpi communicator, optional + Communicator which handles the OpenCL env. + Default = hysop.mpi.main_comm + + Returns + ------- + :class:`~hysop.gpu.tools.OpenCLEnvironment` + handling OpenCL platform, device, context and queue The context is obtained with gl-shared properties depending on the OS. """ @@ -598,14 +662,27 @@ def get_opencl_environment(platform_id=None, device_id=None, device_type=None, precision=HYSOP_REAL, comm=None): - """ - Get an OpenCL environment. - - @param platform_id :OpenCL platform id - @param device_id : OpenCL device id - @param device_type : OpenCL device type - @param precision : Required precision - @return OpenCL platform, device, context and queue + """Build or get an OpenCL environment. + + Parameters + ---------- + platform_id : int + chosen platform id. + device_id : int + chosen device id. + device_type : string + chosen device type. + precision : int, optional + required precision for real data. + Default : HYSOP_REAL + comm : mpi communicator, optional + Communicator which handles the OpenCL env. + Default = hysop.mpi.main_comm + + Returns + ------- + :class:`~hysop.gpu.tools.OpenCLEnvironment` + handling OpenCL platform, device, context and queue """ if platform_id is None: diff --git a/hysop/mpi/__init__.py b/hysop/mpi/__init__.py index c10d709c6..f4f4d7a38 100644 --- a/hysop/mpi/__init__.py +++ b/hysop/mpi/__init__.py @@ -11,29 +11,30 @@ in order to make any change of this interface, if required, easiest. At the time we use mpi4py : http://mpi4py.scipy.org - """ -# Everything concerning the chosen mpi implementation is hidden in main_var +# Everything concerning the chosen mpi implementation is hidden from hysop +# main interface. # Why? --> to avoid that things like mpi4py. ... spread everywhere in the # soft so to ease a change of this implementation (if needed). -from hysop.mpi import main_var - -MPI = main_var.MPI -"""MPI underlying implementation -""" +from mpi4py import MPI as MPI +"""MPI interface""" -main_comm = main_var.main_comm -"""Main communicator (copy of comm_world) -""" +main_comm = MPI.COMM_WORLD.Dup() +"""Main communicator""" -main_rank = main_var.main_rank -"""Rank of the current process in the main communicator -""" +main_rank = main_comm.Get_rank() +"""Rank of the current process in main communicator""" -main_size = main_var.main_size -"""Size of the main communicator -""" +main_size = main_comm.Get_size() +"""Number of mpi process in main communicator""" Wtime = MPI.Wtime -"""Timer for MPI calls""" +"""Function to return elapsed time since some time in the past. + +Usage: +tref = Wtime() +# proceed with some computations ... +elapsed = Wtime() - tref +# -> elapsed == time for 'some computations' on the current mpi process +""" diff --git a/hysop/mpi/main_var.py b/hysop/mpi/main_var.py deleted file mode 100644 index 0840b4d5e..000000000 --- a/hysop/mpi/main_var.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -@file main_var.py - -Global parameters related to mpi, for hysop package. - -""" - -# Set the underlying implementation of MPI -# mpi4py must be hidden from user -import mpi4py.MPI -MPI = mpi4py.MPI - -# Create hysop main communicator from COMM_WORLD -main_comm = MPI.COMM_WORLD.Dup() -main_rank = main_comm.Get_rank() -main_size = main_comm.Get_size() diff --git a/hysop/mpi/tests/test_bridge.py b/hysop/mpi/tests/test_bridge.py index ac28390f7..5bfe802d8 100755 --- a/hysop/mpi/tests/test_bridge.py +++ b/hysop/mpi/tests/test_bridge.py @@ -1,6 +1,7 @@ from hysop.domain.box import Box from hysop.tools.parameters import Discretization from hysop.mpi.bridge import Bridge +from hysop.mpi import main_size, main_comm from hysop.mpi.bridge_overlap import BridgeOverlap from hysop.mpi.bridge_inter import BridgeInter import math @@ -42,10 +43,6 @@ def test_bridge3D(): print bridge -from hysop.mpi.main_var import main_size, main_comm -from hysop.mpi.tests.utils import create_subtopos, create_inter_topos - - def test_bridge_overlap(): """ Try the pathologic case where source and target do not apply on diff --git a/hysop/mpi/tests/utils.py b/hysop/mpi/tests/utils.py index 94b56d619..6de9749e4 100644 --- a/hysop/mpi/tests/utils.py +++ b/hysop/mpi/tests/utils.py @@ -1,8 +1,7 @@ -""" -Functions used through mpi-related tests. +"""Functions used in mpi-related tests. """ -from hysop.mpi.main_var import main_comm, main_rank, main_size +from hysop.mpi import main_comm, main_rank, main_size from hysop.tools.parameters import MPIParams import hysop as pp GPU = 4 @@ -11,10 +10,30 @@ OTHER = 12 def create_multitask_context(dim, discr): - # MPI procs are distributed among three tasks - # Two tasks: - # proc 0 and last proc work on 'GPU' task. - # The others on 'CPU' task. + """Create a domain and a topology in a multi-task context + + Parameters + ---------- + dim : int + domain dimension + discr : :class:`~hysop.tools.parameters.Discretization + space discretization + + Returns + ------- + dom : :class:`~hysop.domain.box.Box` + a domain (box-shaped) defined on three differents tasks + topo : :class:`~hysop.mpi.topology.Cartesian` + topology associated to the domain + + Notes + ----- + + This function has sense only if the number of mpi processe + is is greater than or equal to 3. In that case, 3 tasks are defined: + proc 0 and last proc handle 'GPU' task, proc 1 'OTHER' task and all other + procs, 'CPU' task. + """ proc_tasks = [CPU, ] * main_size if main_size > 2: proc_tasks[-1] = GPU @@ -30,6 +49,22 @@ def create_multitask_context(dim, discr): def create_subtopos(domain, discr_source, discr_target): + """Split main mpi-communicator into two topologies. + + Parameters + ---------- + dom : :class:`~hysop.domain.box.Box` + a domain (box-shaped) + discr_source, discr_target : :class:`~hysop.tools.parameters.Discretization + space discretizations for each sub-topo + + + Returns + ------- + source_topo, target_topo: class:`~hysop.mpi.topology.Cartesian` + topologies associated with rank 0 and last rank for 'target' + and other ranks for 'source'. + """ # split main comm into two groups rk_source = [i for i in xrange(main_size)] rk_target = list(rk_source) @@ -57,7 +92,22 @@ def create_subtopos(domain, discr_source, discr_target): def create_nonoverlap_topos(domain, discr_source, discr_target): - # split main comm into two groups + """Split main mpi-communicator into two topologies. + + Parameters + ---------- + dom : :class:`~hysop.domain.box.Box` + a domain (box-shaped) + discr_source, discr_target : :class:`~hysop.tools.parameters.Discretization + space discretizations for each sub-topo + + + Returns + ------- + source_topo, target_topo: class:`~hysop.mpi.topology.Cartesian` + topologies associated with even ranks for 'source' + and odd ranks for 'target'. + """ rk_source = [i for i in xrange(main_size) if i % 2 == 0] rk_target = [i for i in xrange(main_size) if i % 2 != 0] g_source = main_comm.group.Incl(rk_source) @@ -82,10 +132,30 @@ def create_nonoverlap_topos(domain, discr_source, discr_target): def create_inter_topos(dim, discr1, discr2): - # MPI procs are distributed among two tasks + """Create a domain and two topologies in a multi-task context + + Parameters + ---------- + dim : int + domain dimension + discr1, discr2 : :class:`~hysop.tools.parameters.Discretization + space discretizations for topo1 and topo2 + + Returns + ------- + dom : :class:`~hysop.domain.box.Box` + a domain (box-shaped) defined on three differents tasks + topo1, topo2 : :class:`~hysop.mpi.topology.Cartesian` + topologies associated to the domain + + Notes + ----- - # proc 0 and last proc work on 'GPU' task. - # The others on 'CPU' task. + This function has sense only if the number of mpi processe + is is greater than or equal to 3. In that case, 2 tasks are defined: + proc 0 and last proc handle 'GPU' task and all other + procs, 'CPU' task. + """ proc_tasks = [CPU, ] * main_size if main_size > 2: proc_tasks[-1] = GPU diff --git a/hysop/mpi/topology.py b/hysop/mpi/topology.py index 11a8057b1..f21282fdc 100644 --- a/hysop/mpi/topology.py +++ b/hysop/mpi/topology.py @@ -9,7 +9,7 @@ from hysop.constants import debug, ORDER, PERIODIC from hysop.domain.mesh import Mesh from itertools import count -from hysop.mpi.main_var import MPI +from hysop.mpi import MPI from hysop.tools.parameters import Discretization, MPIParams import numpy as np import hysop.tools.numpywrappers as npw @@ -43,18 +43,19 @@ class Cartesian(object): @debug def __new__(cls, *args, **kw): return object.__new__(cls, *args, **kw) -# + # # Counter of topology.Cartesian instances to set a unique id for each # Cartesian topology instance. __ids = count(0) @debug def __init__(self, domain, discretization, dim=None, mpi_params=None, - isperiodic=None, cutdir=None, shape=None, mesh=None): + isperiodic=None, cutdir=None, shape=None, mesh=None, + cartesian_topology=None): """ Parameters - ----------- + ---------- domain : :class:`~hysop.domain.domain.Domain` the geometry on which the topology is defined. discretization : :class:`~hysop.tools.parameters.Discretization` @@ -65,7 +66,7 @@ class Cartesian(object): mpi_params : :class:`~hysop.tools.parameters.MPIParams`, optional mpi setup (comm, task ...). If None, comm = main_comm, task = DEFAULT_TASK_ID. - isperiodic : list or array of bool, optional + isperiodic : tuple, list or array of bool, optional mpi grid periodicity cutdir : list or array of bool set which directions must (may) be distributed, @@ -74,22 +75,28 @@ class Cartesian(object): mpi grid layout mesh : :class:`~hysop.domain.mesh.Mesh`, optional a predifined mesh (it includes local and global grids.) + cartesian_topology : MPI.Cartcomm, optional + a predefined mpi cartesian topology. Use this + when you need the same communicator for two + different meshes/space distribution of data. Notes: ------ - Almost all parameters above are optional. - Only one must be chosen among dim, cutdir and shape. - See :ref:`topologies` in the Hysop User Guide for details. - - See hysop.mpi.topology.Cartesian.plane_precomputed - details to build a plane topology from a given local discretization - (e.g. from fftw or scales precomputation). + * Almost all parameters above are optional. + Only one must be chosen among dim, cutdir and shape. + See :ref:`topologies` in the Hysop User Guide for details. + * when cartesian_topology is given, dim, shape and cutdir parameters, + if set, are not used to build the mpi topology, but compared with + cartesian_topology parameters. If they do not fit, error may occur. + * See hysop.mpi.topology.Cartesian.plane_precomputed + details to build a plane topology from a given local discretization + (e.g. from fftw or scales precomputation). """ # ===== 1 - Required parameters : domain and mpi (comm, task) ==== # An id for the topology self.__id = self.__ids.next() - ## Associated domain + # Associated domain self.domain = domain # - MPI topo params : if mpi_params is None: @@ -121,7 +128,11 @@ class Cartesian(object): # mpi communicator (cartesian topo) self.comm = None - self._build_mpi_topo(shape, cutdir, dim, isperiodic) + if cartesian_topology is None: + self._build_mpi_topo(shape, cutdir, dim, isperiodic) + else: + self._set_with_external_topo(cartesian_topology, shape, + cutdir, dim, isperiodic) # ===== 3 - Get features of the mpi processes grid ===== # Size of the topology (i.e. total number of mpi processes) @@ -136,8 +147,9 @@ class Cartesian(object): # --> proc_coords has values even for directions that # are not distributed. If cutdir = [False, False, True] # then reduced_coords = [ nx ] and proc_coords = [0, 0, nx] - self.proc_coords = npw.dim_zeros(self.domain.dimension) - self.proc_coords[self.cutdir] = reduced_coords + # self.proc_coords = npw.dim_zeros(self.domain.dimension) + # self.proc_coords[self.cutdir] = reduced_coords + self.proc_coords = reduced_coords # Neighbours : self.neighbours[0,i] (resp. [1,i]) # previous (resp. next) neighbour in direction i # (warning : direction in the grid of process). @@ -178,10 +190,49 @@ class Cartesian(object): """ default builder See :ref:`topologies` for details. + """ + self._check_topo_parameters(shape, cutdir, dim, isperiodic) + + # From this point, the following parameters must be properly set: + # - self.shape + # - self.cutdir + # - self._isperiodic + # Then, we can create the mpi topology. + #self.comm = \ + self._isperiodic = tuple([True, ] * self.domain.dimension) + self.comm = self._mpis.comm.Create_cart(self.shape, + periods=self._isperiodic, + reorder=True) + + def _set_with_external_topo(self, cartesian_topology, shape=None, + cutdir=None, dim=None, isperiodic=None): + """Use a pre-defined mpi cartesian topology to set self.comm. + Check compatibility whith other (optional) parameters. + """ + msg = 'Wrong type for input communicator.' + assert isinstance(cartesian_topology, MPI.Cartcomm), msg + self._check_topo_parameters(shape, cutdir, dim, isperiodic) + msg = 'The given cartesian topology does not fit with the others' + msg += ' input parameters.' + assert cartesian_topology.size == np.prod(self.shape), msg + assert cartesian_topology.dim == self.dimension, msg + assert (self.shape == + npw.asdimarray(cartesian_topology.dims)).all(), msg + assert self._isperiodic == cartesian_topology.periods, msg + self.comm = cartesian_topology + + def _check_topo_parameters(self, shape=None, cutdir=None, + dim=None, isperiodic=None): + """Check compatibility between input parameters provided + to build the mpi topology + + After this call, the following attributes must be properly set: + # - self.shape + # - self.cutdir + # - self._isperiodic """ # number of process in parent comm origin_size = self._mpis.comm.Get_size() - if shape is not None: # method (a) msg = ' parameter is useless when shape is provided.' @@ -198,7 +249,7 @@ class Cartesian(object): msg = ' parameter is useless when cutdir is provided.' assert shape is None, 'shape ' + msg assert dim is None, 'dim ' + msg - self.cutdir = npw.asboolarray(cutdir) + self.cutdir = npw.asboolarray(cutdir).copy() self.dimension = self.cutdir[self.cutdir].size shape = npw.asdimarray(MPI.Compute_dims(origin_size, self.dimension)) @@ -237,11 +288,11 @@ class Cartesian(object): # MPI processes grid periodicity. Default is true. if isperiodic is None: - self._isperiodic = npw.ones((self.domain.dimension), - dtype=npw.bool) + self._isperiodic = tuple([True, ] * self.domain.dimension) + else: - self._isperiodic = npw.asboolarray(isperiodic) - assert isperiodic.size == self.domain.dimension + assert len(isperiodic) == self.domain.dimension + self._isperiodic = tuple(isperiodic) # compute real dim ... self.dimension = self.shape[self.cutdir].size @@ -251,16 +302,6 @@ class Cartesian(object): self.dimension = 1 self.cutdir[-1] = True - # From this point, the following parameters must be properly set: - # - self.shape - # - self.cutdir - # - self._isperiodic - # Then, we can create the mpi topology. - self.comm = \ - self._mpis.comm.Create_cart(self.shape[self.cutdir], - periods=self._isperiodic[self.cutdir], - reorder=True) - @staticmethod def _optimizeshape(shape): """Reorder 'shape' according to the chosen @@ -452,10 +493,15 @@ class Cartesian(object): @staticmethod def reset_counter(): + """Set global counter of cartesian topologies to 0. + """ Cartesian.__ids = count(0) class TopoTools(object): + """Static class providing tools to handle set of indices, message tags, + and so on. + """ @staticmethod def gather_global_indices(topo, toslice=True, root=None, comm=None): diff --git a/hysop/numerics/remeshing.py b/hysop/numerics/remeshing.py index 4cdc0ea7b..5492bc3e2 100644 --- a/hysop/numerics/remeshing.py +++ b/hysop/numerics/remeshing.py @@ -391,7 +391,7 @@ def polynomial_optimisation(): tt += (MPI.Wtime() - t) print tt, s - from hysop.mpi.main_var import MPI + from hysop.mpi import MPI nb = 128 a = npw.asrealarray(np.random.random((nb, nb, nb))) r = np.zeros_like(a) diff --git a/hysop/operator/computational.py b/hysop/operator/computational.py index 6af67b9d2..ee6db6c9f 100755 --- a/hysop/operator/computational.py +++ b/hysop/operator/computational.py @@ -327,7 +327,7 @@ class Computational(Operator): self.discrete_op.computation_time() self.time_info = self.discrete_op.time_info else: - from hysop.mpi.main_var import main_rank + from hysop.mpi import main_rank shortName = str(self.__class__).rpartition('.')[-1][0:-2] s = '[' + str(main_rank) + '] ' + shortName s += " : operator not discretized --> no computation, time = 0." diff --git a/hysop/operator/curlAndDiffusion.py b/hysop/operator/curlAndDiffusion.py index b171bd647..8b86d2bff 100644 --- a/hysop/operator/curlAndDiffusion.py +++ b/hysop/operator/curlAndDiffusion.py @@ -50,7 +50,7 @@ class CurlDiffusion(Operator): Create a discrete Diffusion operator from given specifications. """ if self._comm is None: - from hysop.mpi.main_var import main_comm as comm + from hysop.mpi import main_comm as comm else: comm = self._comm diff --git a/hysop/operator/tests/test_adaptive_time_step.py b/hysop/operator/tests/test_adaptive_time_step.py index 81e1016e2..54284a67b 100755 --- a/hysop/operator/tests/test_adaptive_time_step.py +++ b/hysop/operator/tests/test_adaptive_time_step.py @@ -3,7 +3,7 @@ import hysop as pp from hysop.operator.adapt_timestep import AdaptTimeStep from hysop.problem.simulation import Simulation from hysop.tools.parameters import Discretization -from hysop.mpi import main_comm +from hysop.mpi import main_comm, main_size import numpy as np import hysop.tools.numpywrappers as npw import os @@ -98,7 +98,6 @@ def test_adapt_4(): GPU = 4 CPU = 1 VISU = 12 - from hysop.mpi.main_var import main_size proc_tasks = [CPU, ] * main_size if main_size > 4: diff --git a/hysop/operator/tests/test_multiresolution_filter.py b/hysop/operator/tests/test_multiresolution_filter.py index 04c5b0514..198d8ce4a 100755 --- a/hysop/operator/tests/test_multiresolution_filter.py +++ b/hysop/operator/tests/test_multiresolution_filter.py @@ -7,8 +7,7 @@ import hysop.tools.numpywrappers as npw import numpy as np from hysop.methods_keys import Remesh from hysop.methods import Rmsh_Linear, L2_1 -from hysop.mpi.main_var import main_size - +from hysop.mpi import main_size L = [1., 1., 1.] O = [0., 0., 0.] diff --git a/hysop/operator/tests/test_redistribute.py b/hysop/operator/tests/test_redistribute.py index 917df64a1..ce9cadda0 100755 --- a/hysop/operator/tests/test_redistribute.py +++ b/hysop/operator/tests/test_redistribute.py @@ -1,7 +1,6 @@ from hysop.operator.redistribute_intra import RedistributeIntra from hysop.operator.redistribute_inter import RedistributeInter from hysop.operator.redistribute_overlap import RedistributeOverlap -#from hysop.mpi.main_var import main_size, main_rank, main_comm from hysop.tools.parameters import Discretization, MPIParams from hysop import testsenv import hysop as pp @@ -9,11 +8,11 @@ import numpy as np import math from hysop import Simulation from hysop.operator.analytic import Analytic -from hysop.mpi.main_var import main_comm, main_size +from hysop.mpi import main_comm, main_size from hysop.mpi.tests.utils import create_inter_topos, CPU, GPU, OTHER,\ create_multitask_context from hysop.fields.tests.func_for_tests import v3d, v2d, v3dbis -from hysop.operator.poisson import Poisson +from hysop.operators import EnergyEnstrophy from hysop.mpi.tests.test_bridge import create_subtopos sin = np.sin pi = math.pi @@ -602,8 +601,9 @@ def test_distribute_inter_5(): op_init.setup() op_init.apply(simu) # An empty operator for velocity - op = Poisson(output_field=velocity, input_field=reference, - discretization=r_ng) + op = EnergyEnstrophy( + velocity=velocity, vorticity=reference, + discretization=r_ng) op.discretize() op.setup() @@ -618,7 +618,7 @@ def test_distribute_inter_5(): if domtasks.is_on_task(GPU): toporef = op.variables[reference] vd = velocity.discretize(toporef) - wd = velocity.discretize(toporef) + wd = reference.discretize(toporef) for d in xrange(domtasks.dimension): assert np.allclose(wd.data[d], vd.data[d]) @@ -653,8 +653,9 @@ def test_distribute_inter_2d(): op_init.setup() op_init.apply(simu) # An empty operator for velocity - op = Poisson(output_field=velocity, input_field=vort, - discretization=r_2d) + op = EnergyEnstrophy(velocity=velocity, + vorticity=vort, + discretization=r_2d) op.discretize() op.setup() diff --git a/hysop/problem/problem_tasks.py b/hysop/problem/problem_tasks.py index aaab86c4e..2a5b2904b 100644 --- a/hysop/problem/problem_tasks.py +++ b/hysop/problem/problem_tasks.py @@ -1,7 +1,4 @@ -""" -@file problem_tasks.py - -Extending problem description to handle tasks parallelism. +"""Extending problem description to handle tasks parallelism. Each operator owns a task id that define a process group that are sharing the same tasks. """ @@ -49,7 +46,7 @@ class ProblemTasks(Problem): domain=domain, dumpFreq=dumpFreq, name=name) self.tasks_list = tasks_list if main_comm is None: - from hysop.mpi.main_var import main_comm + from hysop.mpi import main_comm self.main_comm = main_comm self._main_rank = self.main_comm.Get_rank() assert self.main_comm.Get_size() == len(self.tasks_list), \ diff --git a/hysop/tools/parameters.py b/hysop/tools/parameters.py index 3977a3424..1d3f0a801 100755 --- a/hysop/tools/parameters.py +++ b/hysop/tools/parameters.py @@ -8,7 +8,7 @@ """ from collections import namedtuple -from hysop.mpi.main_var import main_comm, main_rank, MPI +from hysop.mpi import main_comm, main_rank, MPI from hysop.constants import DEFAULT_TASK_ID import hysop.tools.numpywrappers as npw diff --git a/hysop/tools/problem2dot.py b/hysop/tools/problem2dot.py index f0949d8a4..6eab223eb 100644 --- a/hysop/tools/problem2dot.py +++ b/hysop/tools/problem2dot.py @@ -4,7 +4,7 @@ from hysop.operator.advection import Advection from hysop.operator.redistribute import Redistribute from hysop.operator.redistribute_inter import RedistributeInter -from hysop.mpi.main_var import main_rank +from hysop.mpi import main_rank import pydot colors = [ "#dc322f", diff --git a/hysop/tools/profiler.py b/hysop/tools/profiler.py index 28e1f3a5c..11fee02cd 100644 --- a/hysop/tools/profiler.py +++ b/hysop/tools/profiler.py @@ -2,7 +2,7 @@ for hysop classes. """ from hysop.mpi import Wtime as ftime -from hysop.mpi.main_var import main_rank +from hysop.mpi import main_rank import hysop.tools.numpywrappers as npw import numpy as np -- GitLab