diff --git a/examples/fixed_point/heat_equation.py b/examples/fixed_point/heat_equation.py index 531b0000e1f9066cc0352530c609e0a31add6142..b11af4ab32a0c4f787a3366e532f4ef832a233a6 100644 --- a/examples/fixed_point/heat_equation.py +++ b/examples/fixed_point/heat_equation.py @@ -16,6 +16,9 @@ def CS( data, coords, t, component): chi = lambda x,y,z: np.sqrt((x-pos[0])*(x-pos[0])+(y-pos[1])*(y-pos[1])+0.*z)<=RADIUS data[...] = 0. data[chi(x,y,z)] = np.cos(t()) +def init_u(data, coords, component): + (x,y,z) = coords + data[...] = 0. def compute(args): from hysop import Box, Simulation, Problem, Field, MPIParams, IO, IOParams, main_rank @@ -52,10 +55,10 @@ def compute(args): # and configure how the code is generated and compiled at runtime. # Create an explicit OpenCL context from user parameters - from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env + from hysop.backend.device.opencl.opencl_tools import get_or_create_opencl_env, get_device_number cl_env = get_or_create_opencl_env(mpi_params=mpi_params, platform_id=args.cl_platform_id, - device_id=args.cl_device_id) + device_id=box.machine_rank%get_device_number() if args.cl_device_id is None else None) # Configure OpenCL kernel generation and tuning (already done by HysopArgParser) from hysop.methods import OpenClKernelConfig @@ -170,6 +173,7 @@ def compute(args): t=t, dt=dt) simu.write_parameters(t, fixedPoint.it_num, filename='parameters.txt', precision=8) + problem.initialize_field(u, formula=init_u) problem.solve(simu, dry_run=args.dry_run) problem.finalize() diff --git a/hysop/backend/device/opencl/opencl_tools.py b/hysop/backend/device/opencl/opencl_tools.py index cc4b66f6d9d69a798fef41653167d8d5722b1c54..2be6854855f6c8be265a429d21eaf5ef8b380552 100644 --- a/hysop/backend/device/opencl/opencl_tools.py +++ b/hysop/backend/device/opencl/opencl_tools.py @@ -89,7 +89,7 @@ def convert_device_type(device_type): if (device_type is None): return None check_instance(device_type, DeviceType) - + conversion = { DeviceType.ALL: cl.device_type.ALL, DeviceType.ACCELERATOR: cl.device_type.ACCELERATOR, @@ -112,14 +112,14 @@ def convert_precision(precision): if (precision is None): return None check_instance(precision, Precision) - + if precision == Precision.SAME: msg='Cannot convert Precision.SAME to numpy dtype.' raise ValueError(msg) if precision == Precision.QUAD: msg= 'Numpy does not support the 128-bit IEEE quad precision data type.' raise RuntimeError(msg) - + #TODO when long double will be supported check if device has np.float96 or np.float128 long doubles # (ie padded to 3*32bits or 2*64bits) conversion = { @@ -136,6 +136,11 @@ def convert_precision(precision): return conversion[precision] +@static_vars(opencl_environments=dict()) +def get_device_number(platform_id = None): + platform_id = first_not_None(platform_id, __DEFAULT_PLATFORM_ID__) + platform = get_platform(platform_id, strict=True) + return len(platform.get_devices()) @static_vars(opencl_environments=dict()) def get_or_create_opencl_env(mpi_params, @@ -146,35 +151,35 @@ def get_or_create_opencl_env(mpi_params, **kargs): """ Create or an OpenClEnvironment from given parameters if it does not already exists. - All environements are kept alive (cached) in a dictionary local to this + All environements are kept alive (cached) in a dictionary local to this function (ie. all opencl operators can share the same OpenClEnvironment). """ - + platform_id = first_not_None(platform_id, __DEFAULT_PLATFORM_ID__) device_id = first_not_None(device_id, __DEFAULT_DEVICE_ID__) device_type = first_not_None(device_type, DeviceType.ALL) - + check_instance(mpi_params, MPIParams) check_instance(platform_id, int) check_instance(device_id, int) check_instance(device_type, DeviceType, allow_none=True) check_instance(gl_sharing, bool) - + key = (mpi_params, platform_id, device_id, device_type, gl_sharing,) - + opencl_envs = get_or_create_opencl_env.opencl_environments if key in opencl_envs: return opencl_envs[key] - + from hysop.backend.device.opencl.opencl_env import OpenClEnvironment - env = OpenClEnvironment(platform_id=platform_id, device_id=device_id, - device_type=device_type, gl_sharing=gl_sharing, mpi_params=mpi_params, + env = OpenClEnvironment(platform_id=platform_id, device_id=device_id, + device_type=device_type, gl_sharing=gl_sharing, mpi_params=mpi_params, **kargs) opencl_envs[key] = env return env - + def create_queue(ctx, props=None): """ diff --git a/hysop/domain/domain.py b/hysop/domain/domain.py index 1f0d43f01528a22d31b1846ac7a23639a90d72a7..5daa7ec03588215046fafdc2ba0eb60383de6799 100644 --- a/hysop/domain/domain.py +++ b/hysop/domain/domain.py @@ -12,13 +12,14 @@ from hysop.tools.handle import RegisteredObject, TaggedObjectView from hysop.tools.types import check_instance from hysop.tools.numpywrappers import npw from hysop.symbolic.frame import SymbolicFrame +from hysop.deps import hashlib, np class DomainView(TaggedObjectView): """Abstract base class for views on domains. """ __metaclass__ = ABCMeta __slots__ = ('_domain', '_topology_state') - + @debug def __new__(cls, topology_state, domain=None, **kwds): """Create and initialize a DomainView.""" @@ -41,7 +42,7 @@ class DomainView(TaggedObjectView): def _get_topology_state(self): """Return the topology state altering this domain view.""" return self._topology_state - + def _get_dim(self): """Return the dimension of the domain.""" return self._domain._dim @@ -61,6 +62,16 @@ class DomainView(TaggedObjectView): def _get_task_rank(self): """Return the rank of the process in the task communicator.""" return self._domain._task_rank + def _get_machine_comm(self): + """ + Return the communicator that owns the current process. + This is the sub-communicator which has been obtained by splitting. + the parent communicator by machine name. + """ + return self._domain._machine_comm + def _get_machine_rank(self): + """Return the rank of the process in the machine communicator.""" + return self._domain._machine_rank def _get_proc_tasks(self): """Return mapping between mpi process rank and task identifier.""" return self._domain._proc_tasks @@ -73,7 +84,7 @@ class DomainView(TaggedObjectView): def _get_frame(self): """Get symbolic frame associated to this domain.""" return self._domain._frame - + def task_on_proc(self, parent_rank): """Get task identifier for a given mpi process (parent communicator rank).""" if parent_rank >= len(self._domain._proc_tasks): @@ -84,7 +95,7 @@ class DomainView(TaggedObjectView): def current_task(self): """Get task number of the current mpi process.""" return self.task_on_proc(self._domain._parent_rank) - + def is_on_task(self, params): """Test if the current process corresponds to param task.""" if isinstance(params, MPIParams): @@ -96,7 +107,7 @@ class DomainView(TaggedObjectView): msg=msg.format(type(params)) raise TypeError(msg) return (self.current_task() == task_id) - + def print_topologies(self): """Print all topologies registered on the domain.""" print self.short_description() + ' defined the following topologies:' @@ -129,11 +140,11 @@ class DomainView(TaggedObjectView): def __hash__(self): return id(self._domain) ^ hash(self._topology_state) - + def __str__(self): """Equivalent to self.long_description()""" return self.long_description() - + domain = property(_get_domain) dim = property(_get_dim) proc_tasks = property(_get_proc_tasks) @@ -141,18 +152,20 @@ class DomainView(TaggedObjectView): parent_rank = property(_get_parent_rank) task_comm = property(_get_task_comm) task_rank = property(_get_task_rank) + machine_comm = property(_get_machine_comm) + machine_rank = property(_get_machine_rank) registered_topologies = property(_get_registered_topologies) frame = property(_get_frame) - + class Domain(RegisteredObject): """Abstract base class for the description of physical domains. """ __metaclass__ = ABCMeta - + @debug def __new__(cls, dim, parent_comm=None, proc_tasks=None, **kwds): """ - Create or get an existing physical domain of given dim on a specified MPI + Create or get an existing physical domain of given dim on a specified MPI communicator and specific tasks. Parameters @@ -190,33 +203,33 @@ class Domain(RegisteredObject): ----- *Parent communicator is split according to proc_tasks. *About MPI Tasks - proc_tasks[n] = 12 means that task 12 owns proc n + proc_tasks[n] = 12 means that task 12 owns proc n or equivalently that proc n is dedicated to task 12. - *A dupped parent_comm will return another idenpendent domain instance, - because MPI communicators are hashed trough their python object id. + *A dupped parent_comm will return another idenpendent domain instance, + because MPI communicators are hashed trough their python object id. """ - + dim = int(dim) parent_comm = parent_comm or main_comm proc_tasks = proc_tasks or [HYSOP_DEFAULT_TASK_ID,] * parent_comm.Get_size() proc_tasks = npw.asdimarray(proc_tasks) assert proc_tasks.size == parent_comm.Get_size() - + npw.set_readonly(proc_tasks) - + # double check types, to be sure RegisteredObject will work as expected check_instance(dim, int) check_instance(parent_comm, MPI.Intracomm) check_instance(proc_tasks, npw.ndarray, dtype=HYSOP_DIM) - obj = super(Domain,cls).__new__(cls, dim=dim, parent_comm=parent_comm, + obj = super(Domain,cls).__new__(cls, dim=dim, parent_comm=parent_comm, proc_tasks=proc_tasks, tag_prefix='d', **kwds) if not obj.obj_initialized: obj.__initialize(dim, parent_comm, proc_tasks) return obj - + @debug def __initialize(self, dim, parent_comm, proc_tasks): parent_rank = parent_comm.Get_rank() @@ -227,15 +240,24 @@ class Domain(RegisteredObject): else: assert len(proc_tasks) == parent_size task_comm = parent_comm.Split(color=proc_tasks[parent_rank], - key=parent_rank) + key=parent_rank) task_rank = task_comm.Get_rank() - + + # Build a per-machine communicator in order to get a rank on local machines + # Split accoring to machine name hashed and converted to integer (strings generally differs only from a single character) + machine_comm = parent_comm.Split( + color=np.int32(int(hashlib.md5(MPI.Get_processor_name()).hexdigest(),16)%np.iinfo(np.int32).max), + key=parent_rank) + machine_rank = machine_comm.Get_rank() + self._dim = dim self._parent_comm = parent_comm self._parent_rank = parent_rank self._task_comm = task_comm self._task_rank = task_rank + self._machine_comm = machine_comm + self._machine_rank = machine_rank self._proc_tasks = proc_tasks self._registered_topologies = {} self._frame = SymbolicFrame(dim=dim) @@ -271,5 +293,3 @@ class Domain(RegisteredObject): def view(self, topology_state): """Return a view of this domain altered by some topology_state.""" pass - -