From 64899172d3b8e725ca3eaf827dd6142084d4de3d Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Keck <Jean-Baptiste.Keck@imag.fr> Date: Tue, 19 Sep 2017 17:01:32 +0200 Subject: [PATCH] working inplace and out of place transpose operators --- hysop/__init__.py | 1 - .../device/codegen/kernels/transpose.py | 83 ++++------------ hysop/backend/device/kernel_autotuner.py | 11 ++- .../backend/device/kernel_autotuner_config.py | 1 + .../opencl/autotunable_kernels/transpose.py | 89 +++++++++-------- .../opencl/opencl_autotunable_kernel.py | 22 ++++- hysop/backend/device/opencl/opencl_copy.py | 99 ++++++++++++++----- hysop/backend/device/opencl/opencl_kernel.py | 8 ++ .../opencl/opencl_kernel_autotuner_config.py | 4 +- .../device/opencl/opencl_kernel_launcher.py | 71 ++++++++++--- .../device/opencl/operator/transpose.py | 22 ++--- hysop/core/arrays/array_backend.py | 4 + hysop/core/graph/computational_operator.py | 1 + hysop/core/graph/node_generator.py | 4 +- hysop/fields/cartesian_discrete_field.py | 6 +- hysop/operator/base/transpose_operator.py | 2 + hysop/operator/tests/test_transpose.py | 73 ++++++++------ hysop/operator/transpose.py | 5 +- hysop/tools/io_utils.py | 36 +------ 19 files changed, 310 insertions(+), 232 deletions(-) diff --git a/hysop/__init__.py b/hysop/__init__.py index bfab7f120..3afa33583 100644 --- a/hysop/__init__.py +++ b/hysop/__init__.py @@ -28,7 +28,6 @@ __ENABLE_LONG_TESTS__ = False __DEFAULT_PLATFORM_ID__ = 1 __DEFAULT_DEVICE_ID__ = 0 - if __MPI_ENABLED__: from hysop.core.mpi import MPI, main_rank, main_size, \ host_rank, interhost_size, \ diff --git a/hysop/backend/device/codegen/kernels/transpose.py b/hysop/backend/device/codegen/kernels/transpose.py index d4199d1a8..05f8df051 100644 --- a/hysop/backend/device/codegen/kernels/transpose.py +++ b/hysop/backend/device/codegen/kernels/transpose.py @@ -37,8 +37,8 @@ class TransposeKernelGenerator(KernelCodeGenerator): pdim = len(axes) assert pdim>=2 assert set(axes)==set(range(pdim)) - last_axe_permuted = (axes[-1] != (pdim-1)) - if last_axe_permuted: + contiguous_permutation = (axes[-1] != (pdim-1)) + if contiguous_permutation: tile_indexes = (pdim-1, axes[-1]) else: tile_indexes = (pdim-1,) @@ -59,7 +59,7 @@ class TransposeKernelGenerator(KernelCodeGenerator): else: continue j+=1 - return (last_axe_permuted, wdim, work_shape, tile_indexes) + return (contiguous_permutation, wdim, work_shape, tile_indexes) @classmethod def max_local_worksize(cls, shape, work_dim, tile_size, vectorization, axes): @@ -91,15 +91,10 @@ class TransposeKernelGenerator(KernelCodeGenerator): return max_local_worksize @classmethod - def shape_to_worksize(cls, shape, tile_size, - vectorization, axes, local_work_size, - return_tile_indexes=False, convert_to_numpy_axes=False): - if convert_to_numpy_axes: - # numpy axes are fortan contiguous (ie. axe 0 is the one with the greatest stride) - axes = np.asarray(axes) - axes = (axes.size - axes - 1)[::-1] - nt = tile_size * vectorization - + def compute_global_size(cls, shape, tile_size, + vectorization, axes, + local_work_size, work_load): + pdim = len(axes) contiguous_permutation = (axes[-1] != (pdim-1)) if contiguous_permutation: @@ -112,58 +107,25 @@ class TransposeKernelGenerator(KernelCodeGenerator): assert wdim <= pdim, 'workdim to big.' assert wdim >= (1 + int(contiguous_permutation)), 'workdim to small.' - work_size = np.empty(shape=(wdim,), dtype=np.int32) + ngroups = np.empty(shape=(wdim,), dtype=np.int32) + vts = tile_size * vectorization + ts = tile_size j=0 for i,Si in enumerate(shape): if i==0: - work_size[j] = (Si+tile_size*vectorization-1)/(tile_size*vectorization) - work_size[j] *=local_work_size[j] + wl = work_load[j] + ngroups[j] = (Si+vts*wl-1)/(vts*wl) elif i in tile_indexes: - work_size[j] = (Si+tile_size-1)/tile_size * local_work_size[j] + wl = work_load[j] + ngroups[j] = ((Si+ts*wl-1)/(ts*wl)) elif i < (wdim - int(contiguous_permutation and tile_indexes[1]>wdim-1)): - work_size[j] = Si + wl = work_load[j] + ngroups[j] = (Si+wl-1)/wl else: continue j+=1 assert j==wdim, '{} != {}'.format(j, wdim) - - if return_tile_indexes: - return (tile_indexes, work_size) - else: - return work_size - - @classmethod - def get_max_global_size(cls, work_size, work_load, **kargs): - """ - Return global_work_size from effective work_size without - taking into account local_work_size alignment - """ - - work_size = np.asarray(work_size).copy() - work_load = np.asarray(work_load).copy() - global_size = ((work_size+work_load-1)/work_load) - - return global_size - - def get_global_size(self, work_size, local_work_size, work_load=None): - """ - Return global_work_size from effective work_size and given local_work_size - global_work_size will be a multiple of local_work_size - """ - work_dim = self.work_dim - work_load = [1]*work_dim if (work_load is None) else work_load - - work_size = np.asarray(work_size) - work_load = np.asarray(work_load) - local_work_size = np.asarray(local_work_size) - - if 'local_size' in self.known_vars: - assert (self.known_vars['local_size'] == local_work_size[:work_dim]).all(),\ - 'local_work_size mismatch!' - - max_global_size = self.get_max_global_size(work_size, work_load) - global_size = ((max_global_size+local_work_size-1)/local_work_size) * local_work_size - + global_size = ngroups * local_work_size return global_size def required_workgroup_cache_size(self): @@ -192,7 +154,6 @@ class TransposeKernelGenerator(KernelCodeGenerator): def __init__(self, typegen, ctype, vectorization, axes, tile_size, tile_padding, use_diagonal_coordinates = True, - convert_to_numpy_axes = False, is_inplace = False, known_vars = None, debug_mode = False, @@ -201,13 +162,10 @@ class TransposeKernelGenerator(KernelCodeGenerator): axes = np.asarray(axes) pdim = axes.size Pdim = upper_pow2_or_3(pdim) - - # numpy axes are fortan contiguous (ie. axe 0 is the one with the greatest stride) - if convert_to_numpy_axes: - axes = (pdim - axes - 1)[::-1] assert pdim <= 16, 'Maximal permutation dimension is 16.' assert Pdim in [1,2,3,4,8,16] assert vectorization in [1,2,4,8,16] + assert tile_padding >= 0 # check permutation axes msg='Invalid permutation {} for dimension {}.' @@ -250,8 +208,9 @@ class TransposeKernelGenerator(KernelCodeGenerator): tile_index_to_id = dict( (j,i) for (i,j) in enumerate(tile_indexes) ) device = typegen.device - if device.max_work_item_dimensions < tdim: - msg='OpenCL device {} does not support {} working dimensions required to transpose whith axes {}.' + if (device.max_work_item_dimensions < tdim): + msg='OpenCL device {} does not support {} working dimensions required ' + msg+='to transpose whith axes {}.' msg=msg.format(device.name, tdim, axes) work_dim = min(pdim, device.max_work_item_dimensions) diff --git a/hysop/backend/device/kernel_autotuner.py b/hysop/backend/device/kernel_autotuner.py index 9151b265e..a8323ea28 100644 --- a/hysop/backend/device/kernel_autotuner.py +++ b/hysop/backend/device/kernel_autotuner.py @@ -48,7 +48,6 @@ class KernelAutotuner(object): self.build_opts = tunable_kernel.build_opts self.indent = lambda i: ' '*i - self.autotuner_config.verbose = 1 self.verbose = self.autotuner_config.verbose #self._init_and_load_cache() @@ -148,7 +147,8 @@ class KernelAutotuner(object): tuple(work_load), tuple(global_work_size), tuple(local_work_size), - prg, kernel, statistics, src_hash) + prg, kernel, statistics, + kernel_src, src_hash) kept_count += 1 except KernelGenerationError as e: if verbose>1: @@ -179,7 +179,7 @@ class KernelAutotuner(object): self._print_step(step_count, '{} BEST'.format(len(candidates)), nruns) for (run_key, run_params) in candidates: (extra_params, work_load, global_work_size, local_work_size, - prg, kernel, old_stats, src_hash) = run_params + _, kernel, old_stats, _, _) = run_params self.bench_one_from_binary(kernel=kernel, target_nruns=nruns, @@ -195,7 +195,8 @@ class KernelAutotuner(object): self._print_footer(ellapsed=timer.interval, best_candidate=best_candidate) result_keys = ('extra_parameters', 'work_load', 'global_work_size', 'local_work_size', - 'program', 'kernel', 'kernel_statistics', 'src_hash') + 'program', 'kernel', 'kernel_statistics', 'kernel_src', 'src_hash') + assert len(result_keys) == len(best_candidate[1]) return dict(zip(result_keys, best_candidate[1])) @@ -260,7 +261,7 @@ class KernelAutotuner(object): def _print_footer(self, ellapsed, best_candidate): if self.verbose: (best_extra_params, best_work_load, best_global_size, best_local_size, - _, _, best_stats, _) = best_candidate[1] + _, _, best_stats, _, _) = best_candidate[1] if ellapsed is not None: self._print_separator() msg='\n|| AUTOTUNING SUCCESSFULLY FINISHED IN {}.' diff --git a/hysop/backend/device/kernel_autotuner_config.py b/hysop/backend/device/kernel_autotuner_config.py index 1bd102af8..00179a6f5 100644 --- a/hysop/backend/device/kernel_autotuner_config.py +++ b/hysop/backend/device/kernel_autotuner_config.py @@ -44,6 +44,7 @@ class KernelAutotunerConfig(object): self.debug = debug self.override_cache = override_cache self.nruns = nruns + self.dump_folder = dump_folder @abstractmethod def default_dump_folder(self): diff --git a/hysop/backend/device/opencl/autotunable_kernels/transpose.py b/hysop/backend/device/opencl/autotunable_kernels/transpose.py index 9121b87f5..88ba45921 100644 --- a/hysop/backend/device/opencl/autotunable_kernels/transpose.py +++ b/hysop/backend/device/opencl/autotunable_kernels/transpose.py @@ -1,8 +1,9 @@ from hysop.tools.numpywrappers import npw from hysop.tools.types import check_instance -from hysop.tools.misc import upper_pow2 +from hysop.tools.misc import upper_pow2, previous_pow2 from hysop.tools.units import bytes2str +from hysop.constants import AutotunerFlags from hysop.backend.device.opencl import cl, clTools from hysop.backend.device.opencl.opencl_autotunable_kernel import OpenClAutotunableKernel from hysop.backend.device.codegen.kernels.transpose import TransposeKernelGenerator @@ -43,7 +44,8 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): check_instance(axes, tuple, values=int) check_instance(is_inplace, bool) - self._check_cartesian_fields(input_field, output_field, check_res=True) + self._check_cartesian_fields(input_field, output_field, + check_res=False, check_size=True) dim = input_field.domain.dim dtype = input_field.dtype @@ -57,13 +59,18 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): assert axes != tuple(range(dim)) # check if is_inplace is allowed - assert (is_inplace == (input_field == output_field)) + assert (is_inplace == (input_field.dfield == output_field.dfield)) if is_inplace: #Only 2D square matrix inplace transposition is supported - compute_inplace = (self.dim == 2) + compute_inplace = (dim == 2) compute_inplace &= all(shape[0]==shape) else: compute_inplace = False + + if compute_inplace: + kernel_args = (input_field(0).data,) + else: + kernel_args = (input_field(0).data, output_field(0).data) if (name is None): name = 'transpose_{}_[{}]_{}'.format(ctype, @@ -75,8 +82,7 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): (last_axe_permuted, work_dim, work_shape, tile_indices) = \ TransposeKernelGenerator.characterize_permutation(shape, axes, self.max_work_dim()) - - kernel_args = (input_field(0).data, output_field(0).data) + # keyword arguments will be agregated into extra_kwds dictionnary return super(OpenClAutotunableTransposeKernel, self).autotune(name=name, @@ -96,19 +102,39 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): def compute_parameters(self, extra_kwds): """Register extra parameters to optimize.""" check_instance(extra_kwds, dict, keys=str) - params = super(OpenClAutotunableTransposeKernel, self).compute_parameters(extra_kwds=extra_kwds) + params = super(OpenClAutotunableTransposeKernel, self).compute_parameters( + extra_kwds=extra_kwds) ## Register extra parameters # compute max tile fize from device cache tile_indices = extra_kwds['tile_indices'] dtype = extra_kwds['dtype'] shape = extra_kwds['shape'] + last_axe_permuted = extra_kwds['last_axe_permuted'] max_tile_size = self._max_tile_size(shape, dtype, tile_indices) - + + flag = self.autotuner_config.autotuner_flag vectorization = (1,) use_diagonal_coordinates = (False,) - tile_padding = (0,) + if last_axe_permuted: + use_diagonal_coordinates += (True,) + tile_padding = (0,1,) + tile_sizes = (max_tile_size,) + tile_size = max_tile_size + while tile_size>1: + tile_size = previous_pow2(tile_size) + tile_sizes += (tile_size,) + if flag == AutotunerFlags.ESTIMATE: + ntiles = 1 + elif flag == AutotunerFlags.MEASURE: + ntiles = 2 + elif flag == AutotunerFlags.PATIENT: + ntiles = 4 + elif flag == AutotunerFlags.EXHAUSTIVE: + ntiles = len(tile_sizes) + ntiles = min(ntiles, len(tile_sizes)) + tile_sizes = tile_sizes[:ntiles] params.register_extra_parameter('vectorization', vectorization) params.register_extra_parameter('use_diagonal_coordinates', use_diagonal_coordinates) @@ -117,39 +143,16 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): return params - - def compute_work_bounds(self, extra_parameters, extra_kwds): - """ - Configure workbounds (work_dim, work_size, max_work_load). - Return a WorkBoundsConfiguration object. - """ - check_instance(extra_parameters, dict, keys=str) - check_instance(extra_kwds, dict, keys=str) - - tile_indices = extra_kwds['tile_indices'] - work_size = extra_kwds['work_size'] - last_axe_permuted = extra_kwds['last_axe_permuted'] - - tile_size = extra_parameters['tile_size'] - tile_padding = extra_parameters['tile_padding'] - - assert npw.all(tile_size <= upper_pow2(work_size[tile_indices])) - - work_bounds = super(OpenClAutotunableTransposeKernel, self).compute_work_bounds( - extra_parameters=extra_parameters, - extra_kwds=extra_kwds) - return work_bounds - - def compute_work_candidates(self, work_bounds, work_load, extra_parameters, extra_kwds): """ - Configure work (global_size, local_size candidates) given a OpenClWorkBoundsConfiguration - object and a work_load. + Configure work (global_size, local_size candidates) given a + OpenClWorkBoundsConfiguration object and a work_load. + Return a WorkConfiguration object. Notes ----- - global_work_size can be set to None if it depends on local_work_size and will be set + global_work_size can be ignored if it depends on local_work_size and will be set in self.compute_global_work_size(). """ work = super(OpenClAutotunableTransposeKernel, self).compute_work_candidates( @@ -171,8 +174,16 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): return work - #def compute_global_work_size(self, local_work_size, work, extra_parameters, extra_kwds): - #return None + def compute_global_work_size(self, local_work_size, work, extra_parameters, extra_kwds): + shape = extra_kwds['shape'] + axes = extra_kwds['axes'] + vectorization = extra_parameters['vectorization'] + tile_size = extra_parameters['tile_size'] + + gs = TransposeKernelGenerator.compute_global_size(shape=shape, tile_size=tile_size, + vectorization=vectorization, axes=axes, local_work_size=local_work_size, + work_load=work.work_load) + return gs def generate_kernel_src(self, global_work_size, local_work_size, extra_parameters, extra_kwds, @@ -223,7 +234,7 @@ class OpenClAutotunableTransposeKernel(OpenClAutotunableKernel): if extra_kwds['compute_inplace']: args_mapping = { 'inout': (0, cl.MemoryObjectHolder) } else: - args_mapping = { 'input' : (0, cl.MemoryObjectHolder), + args_mapping = { 'input' : (0, cl.MemoryObjectHolder), 'output': (1, cl.MemoryObjectHolder) } return args_mapping diff --git a/hysop/backend/device/opencl/opencl_autotunable_kernel.py b/hysop/backend/device/opencl/opencl_autotunable_kernel.py index fe9c49eef..37c9ffbc1 100644 --- a/hysop/backend/device/opencl/opencl_autotunable_kernel.py +++ b/hysop/backend/device/opencl/opencl_autotunable_kernel.py @@ -1,5 +1,7 @@ from abc import ABCMeta, abstractmethod +from hysop import __KERNEL_DEBUG__ +from hysop.deps import os from hysop.constants import Backend from hysop.tools.numpywrappers import npw from hysop.tools.types import check_instance, first_not_None @@ -22,7 +24,7 @@ class OpenClAutotunableKernel(AutotunableKernel): self.cl_env = cl_env self.usable_cache_bytes_per_wg = clCharacterize.usable_local_mem_size(cl_env.device) - + def autotune(self, name, **extra_kwds): from hysop.backend.device.opencl.opencl_kernel_autotuner import OpenClKernelAutotuner autotuner = OpenClKernelAutotuner(name=name, tunable_kernel=self) @@ -67,7 +69,7 @@ class OpenClAutotunableKernel(AutotunableKernel): def format_best_candidate(self, name, extra_kwds, extra_parameters, work_load, global_work_size, local_work_size, - program, kernel, kernel_statistics, src_hash): + program, kernel, kernel_src, kernel_statistics, src_hash): """ Post treatment callback for autotuner results. Transform autotuner results in user friendly kernel wrappers. @@ -84,9 +86,20 @@ class OpenClAutotunableKernel(AutotunableKernel): check_instance(local_work_size, tuple, values=npw.int32) check_instance(program, cl.Program) check_instance(kernel, cl.Kernel) + check_instance(kernel_src, str) check_instance(kernel_statistics, OpenClKernelStatistics) check_instance(src_hash, str) + if __KERNEL_DEBUG__: + # dump the best kernel + dump_folder = self.autotuner_config.dump_folder + dump_file=dump_folder+'/'+'{}.cl'.format(name.replace(' ', '_')) + if not os.path.exists(dump_folder): + os.makedirs(dump_folder) + with open(dump_file, 'w+') as f: + print '>Saving OpenCL kernel source to \'{}\'.'.format(dump_file) + f.write(kernel_src) + args_mapping = self.compute_args_mapping(extra_kwds=extra_kwds, extra_parameters=extra_parameters) check_instance(args_mapping, dict, keys=str, values=tuple) @@ -106,7 +119,10 @@ class OpenClAutotunableKernel(AutotunableKernel): return self.cl_env.device.max_work_group_size def max_work_item_sizes(self): - """Maximum number of work-items that can be specified in each dimension of the work-group.""" + """ + Maximum number of work-items that can be specified in each dimension + of the work-group. + """ return self.cl_env.device.max_work_item_sizes @classmethod diff --git a/hysop/backend/device/opencl/opencl_copy.py b/hysop/backend/device/opencl/opencl_copy.py index 470efd617..b8feebe56 100644 --- a/hysop/backend/device/opencl/opencl_copy.py +++ b/hysop/backend/device/opencl/opencl_copy.py @@ -4,12 +4,12 @@ from hysop.deps import np from hysop.tools.decorators import debug from hysop.tools.types import check_instance, first_not_None from hysop.tools.numpywrappers import npw -from hysop.backend.device.opencl import cl +from hysop.backend.device.opencl import cl, clArray +from hysop.backend.device.opencl.opencl_kernel_launcher import OpenClKernelLauncher from hysop.backend.device.opencl.opencl_kernel_statistics import OpenClKernelStatistics class OpenClCopyKernelLauncher(OpenClKernelLauncher): """Interface to non-blocking OpenCL copy kernels.""" - __metaclass__ = ABCMeta @debug def __init__(self, name, dst, src, @@ -25,9 +25,11 @@ class OpenClCopyKernelLauncher(OpenClKernelLauncher): assert 'default_global_work_size' not in kwds assert 'default_local_work_size' not in kwds assert 'is_blocking' not in kwds + enqueue_copy_kwds['dest'] = dst enqueue_copy_kwds['src'] = src - enqueue_copy_kwds['is_blocking'] = False + if isinstance(src, np.ndarray) or isinstance(dst, np.ndarray): + enqueue_copy_kwds['is_blocking'] = False super(OpenClCopyKernelLauncher, self).__init__(name=name, kernel=None, args_list=(), **kwds) @@ -45,12 +47,14 @@ class OpenClCopyKernelLauncher(OpenClKernelLauncher): def __call__(self, queue=None, wait_for=None): queue = first_not_None(queue, self._default_queue) check_instance(queue, cl.CommandQueue) - dprint(self._apply_msg) evt = cl.enqueue_copy(queue=queue, **self._enqueue_copy_kwds) + + def global_size_configured(self): + return True enqueue_copy_kwds = property(_get_enqueue_copy_kwds) -class OpenClCopyBuffer(OpenClCopyKernelLauncher): +class OpenClCopyBufferLauncher(OpenClCopyKernelLauncher): """Non-blocking OpenCL copy kernel between host buffers and/or opencl device buffers.""" def __init__(self, varname, src, dst, src_device_offset=None, @@ -64,19 +68,19 @@ class OpenClCopyBuffer(OpenClCopyKernelLauncher): ---------- varname: str Name of the variable copied for loggin purposes. - src: cl.MemoryObject or np.ndarray + src: cl.MemoryObjectHolder or np.ndarray The source buffer. - dst: cl.MemoryObject or np.ndarray + dst: cl.MemoryObjectHolder or np.ndarray The destination buffer. src_device_offset: int, optional Offset in the source buffer, only valid if - source buffer is a cl.MemoryObject. + source buffer is a cl.MemoryObjectHolder. dst_device_offset: int, optional Offset in the source buffer, only valid if - source buffer is a cl.MemoryObject. + source buffer is a cl.MemoryObjectHolder. byte_count: int Byte count to copy if and only if source and destination - buffers are cl.MemoryObjects. + buffers are cl.MemoryObjectHolders. Notes ----- @@ -90,39 +94,88 @@ class OpenClCopyBuffer(OpenClCopyKernelLauncher): Device buffers cannot have views like np.ndarrays, an offset in bytes can be given as src_device_offset or dst_device_offset instead. """ - check_instance(src, (cl.MemoryObject, np.ndarray)) - check_instance(dst, (cl.MemoryObject, np.ndarray)) + check_instance(src, (cl.MemoryObjectHolder, np.ndarray)) + check_instance(dst, (cl.MemoryObjectHolder, np.ndarray)) check_instance(src_device_offset, (int, np.integer), allow_none=True) check_instance(dst_device_offset, (int, np.integer), allow_none=True) check_instance(byte_count, (int, np.integer), allow_none=True) + msg='Host to host copy is not supported.' + assert not (isinstance(src, np.ndarray) and isinstance(dst, np.ndarray)), msg + enqueue_copy_kwds = {} if (src_device_offset is not None): - assert isinstance(src, cl.MemoryObject) + assert isinstance(src, cl.MemoryObjectHolder) enqueue_copy_kwds['src_offset'] = src_device_offset if (dst_device_offset is not None): - assert isinstance(dst, cl.MemoryObject) + assert isinstance(dst, cl.MemoryObjectHolder) enqueue_copy_kwds['dst_offset'] = dst_device_offset if (byte_count is not None): - assert isinstance(src, cl.MemoryObject) - assert isinstance(dst, cl.MemoryObject) + assert isinstance(src, cl.MemoryObjectHolder) + assert isinstance(dst, cl.MemoryObjectHolder) enqueue_copy_kwds['byte_count'] = byte_count shape = first_not_None((byte_count,), - getattr(src, shape, None), - getattr(dst, shape, None), + getattr(src, 'shape', None), + getattr(dst, 'shape', None), '...') assert 'name' not in kwds name = 'enqueue_copy_{}__{}_to_{}'.format(varname, - 'host' is isinstance(src, np.ndarray) else 'device', - 'host' is isinstance(dst, np.ndarray) else 'device') + 'host' if isinstance(src, np.ndarray) else 'device', + 'host' if isinstance(dst, np.ndarray) else 'device') apply_msg='{}<<<{}>>>'.format(name, shape) - super(OpenClCopyHostBuffer, self).__init__(dst=dst, src=src, + super(OpenClCopyBufferLauncher, self).__init__(dst=dst, src=src, enqueue_copy_kwds=enqueue_copy_kwds, name=name, apply_msg=apply_msg, **kwds) -class OpenClCopyHost2Device(OpenClCopyKernelLauncher): - pass + def _format_device_arg(self, arg, arg_offset): + from hysop.backend.device.opencl.opencl_array import OpenClArray + nbytes=None + if isinstance(arg, (OpenClArray, clArray.Array)): + arg_offset = first_not_None(arg_offset, 0) + arg_offset += arg.offset + nbytes = arg.nbytes + arg = arg.base_data + elif isinstance(arg, cl.MemoryObjectHolder): + pass + else: + msg='Unknown type {} to format device buffer arguments.' + msg=msg.format(type(arg)) + raise TypeError(msg) + return (arg, arg_offset, nbytes) + +class OpenClCopyHost2DeviceLauncher(OpenClCopyBufferLauncher): + """Reduced interface for host to device copy kernels.""" + def __init__(self, varname, src, dst, dst_device_offset=None): + check_instance(src, (np.ndarray,)) + check_instance(dst, (cl.MemoryObjectHolder,)) + check_instance(dst_device_offset, (int, np.integer), allow_none=True) + super(OpenClCopyHost2DeviceLauncher, self).__init__(varname=varname, src=src, + dst=dst, dst_device_offset=dst_device_offset) + +class OpenClCopyDevice2HostLauncher(OpenClCopyBufferLauncher): + """Reduced interface for device to host copy kernels.""" + def __init__(self, varname, src, dst, src_device_offset=None): + check_instance(src, (cl.MemoryObjectHolder,)) + check_instance(dst, (np.ndarray,)) + check_instance(src_device_offset, (int, np.integer), allow_none=True) + super(OpenClCopyDevice2HostLauncher, self).__init__(varname=varname, src=src, + dst=dst, src_device_offset=src_device_offset) +class OpenClCopyDevice2DeviceLauncher(OpenClCopyBufferLauncher): + """Reduced interface for device to device copy kernels.""" + def __init__(self, varname, src, dst, + src_device_offset=None, dst_device_offset=None, byte_count=None): + src, src_device_offset, src_nbytes = self._format_device_arg(src, src_device_offset) + dst, dst_device_offset, dst_nbytes = self._format_device_arg(dst, dst_device_offset) + byte_count = first_not_None(byte_count, min(src_nbytes, dst_nbytes)) + check_instance(src, (cl.MemoryObjectHolder,)) + check_instance(dst, (cl.MemoryObjectHolder,)) + check_instance(src_device_offset, (int, np.integer), allow_none=True) + check_instance(dst_device_offset, (int, np.integer), allow_none=True) + check_instance(byte_count, (int, np.integer), allow_none=True) + super(OpenClCopyDevice2DeviceLauncher, self).__init__(varname=varname, src=src, dst=dst, + src_device_offset=src_device_offset, dst_device_offset=dst_device_offset, + byte_count=byte_count) diff --git a/hysop/backend/device/opencl/opencl_kernel.py b/hysop/backend/device/opencl/opencl_kernel.py index c809f7327..1af0e1915 100644 --- a/hysop/backend/device/opencl/opencl_kernel.py +++ b/hysop/backend/device/opencl/opencl_kernel.py @@ -100,6 +100,14 @@ class OpenClKernel(object): default_global_work_size = property(_get_default_global_work_size) default_local_work_size = property(_get_default_local_work_size) + def build_list_launcher(self, launcher_name=None, *args, **kwds): + """ + Build a OpenClKernelLauncher and return it as a OpenClKernelListLauncher. + See self.build_launcher() and OpenClKernelLauncher.as_list_launcher() + """ + launcher_name = first_not_None(launcher_name, self.name) + return self.build_launcher(*args, **kwds).as_list_launcher(name=launcher_name) + def build_launcher(self, name=None, name_prefix=None, name_postfix=None, queue=None, local_work_size=None, global_work_size=None, **kwds): """ diff --git a/hysop/backend/device/opencl/opencl_kernel_autotuner_config.py b/hysop/backend/device/opencl/opencl_kernel_autotuner_config.py index bd5002aa0..301920627 100644 --- a/hysop/backend/device/opencl/opencl_kernel_autotuner_config.py +++ b/hysop/backend/device/opencl/opencl_kernel_autotuner_config.py @@ -1,4 +1,5 @@ +from hysop.tools.io_utils import IO from hysop.backend.device.kernel_autotuner_config import KernelAutotunerConfig from hysop.backend.device.opencl import OPENCL_KERNEL_DUMP_FOLDER @@ -8,4 +9,5 @@ class OpenClKernelAutotunerConfig(KernelAutotunerConfig): super(OpenClKernelAutotunerConfig, self).__init__(*args, **kwds) def default_dump_folder(self): - return OPENCL_KERNEL_DUMP_FOLDER + default_path = IO.default_path() + return '{}/{}'.format(default_path, OPENCL_KERNEL_DUMP_FOLDER) diff --git a/hysop/backend/device/opencl/opencl_kernel_launcher.py b/hysop/backend/device/opencl/opencl_kernel_launcher.py index e509a1b59..291486827 100644 --- a/hysop/backend/device/opencl/opencl_kernel_launcher.py +++ b/hysop/backend/device/opencl/opencl_kernel_launcher.py @@ -29,25 +29,66 @@ class OpenClKernelListLauncher(object): check_instance(name, str) self._name = name self._kernels = () - self._apply_msg = 'OpenClKernelListLauncher {}.__apply__()'.format(name) + self._apply_msg = '>OpenClKernelListLauncher {}'.format(name) + + def push_copy_host_device(self, varname, src, dst, + src_device_offset=None, dst_device_offset=None, byte_count=None): + """Shortcut for OpenClCopyBuffer kernels creation.""" + from hysop.backend.device.opencl.opencl_copy import OpenClCopyBufferLauncher + kernel = OpenClCopyBufferLauncher(varname=varname, + src=src, dst=dst, byte_count=byte_count, + src_device_offset=src_device_offset, dst_device_offset=dst_device_offset) + self.push_kernels(kernel) + return self + + def push_copy_host_to_device(self, varname, src, dst, dst_device_offset=None): + """Shortcut for OpenClCopyHost2Device kernels creation.""" + from hysop.backend.device.opencl.opencl_copy import OpenClCopyHost2DeviceLauncher + kernel = OpenClCopyHost2DeviceLauncher(varname=varname, src=src, dst=dst, + dst_device_offset=dst_device_offset) + self.push_kernels(kernel) + return self + + def push_copy_device_to_host(self, varname, src, dst, src_device_offset=None): + """Shortcut for OpenClCopyDevice2Host kernels creation.""" + from hysop.backend.device.opencl.opencl_copy import OpenClCopyDevice2HostLauncher + kernel = OpenClCopyDevice2HostLauncher(varname=varname, + src=src, dst=dst, + src_device_offset=src_device_offset) + self.push_kernels(kernel) + return self + + def push_copy_device_to_device(self, varname, src, dst, + src_device_offset=None, dst_device_offset=None, byte_count=None): + """Shortcut for OpenClCopyDevice2Device kernels creation.""" + from hysop.backend.device.opencl.opencl_copy import OpenClCopyDevice2DeviceLauncher + kernel = OpenClCopyDevice2DeviceLauncher(varname=varname, + src=src, dst=dst, byte_count=byte_count, + src_device_offset=src_device_offset, dst_device_offset=dst_device_offset) + self.push_kernels(kernel) + return self def push_kernels(self, *kernel_launchers): """ Push OpenClKernelLaunchers into the list. None values are ignored for convenience. """ - for kernel in kernels: + for kernel in kernel_launchers: if (kernel is None): continue - if not isinstance(kernel, OpenClKernelLauncher): - msg='Expected an OpenClKernelLauncher but got a {}.' + if isinstance(kernel, OpenClKernelLauncher): + if not kernel.global_size_configured(): + msg='OpenClKernelLauncher {} global_work_size has not been configured.' + msg=msg.format(kernel.name) + raise RuntimeError(msg) + self._kernels += (kernel,) + elif isinstance(kernel, OpenClKernelListLauncher): + self._kernels += kernel._kernels + else: + msg='Expected an OpenClKernelLauncher or a OpenClKernelListLauncher but got a {}.' msg=msg.format(type(kernel)) raise TypeError(msg) - if not kernel.global_size_configured(): - msg='OpenClKernelLauncher {} global_work_size has not been configured.' - msg=msg.format(kernel.name) - raise RuntimeError(msg) - self._kernels += (kernel,) + return self def __call__(self, queue, wait_for=None): """ @@ -56,10 +97,10 @@ class OpenClKernelListLauncher(object): If this OpenClKernelListLauncher is empty, cl.wait_for_events will be called instead. """ - dprint(self._apply_msg.format()) + dprint(self._apply_msg) kernels = self._kernels if kernels: - evt = kernels[0).__call__(queue=queue, wait_for=wait_for) + evt = kernels[0].__call__(queue=queue, wait_for=wait_for) for kernel in kernels[1:]: evt = kernel.__call__(queue=queue) else: @@ -139,7 +180,7 @@ class OpenClKernelLauncher(object): self._events = () self._kernel_is_shared = kernel_is_shared self._kernel_statistics = OpenClKernelStatistics() - self._apply_msg = ' {}<<<{}, {}>>>' + self._apply_msg = ' {}<<<{}, {}>>>'.format(name, '{}', '{}') def queue_configured(self): """ @@ -155,7 +196,7 @@ class OpenClKernelLauncher(object): """ return (self._default_global_work_size is not None) - def as_list_launcher(self, name) + def as_list_launcher(self, name): """Convert a OpenClKernelLauncher to a OpenClKernelListLauncher.""" llauncher = OpenClKernelListLauncher(name=name) llauncher.push_kernels(self) @@ -216,7 +257,7 @@ class OpenClKernelLauncher(object): assert isinstance(queue, cl.CommandQueue) assert isinstance(global_work_size, tuple) assert isinstance(local_work_size, (tuple, type(None))) - + dprint(self._apply_msg.format(global_work_size, local_work_size)) kernel = self._kernel @@ -227,7 +268,7 @@ class OpenClKernelLauncher(object): global_work_size=global_work_size, local_work_size=local_work_size, wait_for=wait_for) - if (cl.command_queue_properties.PROFILING_ENABLE in queue.properties): + if (cl.command_queue_properties.PROFILING_ENABLE & queue.properties): self._events.append(evt) return evt diff --git a/hysop/backend/device/opencl/operator/transpose.py b/hysop/backend/device/opencl/operator/transpose.py index e39aac913..ccb2782a8 100644 --- a/hysop/backend/device/opencl/operator/transpose.py +++ b/hysop/backend/device/opencl/operator/transpose.py @@ -39,18 +39,20 @@ class OpenClTranspose(TransposeOperatorBase, OpenClOperator): is_inplace=is_inplace, input_field=input_field, output_field=output_field) kernel_launchers=() - for i in xrange(input_field.nb_components): + for i in xrange(self.nb_components): if compute_inplace: launcher = transpose.build_launcher(inout=input_field[i].data) elif is_inplace: - launcher = transpose.build_launcher(input=input_field[i].data, output=self.dtmp.data) - launcher = launcher.as_list_launcher(name='transpose_copy_{}{}'.format(input_field.name, i)) - launcher.enqueue_copy(dst='output', src='input') + launcher = transpose.build_list_launcher(input=input_field[i].data, + output=self.dtmp.data) + launcher.push_copy_device_to_device(varname='tmp', src=self.dtmp, + dst=input_field[i]) else: - launcher = transpose.build_launcher(input=input_field[i].data, output=output_field[i].data) + launcher = transpose.build_launcher(input=input_field[i].data, + output=output_field[i].data) kernel_launchers += (launcher,) - self._transpose_kernel_launchers = kernel_launchers + self._kernel_launchers = kernel_launchers def enqueue_copy_kernel(self, _dst, _src, queue): pass @@ -60,11 +62,9 @@ class OpenClTranspose(TransposeOperatorBase, OpenClOperator): super(OpenClTranspose,self).apply(**kwds) queue = self.cl_env.default_queue - compute_inplace = self.compute_inplace - is_inplace = self.is_inplace - - kernel_launchers = self.transpose_kernel_launchers - for i in range(din.nb_components): + + kernel_launchers = self._kernel_launchers + for i in range(self.nb_components): kernel = kernel_launchers[i] evt = kernel(queue=queue) diff --git a/hysop/core/arrays/array_backend.py b/hysop/core/arrays/array_backend.py index 587943df2..eabbac65f 100644 --- a/hysop/core/arrays/array_backend.py +++ b/hysop/core/arrays/array_backend.py @@ -486,6 +486,10 @@ Exception was: if isinstance(dst.backend, backend_cls): src = dst.backend.wrap(src) dst.backend.copyto(dst, src, **kargs) + elif backend_cls is HostArrayBackend: + host_array_backend = dst.backend.host_array_backend + src = host_array_backend.wrap(src) + host_array_backend.copyto(dst, src,**kargs) else: msg='dst does not match registered backend for type {}.' msg=msg.format(cls) diff --git a/hysop/core/graph/computational_operator.py b/hysop/core/graph/computational_operator.py index ed912f73f..9cd6354bf 100644 --- a/hysop/core/graph/computational_operator.py +++ b/hysop/core/graph/computational_operator.py @@ -535,6 +535,7 @@ class ComputationalGraphOperator(ComputationalGraphNode): from hysop.core.graph.computational_graph import ComputationalGraph name = name or '{}_graph'.format(self.name) graph = ComputationalGraph(name=name) + print self.operators graph.push_nodes(self.operators) return graph diff --git a/hysop/core/graph/node_generator.py b/hysop/core/graph/node_generator.py index db6f2d6d5..79b7a06c0 100644 --- a/hysop/core/graph/node_generator.py +++ b/hysop/core/graph/node_generator.py @@ -79,6 +79,6 @@ class ComputationalGraphNodeGenerator(object): graph.push_nodes(*self.nodes) return graph - def build(self, name=None, **kwds): + def build(self, name=None, outputs_are_inputs=False, **kwds): """Convert a computational node generator to a graph and prepares it for apply.""" - return self.to_graph(name=name).build(**kwds) + return self.to_graph(name=name).build(outputs_are_inputs=outputs_are_inputs, **kwds) diff --git a/hysop/fields/cartesian_discrete_field.py b/hysop/fields/cartesian_discrete_field.py index 9164cebc6..cce095d68 100644 --- a/hysop/fields/cartesian_discrete_field.py +++ b/hysop/fields/cartesian_discrete_field.py @@ -291,7 +291,7 @@ CartesianDiscreteFieldView (id={}, tag={}) def randomize(self, **kwds): """Initialize a the with random values.""" for d in xrange(self.nb_components): - self.array_backend.rand(out=self.data[d], **kwds) + self.backend.rand(out=self.data[d], **kwds) def copy(self, field_in, **kwds): @@ -303,8 +303,8 @@ CartesianDiscreteFieldView (id={}, tag={}) field to be copied """ for d in xrange(self.nb_components): - self.array_backend.memcpy(dst=self.data[d], src=field_in[d], **kwds) - + self.backend.memcpy(dst=self.data[d], src=field_in[d], **kwds) + def initialize(self, formula, vectorize=False, **kwds): """ Initialize the field components diff --git a/hysop/operator/base/transpose_operator.py b/hysop/operator/base/transpose_operator.py index f5ff5ee6a..dde1cae34 100644 --- a/hysop/operator/base/transpose_operator.py +++ b/hysop/operator/base/transpose_operator.py @@ -46,6 +46,7 @@ class TransposeOperatorBase(object): assert input_field.domain is output_field.domain dim = input_field.domain.dim + nb_components = input_field.nb_components assert dim>=2 assert set(axes)==set(range(dim)) assert tuple(axes)!=tuple(range(dim)) @@ -58,6 +59,7 @@ class TransposeOperatorBase(object): self.input_field = input_field self.output_field = output_field + self.nb_components = nb_components self.dim = dim self.axes = axes diff --git a/hysop/operator/tests/test_transpose.py b/hysop/operator/tests/test_transpose.py index 9ab872f6a..1d0c690e9 100644 --- a/hysop/operator/tests/test_transpose.py +++ b/hysop/operator/tests/test_transpose.py @@ -6,6 +6,7 @@ from hysop.testsenv import opencl_failed, iter_clenv from hysop.tools.contexts import printoptions from hysop.tools.numerics import is_fp, is_integer from hysop.tools.types import check_instance +from hysop.tools.io_utils import IO from hysop.operator.transpose import Transpose, Implementation from hysop import Field, Box @@ -16,17 +17,18 @@ class TestTransposeOperator(object): def setup_class(cls, enable_extra_tests=__ENABLE_LONG_TESTS__, enable_debug_mode=False): + + IO.set_default_path('/tmp/hysop_tests/test_transpose') if enable_debug_mode: - cls.size_min = 4 + cls.size_min = 3 cls.size_max = 5 else: cls.size_min = 2 - cls.size_max = 32 + cls.size_max = 16 cls.enable_extra_tests = enable_extra_tests cls.enable_debug_mode = enable_debug_mode - @classmethod def teardown_class(cls): @@ -36,17 +38,13 @@ class TestTransposeOperator(object): def _test(self, dim, dtype, is_inplace): enable_extra_tests = self.enable_extra_tests assert dim > 1 - if is_inplace: - msg='is_inplace transposition has not been implmented yet.' - raise ValueError(msg) nshapes = 9 if enable_extra_tests else 3 - shapes = ((np.random.randint(low=self.size_min, high=self.size_max),)*dim,) - shapes += tuple( tuple( np.random.randint(low=self.size_min, - high=self.size_max, size=dim).tolist() ) - for i in xrange(nshapes-1) ) - + shapes = ((np.random.randint(low=self.size_min, high=self.size_max+1),)*dim,) + shapes += tuple(set( tuple( np.random.randint(low=self.size_min, + high=self.size_max+1, size=dim).tolist() ) + for i in xrange(nshapes-1) )) all_axes = set(it.permutations(range(dim))) all_axes.remove(tuple(range(dim))) @@ -78,7 +76,7 @@ class TestTransposeOperator(object): def _test_one(self, shape, axes, dim, dtype, is_inplace, domain, Fin, Fout): - + print 'Testing inplace={} dtype={} shape={} axes={}'.format( is_inplace, dtype.__name__, shape, axes) if is_inplace: @@ -95,11 +93,10 @@ class TestTransposeOperator(object): # Compute reference solution transpose = Transpose(fields=fin, output_fields=fout, variables=variables, axes=axes, - implementation=ref_impl, - name='test_transpose_{}'.format(str(ref_impl))).build() + implementation=ref_impl).build() dfin, dfout = transpose.input_discrete_fields[fin], transpose.output_discrete_fields[fout] dfin.initialize(self.__field_init, dtype=dtype) - + if is_inplace: refin = tuple(df.copy() for df in dfin.buffers) else: @@ -108,13 +105,14 @@ class TestTransposeOperator(object): transpose.apply() refout = tuple(df.copy() for df in dfout.buffers) + for in_,out_ in zip(refin, refout): assert np.all(out_ == np.transpose(in_, axes=axes)) def iter_impl(impl): base_kwds = dict(fields=fin, output_fields=fout, variables=variables, axes=axes, implementation=impl, - name='test_transpose_{}'.format(str(impl))) + name='test_transpose_{}'.format(str(impl).lower())) if impl is ref_impl: return elif impl is Implementation.OPENCL_CODEGEN: @@ -129,8 +127,8 @@ class TestTransposeOperator(object): for op in iter_impl(impl): op = op.build() dfin, dfout = op.input_discrete_fields[fin], op.output_discrete_fields[fout] - dfin.initialize(self.__field_init, dtype=dtype) - transpose.apply() + dfin.copy(refin) + op.apply() out = tuple( data.get().handle for data in dfout.data ) self._check_output(impl, op, refin, refout, out) @@ -171,29 +169,44 @@ class TestTransposeOperator(object): raise RuntimeError(msg) - def test_2d_int_out_of_place(self): self._test(dim=2, dtype=np.int32, is_inplace=False) - def test_2d_uint_out_of_place(self): - self._test(dim=2, dtype=np.uint32, is_inplace=False) def test_2d_float_out_of_place(self): self._test(dim=2, dtype=np.float32, is_inplace=False) - def test_3d_int_out_of_place(self): self._test(dim=3, dtype=np.int32, is_inplace=False) - def test_3d_uint_out_of_place(self): - self._test(dim=3, dtype=np.uint32, is_inplace=False) def test_3d_float_out_of_place(self): self._test(dim=3, dtype=np.float32, is_inplace=False) + def test_4d_int_out_of_place(self): + self._test(dim=4, dtype=np.int32, is_inplace=False) + + def test_2d_int_inplace(self): + self._test(dim=2, dtype=np.int32, is_inplace=True) + def test_2d_float_inplace(self): + self._test(dim=2, dtype=np.float32, is_inplace=True) + def test_3d_int_inplace(self): + self._test(dim=3, dtype=np.int32, is_inplace=True) + def test_3d_float_inplace(self): + self._test(dim=3, dtype=np.float32, is_inplace=True) + def test_4d_int_inplace(self): + self._test(dim=4, dtype=np.int32, is_inplace=False) def perform_tests(self): - self.test_2d_int_out_of_place() - self.test_2d_uint_out_of_place() - self.test_2d_float_out_of_place() + # self.test_2d_int_out_of_place() + # self.test_2d_float_out_of_place() + + # self.test_3d_int_out_of_place() + # self.test_3d_float_out_of_place() + + # self.test_4d_int_out_of_place() + + self.test_2d_int_inplace() + self.test_2d_float_inplace() + + self.test_3d_int_inplace() + self.test_3d_float_inplace() - self.test_3d_int_out_of_place() - self.test_3d_uint_out_of_place() - self.test_3d_float_out_of_place() + self.test_4d_int_inplace() if __name__ == '__main__': TestTransposeOperator.setup_class(enable_extra_tests=False, diff --git a/hysop/operator/transpose.py b/hysop/operator/transpose.py index badcaa4e2..74c98b93b 100644 --- a/hysop/operator/transpose.py +++ b/hysop/operator/transpose.py @@ -77,7 +77,7 @@ class Transpose(ComputationalGraphNodeGenerator): Input and output are matched by order int list/tuple. variables: dict Dictionary of fields as keys and CartesianTopologyDescriptors as values. - axes: tuple of ints, of array like of tuple of ints, or dictionnary of (tuple of ints -> TranspositionState). + axes: tuple of ints, or array like of tuples, or dict of (tuple, TranspositionState). Permutation of axes in numpy notations (as a tuple of ints). Axe dim-1 is the contiguous axe, axe 0 has the greatest stride in memory. @@ -114,7 +114,8 @@ class Transpose(ComputationalGraphNodeGenerator): Out of place transpose will always be faster to process. The only exception to this rule may be 2D square matrices. - Component-wise transpose is *not* yet supported in Fields and will raise directly in frontend. + Component-wise transpose is *not* yet supported in Fields and will + raise directly in frontend. Inplace transposition may request a temporary buffer because not all implementations may support inplace transposition. diff --git a/hysop/tools/io_utils.py b/hysop/tools/io_utils.py index 3211a174b..a74aaafa7 100755 --- a/hysop/tools/io_utils.py +++ b/hysop/tools/io_utils.py @@ -25,7 +25,6 @@ class IO(object): """ _default_path = None - _default_cache_path = os.path.expanduser('~') + '/.cache/hysop' _cache_path = None @@ -69,37 +68,6 @@ class IO(object): #ind = -1 interactive_path = './interactive/p' + str(mpi.main_size) interactive_path = os.path.abspath(interactive_path) - # --- ipython --- - #from hysop.tools.sys_utils import SysUtils - # if SysUtils.in_ipython(): - # # Note FP: because of set_default_path call - # # in __init__.py, this condition must never happen. - # # But we keep the code below, just in case ... - - # # list of files (fullpath) which contain the callers - # sublist = [i[1] for i in a] - # # look for ipython in callers ... - # # If found, keep index of the file just before - # # first occurence of ipython, i.e. the name - # # of the 'main' file - # for val in sublist: - # ll = findall('ipython', val, IGNORECASE) - # if len(ll) > 0: - # ind = sublist.index(val) - 1 - # break - - # if ind > -1: - # # -- interactive ipython but call with execfile-- - # if len(findall('io_utils', a[ind][1])) > 0: - # return interactive_path - # a = a[ind] - # else: - # # -- interactive ipython without execfile call -- - # return interactive_path - - # else: - # -- python -- - # if test session, set default path to interactive_path for fname in a: cond1 = len(findall('py.test', fname[1])) > 0 cond2 = len(findall('pytest', fname[1])) > 0 @@ -166,12 +134,10 @@ class IO(object): used for the simulation. """ - IO._default_path = pathdir - IO._default_path = os.path.join(IO._default_path, + IO._default_path = os.path.join(pathdir, 'p' + str(mpi.main_size)) IO.check_dir(IO._default_path) - @staticmethod def default_cache_path(): return IO._default_cache_path -- GitLab